@oomkapwn/enquire-mcp 3.6.1 → 3.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +125 -0
- package/README.md +21 -9
- package/assets/social-preview.png +0 -0
- package/dist/bases.d.ts +13 -3
- package/dist/bases.d.ts.map +1 -1
- package/dist/bases.js +64 -9
- package/dist/bases.js.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +6 -2
- package/dist/cli.js.map +1 -1
- package/dist/doctor.d.ts.map +1 -1
- package/dist/doctor.js +10 -2
- package/dist/doctor.js.map +1 -1
- package/dist/dql.d.ts +67 -0
- package/dist/dql.d.ts.map +1 -1
- package/dist/dql.js +45 -0
- package/dist/dql.js.map +1 -1
- package/dist/embed-db.d.ts +45 -0
- package/dist/embed-db.d.ts.map +1 -1
- package/dist/embed-db.js +31 -0
- package/dist/embed-db.js.map +1 -1
- package/dist/embeddings.d.ts +33 -0
- package/dist/embeddings.d.ts.map +1 -1
- package/dist/embeddings.js +29 -0
- package/dist/embeddings.js.map +1 -1
- package/dist/fts5.d.ts +123 -0
- package/dist/fts5.d.ts.map +1 -1
- package/dist/fts5.js +130 -4
- package/dist/fts5.js.map +1 -1
- package/dist/hnsw.d.ts.map +1 -1
- package/dist/hnsw.js +11 -0
- package/dist/hnsw.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/parser.d.ts +80 -0
- package/dist/parser.d.ts.map +1 -1
- package/dist/parser.js +48 -0
- package/dist/parser.js.map +1 -1
- package/dist/server.d.ts +34 -0
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +23 -3
- package/dist/server.js.map +1 -1
- package/dist/tool-registry.js +1 -1
- package/dist/tool-registry.js.map +1 -1
- package/dist/tools/media.d.ts +1 -1
- package/dist/tools/media.js +1 -1
- package/dist/tools/meta.d.ts +2 -2
- package/dist/tools/meta.js +2 -2
- package/dist/tools/search.d.ts +32 -1
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +51 -4
- package/dist/tools/search.js.map +1 -1
- package/dist/tools/write.d.ts +2 -2
- package/dist/tools/write.js +2 -2
- package/dist/tools.d.ts +980 -0
- package/dist/tools.d.ts.map +1 -0
- package/dist/tools.js +3132 -0
- package/dist/tools.js.map +1 -0
- package/dist/vault.d.ts +179 -0
- package/dist/vault.d.ts.map +1 -1
- package/dist/vault.js +157 -0
- package/dist/vault.js.map +1 -1
- package/docs/COMPARISON.md +5 -5
- package/docs/QUICKSTART.md +2 -2
- package/docs/api-reference/.nojekyll +1 -0
- package/docs/api-reference/assets/hierarchy.js +1 -0
- package/docs/api-reference/assets/highlight.css +71 -0
- package/docs/api-reference/assets/icons.js +18 -0
- package/docs/api-reference/assets/icons.svg +1 -0
- package/docs/api-reference/assets/main.js +60 -0
- package/docs/api-reference/assets/navigation.js +1 -0
- package/docs/api-reference/assets/search.js +1 -0
- package/docs/api-reference/assets/style.css +1633 -0
- package/docs/api-reference/functions/index.buildEmbedText.html +15 -0
- package/docs/api-reference/functions/index.buildMcpServer.html +4 -0
- package/docs/api-reference/functions/index.formatReadyBanner.html +4 -0
- package/docs/api-reference/functions/index.main.html +1 -0
- package/docs/api-reference/functions/index.parsePositiveInt.html +1 -0
- package/docs/api-reference/functions/index.parseQuantizationMode.html +5 -0
- package/docs/api-reference/functions/index.prepareServerDeps.html +5 -0
- package/docs/api-reference/functions/index.startServer.html +1 -0
- package/docs/api-reference/functions/tools.appendToNote.html +17 -0
- package/docs/api-reference/functions/tools.archiveNote.html +15 -0
- package/docs/api-reference/functions/tools.assertHnswModelMatchesEmbedder.html +13 -0
- package/docs/api-reference/functions/tools.chatThreadAppend.html +22 -0
- package/docs/api-reference/functions/tools.chatThreadRead.html +16 -0
- package/docs/api-reference/functions/tools.contextPack.html +21 -0
- package/docs/api-reference/functions/tools.createNote.html +19 -0
- package/docs/api-reference/functions/tools.dataviewQuery.html +16 -0
- package/docs/api-reference/functions/tools.embeddingsSearch.html +40 -0
- package/docs/api-reference/functions/tools.findPath.html +23 -0
- package/docs/api-reference/functions/tools.findSimilar.html +21 -0
- package/docs/api-reference/functions/tools.frontmatterGet.html +15 -0
- package/docs/api-reference/functions/tools.frontmatterSearch.html +16 -0
- package/docs/api-reference/functions/tools.frontmatterSet.html +19 -0
- package/docs/api-reference/functions/tools.getBacklinks.html +15 -0
- package/docs/api-reference/functions/tools.getNoteNeighbors.html +16 -0
- package/docs/api-reference/functions/tools.getOpenQuestions.html +19 -0
- package/docs/api-reference/functions/tools.getOutboundLinks.html +16 -0
- package/docs/api-reference/functions/tools.getRecentEdits.html +14 -0
- package/docs/api-reference/functions/tools.getUnresolvedWikilinks.html +14 -0
- package/docs/api-reference/functions/tools.getVaultStats.html +13 -0
- package/docs/api-reference/functions/tools.lintWiki.html +20 -0
- package/docs/api-reference/functions/tools.listCanvases.html +16 -0
- package/docs/api-reference/functions/tools.listNotes.html +19 -0
- package/docs/api-reference/functions/tools.listPdfs.html +15 -0
- package/docs/api-reference/functions/tools.listTags.html +14 -0
- package/docs/api-reference/functions/tools.ocrPdf.html +18 -0
- package/docs/api-reference/functions/tools.openInUi.html +17 -0
- package/docs/api-reference/functions/tools.paperAudit.html +16 -0
- package/docs/api-reference/functions/tools.pickEmbedTextForHyde.html +8 -0
- package/docs/api-reference/functions/tools.readCanvas.html +19 -0
- package/docs/api-reference/functions/tools.readNote.html +20 -0
- package/docs/api-reference/functions/tools.readPdf.html +18 -0
- package/docs/api-reference/functions/tools.renameNote.html +24 -0
- package/docs/api-reference/functions/tools.replaceInNotes.html +20 -0
- package/docs/api-reference/functions/tools.resolveTarget.html +24 -0
- package/docs/api-reference/functions/tools.resolveWikilink.html +20 -0
- package/docs/api-reference/functions/tools.searchHybrid.html +62 -0
- package/docs/api-reference/functions/tools.searchText.html +19 -0
- package/docs/api-reference/functions/tools.semanticSearch.html +19 -0
- package/docs/api-reference/functions/tools.validateNoteProposal.html +19 -0
- package/docs/api-reference/hierarchy.html +1 -0
- package/docs/api-reference/index.html +1 -0
- package/docs/api-reference/interfaces/index.ServeOptions.html +74 -0
- package/docs/api-reference/interfaces/index.ServerDeps.html +27 -0
- package/docs/api-reference/interfaces/tool-manifest.ToolManifestEntry.html +33 -0
- package/docs/api-reference/interfaces/tools.ArchiveNoteArgs.html +12 -0
- package/docs/api-reference/interfaces/tools.BacklinkHit.html +15 -0
- package/docs/api-reference/interfaces/tools.CanvasEdge.html +19 -0
- package/docs/api-reference/interfaces/tools.CanvasSummary.html +16 -0
- package/docs/api-reference/interfaces/tools.ChatThreadAppendArgs.html +10 -0
- package/docs/api-reference/interfaces/tools.ChatThreadMessage.html +14 -0
- package/docs/api-reference/interfaces/tools.ChatThreadReadResult.html +10 -0
- package/docs/api-reference/interfaces/tools.ContextPackArgs.html +12 -0
- package/docs/api-reference/interfaces/tools.ContextPackResult.html +20 -0
- package/docs/api-reference/interfaces/tools.EmbedHit.html +21 -0
- package/docs/api-reference/interfaces/tools.EmbedSearchResponse.html +14 -0
- package/docs/api-reference/interfaces/tools.FindPathResult.html +17 -0
- package/docs/api-reference/interfaces/tools.FrontmatterSearchArgs.html +20 -0
- package/docs/api-reference/interfaces/tools.FrontmatterSetArgs.html +13 -0
- package/docs/api-reference/interfaces/tools.HnswSearchContext.html +21 -0
- package/docs/api-reference/interfaces/tools.LintWikiArgs.html +14 -0
- package/docs/api-reference/interfaces/tools.LintWikiFinding.html +14 -0
- package/docs/api-reference/interfaces/tools.LintWikiResult.html +9 -0
- package/docs/api-reference/interfaces/tools.NoteNeighbors.html +17 -0
- package/docs/api-reference/interfaces/tools.NoteReadFull.html +20 -0
- package/docs/api-reference/interfaces/tools.NoteReadMap.html +25 -0
- package/docs/api-reference/interfaces/tools.NoteSummary.html +14 -0
- package/docs/api-reference/interfaces/tools.OcrPdfArgs.html +16 -0
- package/docs/api-reference/interfaces/tools.OcrPdfPage.html +15 -0
- package/docs/api-reference/interfaces/tools.OcrPdfResult.html +18 -0
- package/docs/api-reference/interfaces/tools.OpenInUiResult.html +11 -0
- package/docs/api-reference/interfaces/tools.OpenQuestion.html +20 -0
- package/docs/api-reference/interfaces/tools.OutboundLink.html +20 -0
- package/docs/api-reference/interfaces/tools.PaperAuditFinding.html +17 -0
- package/docs/api-reference/interfaces/tools.PathStep.html +9 -0
- package/docs/api-reference/interfaces/tools.PdfSummary.html +9 -0
- package/docs/api-reference/interfaces/tools.ReadCanvasResult.html +15 -0
- package/docs/api-reference/interfaces/tools.ReadPdfArgs.html +8 -0
- package/docs/api-reference/interfaces/tools.ReadPdfPage.html +13 -0
- package/docs/api-reference/interfaces/tools.ReadPdfResult.html +18 -0
- package/docs/api-reference/interfaces/tools.RenameNoteResult.html +14 -0
- package/docs/api-reference/interfaces/tools.RenameProposal.html +13 -0
- package/docs/api-reference/interfaces/tools.ReplaceInNotesArgs.html +15 -0
- package/docs/api-reference/interfaces/tools.ReplaceInNotesFileResult.html +6 -0
- package/docs/api-reference/interfaces/tools.ReplaceInNotesResult.html +21 -0
- package/docs/api-reference/interfaces/tools.SearchHit.html +16 -0
- package/docs/api-reference/interfaces/tools.SearchHybridHit.html +30 -0
- package/docs/api-reference/interfaces/tools.SearchHybridResponse.html +23 -0
- package/docs/api-reference/interfaces/tools.SearchResponse.html +13 -0
- package/docs/api-reference/interfaces/tools.SemanticHit.html +15 -0
- package/docs/api-reference/interfaces/tools.SimilarNote.html +15 -0
- package/docs/api-reference/interfaces/tools.TagSummary.html +13 -0
- package/docs/api-reference/interfaces/tools.UnresolvedWikilink.html +22 -0
- package/docs/api-reference/interfaces/tools.ValidateProposalArgs.html +10 -0
- package/docs/api-reference/interfaces/tools.ValidateProposalResult.html +14 -0
- package/docs/api-reference/interfaces/tools.VaultStats.html +26 -0
- package/docs/api-reference/modules/index.html +1 -0
- package/docs/api-reference/modules/tool-manifest.html +1 -0
- package/docs/api-reference/modules/tools.html +1 -0
- package/docs/api-reference/types/tools.CanvasNode.html +7 -0
- package/docs/api-reference/types/tools.SearchMode.html +7 -0
- package/docs/api-reference/variables/index.VERSION.html +9 -0
- package/docs/api-reference/variables/tool-manifest.TOOL_MANIFEST.html +1 -0
- package/docs/api.md +11 -11
- package/package.json +16 -4
package/dist/tools.js
ADDED
|
@@ -0,0 +1,3132 @@
|
|
|
1
|
+
import * as path from "node:path";
|
|
2
|
+
import matter from "gray-matter";
|
|
3
|
+
import { parseDql, runDql } from "./dql.js";
|
|
4
|
+
import { resolvePeriodicNoteName } from "./periodic.js";
|
|
5
|
+
/**
 * List markdown notes, most recently modified first, with optional filters.
 *
 * @param vault - Vault accessor. Uses `ensureExists()`, `listMarkdown(folder)`
 *   and `readNote(absPath, mtimeMs)`.
 * @param args - Options:
 *   - `folder`: passed through to `vault.listMarkdown`.
 *   - `since_date`: date string parsed with `Date.parse`; notes whose mtime is
 *     earlier than it are left out.
 *   - `tag`: keep only notes carrying this tag (both sides go through
 *     `normalizeTag` before comparison).
 *   - `limit`: maximum number of notes returned (default 50). A limit of zero
 *     or less yields an empty list.
 * @returns Array of `{ title, path, frontmatter, tags, mtime }`, where `mtime`
 *   is an ISO-8601 string.
 * @throws {Error} If `since_date` is supplied but cannot be parsed.
 */
export async function listNotes(vault, args) {
    await vault.ensureExists();
    const limit = args.limit ?? 50;
    const sinceMs = args.since_date ? Date.parse(args.since_date) : null;
    if (sinceMs !== null && Number.isNaN(sinceMs)) {
        throw new Error(`Invalid since_date: ${args.since_date}. Use ISO 8601 (YYYY-MM-DD).`);
    }
    // The limit used to be checked only after pushing, so `limit: 0` still
    // returned one note. Nothing can be admitted here, so skip the scan.
    if (limit <= 0) {
        return [];
    }
    const wantTag = args.tag ? normalizeTag(args.tag) : null;
    // Sort a copy so the array handed back by the vault is not reordered in place.
    const entries = [...(await vault.listMarkdown(args.folder))].sort((a, b) => b.mtimeMs - a.mtimeMs);
    const out = [];
    for (const e of entries) {
        // Entries are newest-first, so the first note older than the cutoff
        // means every remaining one is older as well.
        if (sinceMs !== null && e.mtimeMs < sinceMs) {
            break;
        }
        const { parsed } = await vault.readNote(e.absPath, e.mtimeMs);
        if (wantTag && !parsed.tags.some((t) => normalizeTag(t) === wantTag)) {
            continue;
        }
        out.push({
            title: stripMd(e.basename),
            path: e.relPath,
            frontmatter: parsed.frontmatter,
            tags: parsed.tags,
            mtime: new Date(e.mtimeMs).toISOString()
        });
        if (out.length >= limit) {
            break;
        }
    }
    return out;
}
|
|
34
|
+
/**
 * Read a single note, either in full or as a compact "document map".
 *
 * @param vault - Vault accessor. Uses `ensureExists()` and
 *   `readNote(absPath, mtimeMs)`; target lookup goes through `resolveTarget`.
 * @param args - Target selector understood by `resolveTarget`, plus an
 *   optional `format`. With `format === "map"` the body is not returned.
 * @returns For `"map"`: `{ path, title, format, frontmatter_keys, headings,
 *   wikilinks_count, embeds_count, tags, mtime, byte_size }`. Otherwise:
 *   `{ path, title, content, frontmatter, wikilinks, embeds, tags, mtime }`.
 */
export async function readNote(vault, args) {
    await vault.ensureExists();
    const entry = await resolveTarget(vault, args);
    const note = await vault.readNote(entry.absPath, entry.mtimeMs);
    const { parsed } = note;
    if (args.format !== "map") {
        // Default projection: the whole parsed note.
        return {
            path: entry.relPath,
            title: stripMd(entry.basename),
            content: parsed.body,
            frontmatter: parsed.frontmatter,
            wikilinks: parsed.wikilinks,
            embeds: parsed.embeds,
            tags: parsed.tags,
            mtime: new Date(note.mtimeMs).toISOString()
        };
    }
    // Map projection: headings, frontmatter keys and counts only, so a caller
    // can plan an edit without receiving the full body.
    return {
        path: entry.relPath,
        title: stripMd(entry.basename),
        format: "map",
        frontmatter_keys: Object.keys(parsed.frontmatter),
        headings: extractHeadings(parsed.body),
        wikilinks_count: parsed.wikilinks.length,
        embeds_count: parsed.embeds.length,
        tags: parsed.tags,
        mtime: new Date(note.mtimeMs).toISOString(),
        byte_size: Buffer.byteLength(note.content, "utf8")
    };
}
|
|
65
|
+
/**
 * Pull ATX headings (`#` through `######`) out of a note body for the
 * document-map projection.
 *
 * Lines inside fenced code blocks are skipped. A fence opens on a line whose
 * first non-blank characters are three backticks or three tildes, and closes
 * on the next line that starts with the same fence character. A backtick
 * fence line inside a tilde block (or the reverse) is treated as block
 * content and does not end the block.
 *
 * @param {string} body - Note body text.
 * @returns {{ level: number, text: string, line: number }[]} Headings in
 *   document order; `line` is the 1-based line number within `body`.
 */
function extractHeadings(body) {
    const out = [];
    const lines = body.split("\n");
    // Fence character ("`" or "~") of the block we are inside, or null.
    let openFence = null;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i] ?? "";
        const fence = /^\s*(```|~~~)/.exec(line);
        if (fence?.[1]) {
            const marker = fence[1].charAt(0);
            if (openFence === null) {
                openFence = marker;
            }
            else if (openFence === marker) {
                openFence = null;
            }
            // A fence line is never a heading, whichever branch was taken.
            continue;
        }
        if (openFence !== null) {
            continue;
        }
        // Optional closing run of `#` characters is not part of the text.
        const m = /^(#{1,6})\s+(.+?)\s*#*\s*$/.exec(line);
        if (m?.[1] && m[2]) {
            out.push({ level: m[1].length, text: m[2], line: i + 1 });
        }
    }
    return out;
}
|
|
87
|
+
/**
 * Resolve an Obsidian-style wikilink (`[[Target#Section^block|Alias]]`, with
 * or without the surrounding brackets or a leading `!`) to a note in the vault.
 *
 * @param vault - Vault accessor. Uses `ensureExists()`, `listMarkdown()` and
 *   `readNote(absPath, mtimeMs)`; matching is delegated to `findBestMatch`.
 * @param args - `wikilink` (the link text), optional `from_note` (forwarded to
 *   `findBestMatch`), optional `include_content` (body is returned unless this
 *   is exactly `false`).
 * @returns `{ found, path, title, content, section, block, alias }`. When the
 *   link has no target part or nothing matches, `found` is `false` and
 *   `path`/`title`/`content` are `null`. `section` and `block` are `null`
 *   when absent or empty.
 */
export async function resolveWikilink(vault, args) {
    await vault.ensureExists();
    // Trim first so stray whitespace around the brackets does not defeat the
    // anchored bracket-stripping pattern.
    const cleaned = args.wikilink.trim().replace(/^!?\[\[|\]\]$/g, "");
    const aliasIdx = cleaned.indexOf("|");
    const alias = aliasIdx === -1 ? null : cleaned.slice(aliasIdx + 1).trim();
    let rest = aliasIdx === -1 ? cleaned : cleaned.slice(0, aliasIdx);
    const blockIdx = rest.indexOf("^");
    // Empty fragments are reported as null, same as missing ones.
    const block = blockIdx === -1 ? null : rest.slice(blockIdx + 1).trim() || null;
    rest = blockIdx === -1 ? rest : rest.slice(0, blockIdx);
    const hashIdx = rest.indexOf("#");
    // For a block reference such as `Note#^id` the text after `#` is empty once
    // the block part has been cut off; that is "no section", not a section
    // named "".
    const section = hashIdx === -1 ? null : rest.slice(hashIdx + 1).trim() || null;
    const target = (hashIdx === -1 ? rest : rest.slice(0, hashIdx)).trim();
    const notFound = { found: false, path: null, title: null, content: null, section, block, alias };
    if (!target) {
        return notFound;
    }
    const all = await vault.listMarkdown();
    const match = findBestMatch(all, target, args.from_note);
    if (!match) {
        return notFound;
    }
    let body = null;
    if (args.include_content !== false) {
        const { parsed } = await vault.readNote(match.absPath, match.mtimeMs);
        body = parsed.body;
    }
    return {
        found: true,
        path: match.relPath,
        title: stripMd(match.basename),
        content: body,
        section,
        block,
        alias
    };
}
|
|
122
|
+
/**
 * Case-insensitive substring search over note contents.
 *
 * @param vault - Vault accessor. Uses `ensureExists()`, `listMarkdown(folder)`
 *   and `readNote(absPath, mtimeMs)`.
 * @param args - `query` (required, non-blank), `mode` (`"all"` by default,
 *   `"any"`, or `"phrase"`), optional `folder`, optional `limit` (default 25).
 * @returns `{ query, mode, scanned_notes, matches }` where each match is
 *   `{ path, snippet, score, line, matched_terms }`, ordered by descending
 *   score and cut to `limit`. `score` is the number of non-overlapping
 *   occurrences summed over all terms.
 * @throws {Error} If `query` is empty or whitespace only.
 */
export async function searchText(vault, args) {
    await vault.ensureExists();
    const maxHits = args.limit ?? 25;
    const mode = args.mode ?? "all";
    const q = args.query;
    if (!q.trim()) {
        throw new Error("query must not be empty");
    }
    // "all" / "any" split the query on whitespace; "phrase" searches it verbatim.
    const tokens = mode === "phrase" ? [q] : q.trim().split(/\s+/);
    const needles = tokens.map((term) => term.toLowerCase());
    const usableNeedles = needles.filter(Boolean).length;
    const entries = await vault.listMarkdown(args.folder);
    // Scores one note; resolves to a hit object, or null when it does not qualify.
    const scanNote = async (entry) => {
        const { content } = await vault.readNote(entry.absPath, entry.mtimeMs);
        const haystack = content.toLowerCase();
        const matched = [];
        let score = 0;
        let earliest = -1;
        let earliestLen = 0;
        needles.forEach((needle, t) => {
            if (needle === undefined || needle === "") {
                return;
            }
            let occurrences = 0;
            for (let pos = haystack.indexOf(needle, 0); pos !== -1; pos = haystack.indexOf(needle, pos + needle.length)) {
                occurrences += 1;
                // Remember the earliest hit across all terms for the snippet.
                if (earliest === -1 || pos < earliest) {
                    earliest = pos;
                    earliestLen = needle.length;
                }
            }
            if (occurrences > 0) {
                score += occurrences;
                matched.push(tokens[t] ?? needle);
            }
        });
        // "all" needs every term present; "any" and "phrase" need at least one hit.
        if (mode === "all" && matched.length !== usableNeedles) {
            return null;
        }
        if (score === 0) {
            return null;
        }
        const { snippet, line } = sliceSnippet(content, earliest, earliestLen);
        return {
            path: entry.relPath,
            snippet,
            score,
            line,
            matched_terms: matched
        };
    };
    // Read notes in batches so the number of reads in flight stays bounded.
    const CHUNK = 16;
    const matches = [];
    for (let start = 0; start < entries.length; start += CHUNK) {
        const batch = await Promise.all(entries.slice(start, start + CHUNK).map(scanNote));
        for (const hit of batch) {
            if (hit) {
                matches.push(hit);
            }
        }
    }
    matches.sort((a, b) => b.score - a.score);
    return {
        query: q,
        mode,
        scanned_notes: entries.length,
        matches: matches.slice(0, maxHits)
    };
}
|
|
196
|
+
/**
 * Return the most recently modified notes, newest first.
 *
 * @param vault - Vault accessor. Uses `ensureExists()`, `listMarkdown(folder)`
 *   and `readNote(absPath, mtimeMs)`.
 * @param args - Options:
 *   - `folder`: passed through to `vault.listMarkdown`.
 *   - `since_minutes`: only notes modified within this many minutes before
 *     now; `undefined` or `null` disables the cutoff.
 *   - `limit`: maximum number of notes returned (default 20). A limit of zero
 *     or less yields an empty list.
 * @returns Array of `{ title, path, frontmatter, tags, mtime }`, where `mtime`
 *   is an ISO-8601 string.
 */
export async function getRecentEdits(vault, args) {
    await vault.ensureExists();
    const limit = args.limit ?? 20;
    // The limit used to be checked only after pushing, so `limit: 0` still
    // returned one note.
    if (limit <= 0) {
        return [];
    }
    // `null` used to coerce to 0 minutes, putting the cutoff at "now" and
    // filtering out everything; treat it like an omitted value.
    const sinceMs = args.since_minutes != null ? Date.now() - args.since_minutes * 60_000 : null;
    // Sort a copy so the array handed back by the vault is not reordered in place.
    const entries = [...(await vault.listMarkdown(args.folder))].sort((a, b) => b.mtimeMs - a.mtimeMs);
    const out = [];
    for (const e of entries) {
        // Newest-first order: the first note past the cutoff ends the scan.
        if (sinceMs !== null && e.mtimeMs < sinceMs) {
            break;
        }
        const { parsed } = await vault.readNote(e.absPath, e.mtimeMs);
        out.push({
            title: stripMd(e.basename),
            path: e.relPath,
            frontmatter: parsed.frontmatter,
            tags: parsed.tags,
            mtime: new Date(e.mtimeMs).toISOString()
        });
        if (out.length >= limit) {
            break;
        }
    }
    return out;
}
|
|
219
|
+
/**
 * Find notes that link to (and optionally embed) a target note.
 *
 * @param vault - Vault accessor. Uses `ensureExists()`, `listMarkdown()` and
 *   `readNote(absPath, mtimeMs)`; the target comes from `resolveTarget` and
 *   each link is resolved with `findBestMatch`.
 * @param args - Target selector understood by `resolveTarget`, plus optional
 *   `limit` (default 50) and `include_embeds` (embeds count unless this is
 *   exactly `false`).
 * @returns Array of `{ path, title, count, snippets, link_kind }`, ordered by
 *   descending `count` and cut to `limit`. At most two snippets are collected
 *   per note; `link_kind` is `"wikilink"`, `"embed"` or `"mixed"`.
 */
export async function getBacklinks(vault, args) {
    await vault.ensureExists();
    const maxHits = args.limit ?? 50;
    const withEmbeds = args.include_embeds !== false;
    const target = await resolveTarget(vault, args);
    const targetAbs = target.absPath;
    const notes = await vault.listMarkdown();
    const found = [];
    for (const note of notes) {
        // The target's own links to itself are not backlinks.
        if (note.absPath === targetAbs) {
            continue;
        }
        const { content, parsed } = await vault.readNote(note.absPath, note.mtimeMs);
        const wikiRefs = parsed.wikilinks.map((link) => ({ link, kind: "wikilink" }));
        const refs = withEmbeds
            ? wikiRefs.concat(parsed.embeds.map((link) => ({ link, kind: "embed" })))
            : wikiRefs;
        if (refs.length === 0) {
            continue;
        }
        let count = 0;
        let sawWikilink = false;
        let sawEmbed = false;
        const snippets = [];
        for (const { link, kind } of refs) {
            const resolved = findBestMatch(notes, link.target, note.relPath);
            if (!(resolved && resolved.absPath === targetAbs)) {
                continue;
            }
            count += 1;
            if (kind === "embed") {
                sawEmbed = true;
            }
            else {
                sawWikilink = true;
            }
            if (snippets.length >= 2) {
                continue;
            }
            // Rebuild the literal link text to locate it in the raw content.
            const opener = kind === "embed" ? "![[" : "[[";
            const literal = `${opener + link.raw}]]`;
            const at = content.indexOf(literal);
            const { snippet } = sliceSnippet(content, at, literal.length);
            if (snippet) {
                snippets.push(snippet);
            }
        }
        if (count === 0) {
            continue;
        }
        let linkKind = "wikilink";
        if (sawEmbed) {
            linkKind = sawWikilink ? "mixed" : "embed";
        }
        found.push({
            path: note.relPath,
            title: stripMd(note.basename),
            count,
            snippets,
            link_kind: linkKind
        });
    }
    found.sort((a, b) => b.count - a.count);
    return found.slice(0, maxHits);
}
|
|
267
|
+
/**
 * Parse a query string with `parseDql` and run it against the vault with
 * `runDql`.
 *
 * @param vault - Vault accessor; `ensureExists()` is awaited first, then the
 *   vault is handed to `runDql`.
 * @param args - `{ query }`, the query text.
 * @returns `{ query, rows }`: the query echoed back with the rows from `runDql`.
 */
export async function dataviewQuery(vault, args) {
    await vault.ensureExists();
    const ast = parseDql(args.query);
    return {
        query: args.query,
        rows: await runDql(vault, ast)
    };
}
|
|
273
|
+
/**
 * Collect wikilinks (and optionally embeds) that do not resolve to any note.
 *
 * @param vault - Vault accessor. Uses `ensureExists()`, `listMarkdown(folder)`
 *   for the notes to scan, `listMarkdown()` for the resolution candidates, and
 *   `readNote(absPath, mtimeMs)`.
 * @param args - Optional `folder`, `limit` (default 200) and `include_embeds`
 *   (embeds are checked unless this is exactly `false`).
 * @returns Array of `{ from_path, target, raw, kind, alias, section, block,
 *   line, snippet }`, at most `limit` entries. Links with an empty target are
 *   skipped.
 */
export async function getUnresolvedWikilinks(vault, args) {
    await vault.ensureExists();
    const cap = args.limit ?? 200;
    const withEmbeds = args.include_embeds !== false;
    const scanned = await vault.listMarkdown(args.folder);
    const universe = await vault.listMarkdown();
    const broken = [];
    scan: for (const note of scanned) {
        // Checked before reading so no note is loaded once the cap is reached.
        if (broken.length >= cap) {
            break;
        }
        const { content, parsed } = await vault.readNote(note.absPath, note.mtimeMs);
        const wikiRefs = parsed.wikilinks.map((link) => ({ link, kind: "wikilink" }));
        const refs = withEmbeds
            ? wikiRefs.concat(parsed.embeds.map((link) => ({ link, kind: "embed" })))
            : wikiRefs;
        for (const { link, kind } of refs) {
            if (broken.length >= cap) {
                break scan;
            }
            if (!link.target) {
                continue;
            }
            if (findBestMatch(universe, link.target, note.relPath)) {
                continue;
            }
            // Rebuild the literal link text to locate it in the raw content.
            const opener = kind === "embed" ? "![[" : "[[";
            const literal = `${opener + link.raw}]]`;
            const at = content.indexOf(literal);
            const located = sliceSnippet(content, at, literal.length);
            broken.push({
                from_path: note.relPath,
                target: link.target,
                raw: link.raw,
                kind,
                alias: link.alias ?? null,
                section: link.section ?? null,
                block: link.block ?? null,
                line: located.line,
                snippet: located.snippet
            });
        }
    }
    return broken;
}
|
|
314
|
+
/**
 * List the wikilinks (and optionally embeds) found in one note, each with the
 * note it resolves to, if any.
 *
 * @param vault - Vault accessor. Uses `ensureExists()`,
 *   `readNote(absPath, mtimeMs)` and `listMarkdown()`; the source note comes
 *   from `resolveTarget` and links are resolved with `findBestMatch`.
 * @param args - Target selector understood by `resolveTarget`, plus optional
 *   `include_embeds` and `include_unresolved` (each is on unless exactly
 *   `false`).
 * @returns `{ from_path, from_title, links }`, where every link is `{ raw,
 *   target, kind, alias, section, block, resolved_path, resolved_title }` and
 *   the two `resolved_*` fields are `null` for unresolved links.
 */
export async function getOutboundLinks(vault, args) {
    await vault.ensureExists();
    const withEmbeds = args.include_embeds !== false;
    const keepUnresolved = args.include_unresolved !== false;
    const source = await resolveTarget(vault, args);
    const { parsed } = await vault.readNote(source.absPath, source.mtimeMs);
    const notes = await vault.listMarkdown();
    const wikiRefs = parsed.wikilinks.map((link) => ({ link, kind: "wikilink" }));
    const refs = withEmbeds
        ? wikiRefs.concat(parsed.embeds.map((link) => ({ link, kind: "embed" })))
        : wikiRefs;
    const links = [];
    for (const { link, kind } of refs) {
        const resolved = findBestMatch(notes, link.target, source.relPath);
        if (resolved || keepUnresolved) {
            links.push({
                raw: link.raw,
                target: link.target,
                kind,
                alias: link.alias ?? null,
                section: link.section ?? null,
                block: link.block ?? null,
                resolved_path: resolved ? resolved.relPath : null,
                resolved_title: resolved ? stripMd(resolved.basename) : null
            });
        }
    }
    return {
        from_path: source.relPath,
        from_title: stripMd(source.basename),
        links
    };
}
|
|
347
|
+
/**
 * Tally tag usage across notes.
 *
 * @param vault - Vault accessor. Uses `ensureExists()`, `listMarkdown(folder)`
 *   and `readNote(absPath, mtimeMs)`.
 * @param args - Optional `folder`, `limit` (default 200) and `min_count`
 *   (default 1; tags counted fewer times are dropped).
 * @returns Array of `{ tag, count, frontmatter_count, inline_count }`, keyed
 *   by lower-cased tag, ordered by descending `count` then by tag name, cut to
 *   `limit`. An occurrence counts as "frontmatter" when the lower-cased tag is
 *   among the values from `extractFrontmatterTagsLower`, otherwise as "inline".
 */
export async function listTags(vault, args) {
    await vault.ensureExists();
    const maxRows = args.limit ?? 200;
    const threshold = args.min_count ?? 1;
    const entries = await vault.listMarkdown(args.folder);
    const tally = new Map();
    for (const entry of entries) {
        const { parsed } = await vault.readNote(entry.absPath, entry.mtimeMs);
        const fromFrontmatter = new Set(extractFrontmatterTagsLower(parsed.frontmatter));
        for (const rawTag of parsed.tags) {
            const key = rawTag.toLowerCase();
            let slot = tally.get(key);
            if (slot === undefined) {
                slot = { count: 0, fm: 0, inline: 0 };
                tally.set(key, slot);
            }
            slot.count += 1;
            if (fromFrontmatter.has(key)) {
                slot.fm += 1;
            }
            else {
                slot.inline += 1;
            }
        }
    }
    const rows = [];
    tally.forEach((slot, tag) => {
        if (!(slot.count < threshold)) {
            rows.push({ tag, count: slot.count, frontmatter_count: slot.fm, inline_count: slot.inline });
        }
    });
    rows.sort((a, b) => b.count - a.count || a.tag.localeCompare(b.tag));
    return rows.slice(0, maxRows);
}
|
|
376
|
+
/**
 * Create a note from frontmatter plus content.
 *
 * @param vault - Vault accessor. Uses `ensureExists()` and
 *   `writeNote(path, body, { overwrite })`.
 * @param args - `path`, `frontmatter`, `content`, and `overwrite` (forwarded
 *   unchanged to `vault.writeNote`). The file text is built by `composeNote`.
 * @returns `{ path, mtime, bytes }` taken from the write result, with `mtime`
 *   as an ISO-8601 string.
 */
export async function createNote(vault, args) {
    await vault.ensureExists();
    const text = composeNote(args.frontmatter, args.content);
    const written = await vault.writeNote(args.path, text, { overwrite: args.overwrite });
    const mtime = new Date(written.mtimeMs).toISOString();
    return { path: written.relPath, mtime, bytes: written.bytes };
}
|
|
386
|
+
/**
 * Append text to an existing note.
 *
 * @param vault - Vault accessor. Uses `ensureExists()` and
 *   `appendNote(absPath, text)`; the note is located with `resolveTarget`.
 * @param args - Target selector understood by `resolveTarget`, `content` to
 *   append, and optional `separator` placed before it (default: a blank line,
 *   `"\n\n"`).
 * @returns `{ path, mtime, appended_bytes }` taken from the append result,
 *   with `mtime` as an ISO-8601 string.
 */
export async function appendToNote(vault, args) {
    await vault.ensureExists();
    const note = await resolveTarget(vault, args);
    const separator = args.separator ?? "\n\n";
    const outcome = await vault.appendNote(note.absPath, separator + args.content);
    const mtime = new Date(outcome.mtimeMs).toISOString();
    return { path: outcome.relPath, mtime, appended_bytes: outcome.appended_bytes };
}
|
|
397
|
+
export async function renameNote(vault, args) {
|
|
398
|
+
await vault.ensureExists();
|
|
399
|
+
const dryRun = args.dry_run === true;
|
|
400
|
+
const fromRelNorm = args.from.toLowerCase().endsWith(".md") ? args.from : `${args.from}.md`;
|
|
401
|
+
const toRelNorm = args.to.toLowerCase().endsWith(".md") ? args.to : `${args.to}.md`;
|
|
402
|
+
// Resolve from (must exist) — vault.stat() rejects traversal + excluded paths
|
|
403
|
+
// and confirms the file is real. resolveInside() is the public wrapper for
|
|
404
|
+
// the same path-normalization logic without an existence check.
|
|
405
|
+
const fromAbs = vault.resolveInside(fromRelNorm);
|
|
406
|
+
const fromRel = vault.toRel(fromAbs);
|
|
407
|
+
await vault.stat(fromAbs); // throws on missing source — fail fast.
|
|
408
|
+
// Validate to-path early so we don't do O(N) work then fail.
|
|
409
|
+
const toAbsCheck = vault.resolveInside(toRelNorm);
|
|
410
|
+
const toRelCheck = vault.toRel(toAbsCheck);
|
|
411
|
+
const renameReason = vault.exclusionReason(toRelCheck);
|
|
412
|
+
if (renameReason) {
|
|
413
|
+
// v2.0.0-beta.2 P1 fix: distinguish allowlist-vs-denylist same as
|
|
414
|
+
// writeNote and Vault.renameFile do. Pre-fix the message always blamed
|
|
415
|
+
// --exclude-glob even when --read-paths was the reason.
|
|
416
|
+
throw new Error(`Refusing to rename — destination is excluded by ${renameReason}: ${toRelCheck}`);
|
|
417
|
+
}
|
|
418
|
+
if (fromRel === toRelCheck) {
|
|
419
|
+
throw new Error(`from and to are the same path: ${fromRel}`);
|
|
420
|
+
}
|
|
421
|
+
if (!args.overwrite) {
|
|
422
|
+
const exists = await vault
|
|
423
|
+
.stat(toAbsCheck)
|
|
424
|
+
.then(() => true)
|
|
425
|
+
.catch(() => false);
|
|
426
|
+
if (exists) {
|
|
427
|
+
throw new Error(`Destination already exists: ${toRelCheck} (pass overwrite=true to replace)`);
|
|
428
|
+
}
|
|
429
|
+
}
|
|
430
|
+
const newBasename = stripMd(path.basename(toRelNorm));
|
|
431
|
+
const newDir = path.dirname(toRelNorm).replace(/\\/g, "/");
|
|
432
|
+
const entries = await vault.listMarkdown();
|
|
433
|
+
// Build the rewrite plan. INCLUDES the source file itself so that any
|
|
434
|
+
// self-references (e.g. `[[Foo]]` inside `Foo.md`) are also rewritten —
|
|
435
|
+
// otherwise the renamed file would ship with a broken self-link. The source
|
|
436
|
+
// is rewritten in place at the OLD path; fs.rename then carries the new
|
|
437
|
+
// content to the new path in one atomic step.
|
|
438
|
+
const plan = [];
|
|
439
|
+
let totalRewrites = 0;
|
|
440
|
+
let sourcePlan = null;
|
|
441
|
+
for (const e of entries) {
|
|
442
|
+
const isSource = e.absPath === fromAbs;
|
|
443
|
+
const { content, parsed } = await vault.readNote(e.absPath, e.mtimeMs);
|
|
444
|
+
// Find every wikilink + embed whose target resolves to fromAbs. Group by
|
|
445
|
+
// raw inner text — multiple identical literals in the same file rewrite
|
|
446
|
+
// together.
|
|
447
|
+
const oldRawsToNew = new Map();
|
|
448
|
+
const candidates = [
|
|
449
|
+
...parsed.wikilinks.map((l) => ({ raw: l.raw, target: l.target, kind: "wikilink" })),
|
|
450
|
+
...parsed.embeds.map((l) => ({ raw: l.raw, target: l.target, kind: "embed" }))
|
|
451
|
+
];
|
|
452
|
+
for (const c of candidates) {
|
|
453
|
+
if (oldRawsToNew.has(c.raw))
|
|
454
|
+
continue; // already mapped
|
|
455
|
+
const m = findBestMatch(entries, c.target, e.relPath);
|
|
456
|
+
if (!m || m.absPath !== fromAbs)
|
|
457
|
+
continue;
|
|
458
|
+
const newRaw = rewriteRawTarget(c.raw, c.target, newBasename, newDir);
|
|
459
|
+
if (newRaw === c.raw)
|
|
460
|
+
continue; // already correct (e.g., basename happened to match)
|
|
461
|
+
oldRawsToNew.set(c.raw, { kind: c.kind, newRaw });
|
|
462
|
+
}
|
|
463
|
+
if (oldRawsToNew.size === 0)
|
|
464
|
+
continue;
|
|
465
|
+
// Apply the replacements with a code-fence-aware line walker so wikilinks
|
|
466
|
+
// inside ``` / ~~~ blocks (which the parser ignores) stay verbatim.
|
|
467
|
+
const { content: newContent, count } = rewriteOutsideCodeFences(content, oldRawsToNew);
|
|
468
|
+
if (count === 0)
|
|
469
|
+
continue;
|
|
470
|
+
const proposal = { path: e.relPath, rewrites: count, before: content, after: newContent };
|
|
471
|
+
if (isSource) {
|
|
472
|
+
// The source file's rewrite is held separately so we can write it last,
|
|
473
|
+
// immediately before fs.rename, keeping the disk in a maximally-recoverable
|
|
474
|
+
// state if anything between writes fails.
|
|
475
|
+
sourcePlan = proposal;
|
|
476
|
+
}
|
|
477
|
+
else {
|
|
478
|
+
plan.push(proposal);
|
|
479
|
+
}
|
|
480
|
+
totalRewrites += count;
|
|
481
|
+
}
|
|
482
|
+
if (!dryRun) {
|
|
483
|
+
// Write order:
|
|
484
|
+
// 1. All backlink-bearing files (other notes pointing at the source).
|
|
485
|
+
// 2. Source file's rewritten content, written to its OLD path.
|
|
486
|
+
// 3. fs.rename source's old path → new path.
|
|
487
|
+
// A failure at any step leaves backlinks pointing at the still-present old
|
|
488
|
+
// name (worst case: safe, recoverable).
|
|
489
|
+
for (const p of plan) {
|
|
490
|
+
await vault.writeNote(p.path, p.after, { overwrite: true });
|
|
491
|
+
}
|
|
492
|
+
if (sourcePlan) {
|
|
493
|
+
await vault.writeNote(sourcePlan.path, sourcePlan.after, { overwrite: true });
|
|
494
|
+
}
|
|
495
|
+
// Atomic file move + cache invalidation.
|
|
496
|
+
await vault.renameFile(fromRelNorm, toRelNorm, { overwrite: args.overwrite });
|
|
497
|
+
}
|
|
498
|
+
// Combine plans for the response so the caller sees the full picture.
|
|
499
|
+
const allPlans = sourcePlan ? [...plan, sourcePlan] : plan;
|
|
500
|
+
// Strip `before`/`after` from the response — the caller doesn't need the
|
|
501
|
+
// full file contents back, just the per-file count. We kept them for the
|
|
502
|
+
// pre-write loop; the response trims them. The source-file entry uses its
|
|
503
|
+
// POST-rename path so the caller sees where the rewrite ended up.
|
|
504
|
+
const trimmedPlan = allPlans.map((p) => ({
|
|
505
|
+
path: p === sourcePlan ? toRelCheck : p.path,
|
|
506
|
+
rewrites: p.rewrites,
|
|
507
|
+
before: "",
|
|
508
|
+
after: ""
|
|
509
|
+
}));
|
|
510
|
+
return {
|
|
511
|
+
from: fromRel,
|
|
512
|
+
to: toRelCheck,
|
|
513
|
+
dry_run: dryRun,
|
|
514
|
+
files_updated: trimmedPlan,
|
|
515
|
+
total_links_rewritten: totalRewrites
|
|
516
|
+
};
|
|
517
|
+
}
|
|
518
|
+
// Thread opener: `## Chat: <title>`; capture 1 is the title. The `m` flag lets
// `.test(body)` find the heading on any line of a whole note, not only at the
// start of the string. Matching one line at a time is unaffected by the flag.
const CHAT_THREAD_TITLE_RE = /^## Chat: (.+?)\s*$/m;
// Message opener: `### <role> · <timestamp>`. Capture 1 is the role
// (user|assistant|system); capture 2 is the text after the separator with
// trailing whitespace excluded.
const CHAT_HEADING_RE = /^### (user|assistant|system) · (.+?)\s*$/;
|
|
523
|
+
/**
 * Append one message to the chat thread stored in a note.
 *
 * The note is read first and one of three things happens:
 *  - the read fails (treated as "note does not exist"): the note is created
 *    with `# <title>`, a `## Chat: <title>` heading and the message, using
 *    `overwrite: false`;
 *  - the note has no `## Chat:` heading: the heading is appended, then the
 *    message;
 *  - the note already has a `## Chat:` heading: only the message is appended.
 * Each call adds a fresh `### <role> · <timestamp>` block. In the two append
 * cases the existing text is kept, except that trailing newlines are collapsed
 * before the new block is added.
 *
 * @param vault Vault handle; this function calls `ensureExists`,
 *   `resolveInside`, `readFile`, `writeNote` and `toRel` on it.
 * @param args `{ note_path, content, role, thread_title? }`. `.md` is appended
 *   to `note_path` when missing; `thread_title` is only used when a new
 *   `## Chat:` heading has to be written.
 * @returns `{ note_path, line_start, line_end }`. `line_start` is the 1-based
 *   line of the new `### <role> · <timestamp>` heading in the written file and
 *   `line_end` is the written file's line count — the same numbers
 *   `chatThreadRead` reports for the last message of a note.
 * @throws Error when `note_path` or `content` is blank, or `role` is not one of
 *   user / assistant / system.
 */
export async function chatThreadAppend(vault, args) {
    await vault.ensureExists();
    if (!args.note_path?.trim())
        throw new Error("chat_thread_append: `note_path` is required");
    if (!args.content?.trim())
        throw new Error("chat_thread_append: `content` is required");
    const role = args.role;
    if (role !== "user" && role !== "assistant" && role !== "system") {
        throw new Error(`chat_thread_append: invalid role "${role}" (must be user|assistant|system)`);
    }
    const targetRel = args.note_path.toLowerCase().endsWith(".md") ? args.note_path : `${args.note_path}.md`;
    const abs = vault.resolveInside(targetRel);
    // Second-resolution ISO timestamp (milliseconds dropped).
    const timestamp = new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
    const messageBlock = `\n### ${role} · ${timestamp}\n\n${args.content.trim()}\n`;
    const fallbackTitle = () => args.thread_title?.trim() || `chat — ${timestamp.slice(0, 10)}`;
    // Every document written below ends with `messageBlock`, whose first
    // character is the "\n" in front of the heading. Measuring on the final text
    // keeps the numbers right no matter how many trailing newlines were trimmed.
    const messageLines = (doc) => {
        const headingOffset = doc.length - messageBlock.length + 1;
        return {
            line_start: doc.slice(0, headingOffset).split("\n").length,
            line_end: doc.split("\n").length
        };
    };
    let existed = true;
    let body = "";
    try {
        body = await vault.readFile(abs);
    }
    catch {
        // Any read failure is treated as "no such note"; the create path below
        // uses overwrite:false, so the write is asked not to replace an existing file.
        existed = false;
    }
    if (!existed) {
        const title = fallbackTitle();
        const initial = `# ${title}\n\n## Chat: ${title}\n${messageBlock}`;
        const result = await vault.writeNote(targetRel, initial, { overwrite: false });
        return { note_path: result.relPath, ...messageLines(initial) };
    }
    // Existing note: add the thread heading only when the note has none yet.
    const toAppend = CHAT_THREAD_TITLE_RE.test(body)
        ? messageBlock
        : `\n\n## Chat: ${fallbackTitle()}\n${messageBlock}`;
    const newBody = body.replace(/\n+$/, "") + toAppend;
    await vault.writeNote(targetRel, newBody, { overwrite: true });
    return { note_path: vault.toRel(abs), ...messageLines(newBody) };
}
|
|
581
|
+
/**
 * Read a note and return the messages of its chat thread.
 *
 * Lines are scanned top to bottom. Nothing is collected until a
 * `## Chat: <title>` line is seen. Inside the thread every
 * `### <role> · <timestamp>` line starts a new message and the lines that
 * follow become its content. A `# ` heading, or a `## ` heading that is not a
 * `## Chat:` heading, closes the thread; a later `## Chat:` line re-opens it
 * and replaces the reported title.
 *
 * @param vault Vault handle (`ensureExists`, `resolveInside`, `readFile`, `toRel`).
 * @param args `{ note_path }`; `.md` is appended when missing.
 * @returns `{ note_path, thread_title, messages, message_count }`. Each message
 *   has `role`, `timestamp`, trimmed `content`, the 1-based `line_start` of its
 *   heading, and `line_end`: the 1-based line just before whatever closed it,
 *   or the total line count for a message that runs to the end of the note.
 */
export async function chatThreadRead(vault, args) {
    await vault.ensureExists();
    const relTarget = args.note_path.toLowerCase().endsWith(".md") ? args.note_path : `${args.note_path}.md`;
    const absTarget = vault.resolveInside(relTarget);
    const text = await vault.readFile(absTarget);
    const rows = text.split("\n");
    const messages = [];
    let threadTitle = null;
    let insideThread = false;
    let open = null;
    // Finish the message being collected (if any), stamping where it ended.
    const closeOpenMessage = (lineEnd) => {
        if (!open) {
            return;
        }
        messages.push({
            role: open.role,
            timestamp: open.timestamp,
            content: open.lines.join("\n").trim(),
            line_start: open.line_start,
            line_end: lineEnd
        });
        open = null;
    };
    for (const [index, row] of rows.entries()) {
        const title = CHAT_THREAD_TITLE_RE.exec(row);
        if (title) {
            closeOpenMessage(index);
            threadTitle = (title[1] ?? "").trim();
            insideThread = true;
            continue;
        }
        if (!insideThread) {
            continue;
        }
        // An H1, or an H2 that is not a chat heading, ends the thread.
        const leavesThread = /^# /.test(row) || (/^## /.test(row) && !CHAT_THREAD_TITLE_RE.test(row));
        if (leavesThread) {
            closeOpenMessage(index);
            insideThread = false;
            continue;
        }
        const heading = CHAT_HEADING_RE.exec(row);
        if (heading?.[1] && heading[2]) {
            closeOpenMessage(index);
            open = {
                role: heading[1],
                timestamp: heading[2].trim(),
                line_start: index + 1,
                lines: []
            };
            continue;
        }
        if (open) {
            open.lines.push(row);
        }
    }
    closeOpenMessage(rows.length);
    return {
        note_path: vault.toRel(absTarget),
        thread_title: threadTitle,
        messages,
        message_count: messages.length
    };
}
|
|
666
|
+
// ─── obsidian_frontmatter_{get,set,search} (v2.3.0 — atomic YAML ops) ──────
|
|
667
|
+
// Surgical YAML manipulation. Pre-fix, agents wanting to set `status:
|
|
668
|
+
// published` on 12 notes had to find/replace text — error-prone (multi-line
|
|
669
|
+
// strings, special chars, key-collision). Now: parse via gray-matter, edit,
|
|
670
|
+
// rewrite. Code-fence-aware via gray-matter (frontmatter is delimited
|
|
671
|
+
// strictly by leading `---`, so no fence ambiguity).
|
|
672
|
+
//
|
|
673
|
+
// _get is read-only; _set + _delete are write-gated.
|
|
674
|
+
/**
 * Return the parsed frontmatter of one note.
 *
 * The note is located with `resolveTarget(vault, args)` and read through
 * `vault.readNote`. When `args.key` is truthy the response additionally carries
 * `value`, the frontmatter entry stored under that key (`undefined` if absent).
 *
 * @returns `{ path, frontmatter }` or `{ path, frontmatter, value }`.
 */
export async function frontmatterGet(vault, args) {
    await vault.ensureExists();
    const { absPath, mtimeMs, relPath } = await resolveTarget(vault, args);
    const { parsed } = await vault.readNote(absPath, mtimeMs);
    const response = { path: relPath, frontmatter: parsed.frontmatter };
    if (args.key) {
        response.value = parsed.frontmatter[args.key];
    }
    return response;
}
|
|
687
|
+
/**
 * Set and/or delete frontmatter keys on one note.
 *
 * For every entry of `args.set`: a `null` value deletes the key, any other
 * value is stored when it differs from the current one (compared via
 * `JSON.stringify`). Only the note's OWN frontmatter keys count as existing,
 * so names that every object inherits (`toString`, `constructor`, …) are
 * neither "deleted" nor reported as modified unless the note really has them.
 *
 * @param vault Vault handle (`ensureExists`, `readNote`, `writeNote`).
 * @param args `{ set, dry_run?, ...target }` where the target fields are
 *   whatever `resolveTarget` accepts.
 * @returns `{ path, changed_keys, before, after, dry_run }`. `changed_keys`
 *   uses `-key` for deletions, `~key` for modified keys and `+key` for new
 *   keys. Nothing is written when there are no changes or `dry_run` is true.
 * @throws Error when `set` is missing or empty.
 */
export async function frontmatterSet(vault, args) {
    await vault.ensureExists();
    if (!args.set || Object.keys(args.set).length === 0) {
        throw new Error("frontmatter_set: `set` must be a non-empty object");
    }
    const target = await resolveTarget(vault, args);
    const note = await vault.readNote(target.absPath, target.mtimeMs);
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    const before = { ...note.parsed.frontmatter };
    const after = { ...before };
    const changed = [];
    for (const [k, v] of Object.entries(args.set)) {
        if (v === null) {
            if (hasOwn(after, k)) {
                delete after[k];
                changed.push(`-${k}`);
            }
            continue;
        }
        const prev = hasOwn(after, k) ? after[k] : undefined;
        if (JSON.stringify(prev) !== JSON.stringify(v)) {
            // defineProperty instead of `after[k] = v`: a key named `__proto__`
            // must become a normal entry, not go through the prototype setter.
            Object.defineProperty(after, k, { value: v, writable: true, enumerable: true, configurable: true });
            changed.push(`${hasOwn(before, k) ? "~" : "+"}${k}`);
        }
    }
    if (changed.length === 0 || args.dry_run === true) {
        return { path: target.relPath, changed_keys: changed, before, after, dry_run: args.dry_run === true };
    }
    // Re-serialise the note from its parsed body plus the edited frontmatter.
    const newDoc = matter.stringify(note.parsed.body, after);
    await vault.writeNote(target.relPath, newDoc, { overwrite: true });
    return { path: target.relPath, changed_keys: changed, before, after, dry_run: false };
}
|
|
720
|
+
/**
 * List notes whose frontmatter entry `args.key` satisfies one predicate.
 *
 * Exactly one of these must be supplied:
 *  - `exists`: `true` matches notes that have the key, `false` matches notes
 *    that do not;
 *  - `equals`: matches when the value is JSON-equal to `equals`;
 *  - `contains`: matches when the value is an array holding an element that is
 *    JSON-equal to `contains`.
 * Only the note's own frontmatter keys are consulted, so a key such as
 * `constructor` does not "exist" on every note. Notes that fail to read or
 * parse are skipped.
 *
 * @param vault Vault handle (`ensureExists`, `listMarkdown`, `readNote`).
 * @param args `{ key, exists? | equals? | contains?, folder?, limit? }`;
 *   `limit` defaults to 100 and stops the scan once reached.
 * @returns `{ key, total_matches, matches }` where each match is
 *   `{ path, value, mtime }` and `total_matches` is the number returned.
 * @throws Error when `key` is missing or the predicate count is not exactly one.
 */
export async function frontmatterSearch(vault, args) {
    await vault.ensureExists();
    if (!args.key)
        throw new Error("frontmatter_search: `key` is required");
    const predicates = [args.equals !== undefined, args.exists !== undefined, args.contains !== undefined].filter(Boolean);
    if (predicates.length !== 1) {
        throw new Error("frontmatter_search: exactly one of `equals` / `exists` / `contains` must be set");
    }
    const limit = args.limit ?? 100;
    const entries = await vault.listMarkdown(args.folder);
    const matches = [];
    for (const e of entries) {
        if (matches.length >= limit)
            break;
        try {
            const note = await vault.readNote(e.absPath, e.mtimeMs);
            const fm = note.parsed.frontmatter;
            // Own keys only: `fm["constructor"]` would otherwise resolve to an
            // inherited function on every note.
            const value = Object.prototype.hasOwnProperty.call(fm, args.key) ? fm[args.key] : undefined;
            let hit = false;
            if (args.exists === true) {
                hit = value !== undefined;
            }
            else if (args.exists === false) {
                // Previously unreachable: `exists: false` passed validation but
                // could never match anything.
                hit = value === undefined;
            }
            else if (args.equals !== undefined) {
                hit = JSON.stringify(value) === JSON.stringify(args.equals);
            }
            else if (args.contains !== undefined && Array.isArray(value)) {
                hit = value.some((v) => JSON.stringify(v) === JSON.stringify(args.contains));
            }
            if (hit) {
                matches.push({ path: e.relPath, value, mtime: new Date(e.mtimeMs).toISOString() });
            }
        }
        catch {
            // Best effort: a note that cannot be read or parsed is skipped.
        }
    }
    return { key: args.key, total_matches: matches.length, matches };
}
|
|
757
|
+
/**
 * Move a note into an archive folder by delegating to `renameNote`.
 *
 * Only the basename of `args.path` is kept, so `Inbox/Foo.md` lands at
 * `<archive>/Foo.md` rather than `<archive>/Inbox/Foo.md`. The archive folder
 * defaults to `Archive`; trailing slashes on it are dropped. `dry_run` and
 * `overwrite` are forwarded only when the caller supplied them.
 *
 * @returns Whatever `renameNote` returns for the computed move.
 * @throws Error when `args.path` is missing.
 */
export async function archiveNote(vault, args) {
    await vault.ensureExists();
    if (!args.path) {
        throw new Error("archive_note: `path` is required");
    }
    const archiveDir = (args.archive_folder ?? "Archive").replace(/\/+$/, "");
    const { dry_run: dryRun, overwrite } = args;
    return renameNote(vault, {
        from: args.path,
        to: `${archiveDir}/${path.basename(args.path)}`,
        // Spreading `false` adds nothing, so unset options stay absent.
        ...(dryRun !== undefined && { dry_run: dryRun }),
        ...(overwrite !== undefined && { overwrite })
    });
}
|
|
777
|
+
/**
 * Replace every occurrence of `args.search` with `args.replace` across the
 * vault (or one folder), leaving fenced code blocks untouched.
 *
 * All notes are scanned first to build a plan; writes only start afterwards.
 * With `dry_run: true` the plan is reported and nothing is written.
 *
 * @param vault Vault handle (`ensureExists`, `isExcluded`, `listMarkdown`,
 *   `readNote`, `writeEnabled`, `writeNote`).
 * @param args `{ search, replace, folder?, case_sensitive?, dry_run? }`.
 *   `case_sensitive` defaults to true. `replace` may be `""` to delete matches.
 * @returns `{ search, replace, case_sensitive, dry_run, scope, files_scanned,
 *   files_updated, total_replacements, partial, errors? }`. `files_updated`
 *   lists the files written (or, for a dry run, the files that would be);
 *   `total_replacements` counts every planned replacement; `errors` is present
 *   only when at least one write failed, in which case `partial` is true.
 * @throws Error when `search` is empty, `replace` is not a string, the two are
 *   identical, the folder is excluded, or a real run is requested on a vault
 *   whose `writeEnabled` is falsy.
 */
export async function replaceInNotes(vault, args) {
    await vault.ensureExists();
    const dryRun = args.dry_run === true;
    const caseSensitive = args.case_sensitive !== false;
    if (!args.search) {
        throw new Error("replace_in_notes: `search` must be a non-empty string");
    }
    // Without this guard an omitted `replace` would be string-concatenated into
    // every match as the literal text "undefined".
    if (typeof args.replace !== "string") {
        throw new Error("replace_in_notes: `replace` must be a string (use \"\" to delete matches)");
    }
    if (args.search === args.replace) {
        throw new Error("replace_in_notes: `search` and `replace` are identical — no-op refused");
    }
    // Refuse an excluded folder up front instead of answering "0 files
    // scanned", which would confirm that the folder name exists. Both the bare
    // folder and a representative path inside it are tested, because a `**`
    // glob can match sub-paths without matching the folder name itself.
    if (args.folder) {
        const folderTrim = args.folder.replace(/\/+$/, "");
        if (vault.isExcluded(folderTrim) || vault.isExcluded(`${folderTrim}/_probe.md`)) {
            throw new Error(`replace_in_notes: folder is excluded by privacy filter: ${args.folder}`);
        }
    }
    const entries = await vault.listMarkdown(args.folder);
    const plan = [];
    let total = 0;
    for (const e of entries) {
        const { content } = await vault.readNote(e.absPath, e.mtimeMs);
        const { content: rewritten, count } = replaceStringOutsideCodeFences(content, args.search, args.replace, caseSensitive);
        if (count === 0)
            continue;
        plan.push({ path: e.relPath, before: content, after: rewritten, count });
        total += count;
    }
    // Apply phase. A failed write does not abort the run: the error is recorded
    // per file and the remaining files are still attempted, so the caller can
    // see exactly which files were committed and which were not. The one
    // systemic condition checked up front is a read-only vault, which throws
    // instead of producing one error per file.
    const updated = [];
    const errors = [];
    if (!dryRun) {
        if (!vault.writeEnabled) {
            throw new Error("Vault is read-only — start the server with --enable-write to allow note creation");
        }
        for (const p of plan) {
            try {
                await vault.writeNote(p.path, p.after, { overwrite: true });
                updated.push({ path: p.path, occurrences: p.count });
            }
            catch (err) {
                errors.push({ path: p.path, message: err instanceof Error ? err.message : String(err) });
            }
        }
    }
    else {
        for (const p of plan)
            updated.push({ path: p.path, occurrences: p.count });
    }
    const result = {
        search: args.search,
        replace: args.replace,
        case_sensitive: caseSensitive,
        dry_run: dryRun,
        scope: args.folder ?? "(whole vault)",
        files_scanned: entries.length,
        files_updated: updated,
        total_replacements: total,
        partial: errors.length > 0
    };
    if (errors.length > 0)
        result.errors = errors;
    return result;
}
|
|
858
|
+
/**
 * Produce the new inner text of a wikilink after its target note was renamed.
 *
 * `raw` is the text between the brackets (`Foo|alias`, `Folder/Foo#sec`, …) and
 * `oldTarget` is the target portion already extracted from it. Everything from
 * the first `|`, `#` or `^` onward is kept verbatim. The link keeps the
 * author's convention: a target that contained a `/` is rewritten as
 * `<newDir>/<newBasename>` (just `<newBasename>` when `newDir` is `"."` or
 * empty); a bare target stays bare.
 */
function rewriteRawTarget(raw, oldTarget, newBasename, newDir) {
    const keepsFolder = oldTarget.includes("/") && newDir !== "." && newDir !== "";
    const renamedTarget = keepsFolder ? `${newDir}/${newBasename}` : newBasename;
    // Position of the earliest alias / heading / block marker, or -1 if none.
    const markerAt = raw.search(/[|#^]/);
    const tail = markerAt === -1 ? "" : raw.slice(markerAt);
    return `${renamedTarget}${tail}`;
}
|
|
879
|
+
/**
 * Rewrite wikilink / embed literals everywhere except inside fenced code.
 *
 * The content is processed line by line. A line starting (after optional
 * whitespace) with ``` or ~~~ flips the "inside a fence" state and is copied
 * through; lines inside a fence are copied through unchanged. On every other
 * line each map entry is applied in map order: `[[old]]` → `[[new]]`, or
 * `![[old]]` → `![[new]]` for entries of kind "embed".
 *
 * @param content Full note text.
 * @param oldRawsToNew Map of old inner link text → `{ kind, newRaw }`.
 * @returns `{ content, count }` where `count` is the number of literals replaced.
 */
function rewriteOutsideCodeFences(content, oldRawsToNew) {
    const fenceLine = /^\s*(```|~~~)/;
    // Build the literal from/to pairs once; entries that would not change the
    // text are dropped.
    const swaps = [];
    for (const [oldRaw, { kind, newRaw }] of oldRawsToNew) {
        const opener = kind === "embed" ? "![[" : "[[";
        const from = `${opener}${oldRaw}]]`;
        const to = `${opener}${newRaw}]]`;
        if (from !== to) {
            swaps.push([from, to]);
        }
    }
    let fenced = false;
    let count = 0;
    const rewritten = content.split("\n").map((line) => {
        if (fenceLine.test(line)) {
            fenced = !fenced;
            return line;
        }
        if (fenced) {
            return line;
        }
        return swaps.reduce((text, [from, to]) => {
            // Splitting on the literal yields one more piece than there are
            // non-overlapping occurrences.
            const pieces = text.split(from);
            count += pieces.length - 1;
            return pieces.join(to);
        }, line);
    });
    return { content: rewritten.join("\n"), count };
}
|
|
916
|
+
/**
 * Replace every occurrence of a literal string outside fenced code blocks.
 *
 * The content is processed line by line. A line starting (after optional
 * whitespace) with ``` or ~~~ flips the "inside a fence" state; fence lines and
 * lines inside a fence are copied through unchanged. Matching is literal (no
 * pattern syntax in `search`, no `$` expansion in `replace`) and
 * non-overlapping, left to right.
 *
 * Case-insensitive matching is done with a Unicode-aware, case-insensitive
 * regular expression over the ORIGINAL line. The earlier approach looked up
 * indices in `line.toLowerCase()` and applied them to the original line, which
 * corrupts the text whenever lower-casing changes the string length (for
 * example "İ" becomes two code units).
 *
 * @param content Full note text.
 * @param search Literal text to find; an empty value makes the call a no-op.
 * @param replace Literal replacement text.
 * @param caseSensitive When false, letter case is ignored while matching.
 * @returns `{ content, count }` where `count` is the number of replacements.
 */
function replaceStringOutsideCodeFences(content, search, replace, caseSensitive) {
    if (!search)
        return { content, count: 0 };
    const fenceLine = /^\s*(```|~~~)/;
    const replacement = String(replace);
    // Escape only regex syntax characters so the pattern stays valid under `u`.
    const insensitive = caseSensitive
        ? null
        : new RegExp(search.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "giu");
    let inFence = false;
    let count = 0;
    const out = content.split("\n").map((line) => {
        if (fenceLine.test(line)) {
            inFence = !inFence;
            return line;
        }
        if (inFence) {
            return line;
        }
        if (insensitive === null) {
            const pieces = line.split(search);
            count += pieces.length - 1;
            return pieces.join(replacement);
        }
        // A replacer function keeps `$&`, `$1`, … in the replacement literal.
        return line.replace(insensitive, () => {
            count += 1;
            return replacement;
        });
    });
    return { content: out.join("\n"), count };
}
|
|
966
|
+
/**
 * Combine frontmatter and body into the text of a note.
 *
 * With no frontmatter (missing or an object without keys) the body is returned
 * as-is. Otherwise serialisation is delegated to `matter.stringify`, so that
 * YAML-special string values (date-like strings such as "2026-05-03",
 * `!`-prefixed text, text containing `|`, …) survive a write/read round trip
 * instead of being reinterpreted on the next read.
 */
function composeNote(frontmatter, content) {
    const hasFrontmatter = Boolean(frontmatter) && Object.keys(frontmatter).length > 0;
    return hasFrontmatter ? matter.stringify(content, frontmatter) : content;
}
|
|
976
|
+
/**
 * Collect a note's frontmatter tags as lower-case names without leading `#`.
 *
 * Reads `fm.tags`, falling back to `fm.tag` only when `tags` is null or
 * undefined. An array contributes its string elements; a string is split on
 * commas and whitespace; any other value (or a falsy one) yields no tags.
 */
function extractFrontmatterTagsLower(fm) {
    const source = fm.tags ?? fm.tag;
    if (!source) {
        return [];
    }
    let names = [];
    if (Array.isArray(source)) {
        names = source.filter((item) => typeof item === "string");
    }
    else if (typeof source === "string") {
        names = source.split(/[,\s]+/).filter(Boolean);
    }
    return names.map((name) => name.replace(/^#+/, "").toLowerCase());
}
|
|
987
|
+
/**
 * Translate a periodic-note alias into the current period's default note name.
 *
 * Recognised aliases (trimmed, case-insensitive) and the names produced from
 * the local date:
 *  - "daily" / "today" → `YYYY-MM-DD`
 *  - "monthly"         → `YYYY-MM`
 *  - "weekly"          → `YYYY-Www` using the ISO 8601 week (Monday-based;
 *    the week belongs to the year that contains its Thursday)
 * Any other title returns null. Custom name formats are not handled here.
 */
function resolvePeriodicAlias(title) {
    const alias = title.trim().toLowerCase();
    if (!["daily", "today", "weekly", "monthly"].includes(alias)) {
        return null;
    }
    const pad2 = (n) => String(n).padStart(2, "0");
    const now = new Date();
    const year = now.getFullYear();
    const month = pad2(now.getMonth() + 1);
    if (alias === "monthly") {
        return `${year}-${month}`;
    }
    if (alias !== "weekly") {
        return `${year}-${month}-${pad2(now.getDate())}`;
    }
    // Re-anchor the local calendar date at UTC midnight so the day arithmetic
    // below is not affected by DST, then move to the Thursday of that week.
    const thursday = new Date(Date.UTC(year, now.getMonth(), now.getDate()));
    const isoWeekday = thursday.getUTCDay() || 7; // Mon=1 … Sun=7
    thursday.setUTCDate(thursday.getUTCDate() + 4 - isoWeekday);
    const isoYear = thursday.getUTCFullYear();
    const daysSinceJan1 = (thursday.valueOf() - Date.UTC(isoYear, 0, 1)) / 86400000;
    const week = Math.ceil((daysSinceJan1 + 1) / 7);
    return `${isoYear}-W${pad2(week)}`;
}
|
|
1011
|
+
/** Up to 3 vault-relative paths whose basename or relPath looks similar to
|
|
1012
|
+
* the missing target. Used to enrich `Note not found` errors with did-you-mean
|
|
1013
|
+
* hints — meaningful for LLMs that mistype a note name. */
|
|
1014
|
+
async function suggestSimilar(vault, target) {
|
|
1015
|
+
try {
|
|
1016
|
+
const all = await vault.listMarkdown();
|
|
1017
|
+
const lower = target.toLowerCase().replace(/\.md$/i, "");
|
|
1018
|
+
const ranked = all
|
|
1019
|
+
.map((e) => {
|
|
1020
|
+
const baseLower = stripMd(e.basename).toLowerCase();
|
|
1021
|
+
const relLower = e.relPath.toLowerCase();
|
|
1022
|
+
let score = 0;
|
|
1023
|
+
if (baseLower === lower)
|
|
1024
|
+
score = 100;
|
|
1025
|
+
else if (baseLower.startsWith(lower) || lower.startsWith(baseLower))
|
|
1026
|
+
score = 70;
|
|
1027
|
+
else if (baseLower.includes(lower) || lower.includes(baseLower))
|
|
1028
|
+
score = 50;
|
|
1029
|
+
else if (relLower.includes(lower))
|
|
1030
|
+
score = 30;
|
|
1031
|
+
return { path: e.relPath, score };
|
|
1032
|
+
})
|
|
1033
|
+
.filter((r) => r.score > 0)
|
|
1034
|
+
.sort((a, b) => b.score - a.score)
|
|
1035
|
+
.slice(0, 3);
|
|
1036
|
+
return ranked.map((r) => r.path);
|
|
1037
|
+
}
|
|
1038
|
+
catch {
|
|
1039
|
+
return [];
|
|
1040
|
+
}
|
|
1041
|
+
}
|
|
1042
|
+
/**
 * Resolve `{ path }` or `{ title }` to a note entry
 * `{ absPath, relPath, basename, mtimeMs }`.
 *
 * By path: the path is tried as given and, unless it already ends in `.md`,
 * again with `.md` appended. The first candidate that can be stat'ed wins.
 * `vault.resolveInside` runs outside the per-candidate try, so whatever it
 * throws propagates unchanged.
 *
 * By title, in order:
 *  1. a literal title match (`vault.findByTitle`), so a real note called
 *     "Daily" or "Today" beats the periodic aliases;
 *  2. the path produced by `resolvePeriodicNoteName` from the vault's periodic
 *     config. An "excluded by --read-paths / --exclude-glob" error from this
 *     lookup is rethrown rather than reported as "not found";
 *  3. only when that periodic path has NO folder part, a basename match for
 *     it. With a configured folder this step is skipped so that a same-named
 *     note from some other folder is never returned in its place;
 *  4. the built-in default alias formats (`resolvePeriodicAlias`).
 *
 * @throws Error with a "Did you mean" hint (from `suggestSimilar`) when nothing
 *   matches, or "Either path or title is required" when neither is given.
 */
async function resolveTarget(vault, args) {
    // Stat an absolute path and shape the result as a note entry.
    const describe = async (abs) => {
        const stat = await vault.stat(abs);
        return {
            absPath: abs,
            relPath: vault.toRel(abs),
            basename: path.basename(abs),
            mtimeMs: stat.mtimeMs
        };
    };
    const didYouMean = async (wanted) => {
        const suggestions = await suggestSimilar(vault, wanted);
        return suggestions.length ? `. Did you mean: ${suggestions.join(", ")}?` : "";
    };
    if (args.path) {
        const attempts = [args.path];
        if (!args.path.toLowerCase().endsWith(".md")) {
            attempts.push(`${args.path}.md`);
        }
        let failure;
        for (const attempt of attempts) {
            const abs = vault.resolveInside(attempt);
            try {
                return await describe(abs);
            }
            catch (err) {
                failure = err;
            }
        }
        const hint = await didYouMean(args.path);
        if (failure instanceof Error) {
            throw new Error(`${failure.message}${hint}`);
        }
        throw new Error(`Note not found: ${args.path}${hint}`);
    }
    if (!args.title) {
        throw new Error("Either path or title is required");
    }
    const literal = await vault.findByTitle(args.title);
    if (literal) {
        return literal;
    }
    const periodicConfig = await vault.getPeriodicConfig();
    const periodic = resolvePeriodicNoteName(args.title, periodicConfig);
    if (periodic) {
        try {
            return await describe(vault.resolveInside(`${periodic.relPath}.md`));
        }
        catch (err) {
            // Exclusion errors stem from the user's own configuration and must
            // stay visible; every other failure falls through to the next step.
            if (err instanceof Error && /excluded by --(read-paths|exclude-glob)/.test(err.message)) {
                throw err;
            }
        }
        if (!periodic.relPath.includes("/")) {
            const byBasename = await vault.findByTitle(path.basename(periodic.relPath));
            if (byBasename) {
                return byBasename;
            }
        }
    }
    // Last resort: the hard-coded default formats, for vaults without a
    // periodic-notes configuration.
    const legacyName = resolvePeriodicAlias(args.title);
    if (legacyName) {
        const byAlias = await vault.findByTitle(legacyName);
        if (byAlias) {
            return byAlias;
        }
    }
    const hint = await didYouMean(args.title);
    const aliasHint = periodic ? ` (also tried periodic alias "${periodic.relPath}")` : "";
    throw new Error(`No note found with title: ${args.title}${aliasHint}${hint}`);
}
|
|
1137
|
+
/**
 * Dry-run validation of a note that is about to be written.
 *
 * Runs five independent checks and reports all of them in one structured
 * result instead of throwing on the first problem:
 *   1. path sanity (via `vault.resolveInside`),
 *   2. YAML frontmatter parse (via `matter`),
 *   3. wikilink resolution against the current vault listing,
 *   4. tag classification (already used in the vault vs. new),
 *   5. path / title collision.
 *
 * @param vault Vault accessor. This function calls `ensureExists`,
 *   `resolveInside`, `toRel`, `listMarkdown`, `stat` and `findByTitle` on it.
 * @param args `{ path, content, mode? }`. `mode` defaults to `"create"`;
 *   `.md` is appended to `path` when it is missing (case-insensitive check).
 * @returns `{ ok, proposed_path, mode, errors, warnings, yaml, wikilinks,
 *   tags, collision }`, where `ok` is true only when `errors` is empty.
 */
export async function validateNoteProposal(vault, args) {
    await vault.ensureExists();
    const mode = args.mode ?? "create";
    const errors = [];
    const warnings = [];
    // 1. Path sanity. resolveInside throws on traversal — capture as error,
    // don't let it propagate as a generic exception (the validator should
    // return a structured result for ANY input).
    let normalizedPath = args.path.toLowerCase().endsWith(".md") ? args.path : `${args.path}.md`;
    let absPath = null;
    try {
        absPath = vault.resolveInside(normalizedPath);
        normalizedPath = vault.toRel(absPath);
    }
    catch (err) {
        errors.push({
            kind: "path-traversal",
            message: err instanceof Error ? err.message : String(err)
        });
    }
    // 2. YAML parse via gray-matter (the same parser used at write time).
    // The parsed data is kept so the tag pass below does not have to parse
    // the content a second time.
    const yamlReport = { parsed: false, error: null, keys: [] };
    let bodyAfterFm = args.content;
    let fmData = {};
    try {
        const parsed = matter(args.content);
        fmData = parsed.data ?? {};
        yamlReport.parsed = true;
        yamlReport.keys = Object.keys(fmData);
        bodyAfterFm = parsed.content;
    }
    catch (err) {
        yamlReport.error = err instanceof Error ? err.message : String(err);
        errors.push({ kind: "yaml-invalid", message: `YAML frontmatter could not be parsed: ${yamlReport.error}` });
    }
    // 3. Wikilink resolution against the live vault.
    const all = await vault.listMarkdown();
    // Negative lookbehind skips embeds (`![[...]]`).
    const wikilinkRe = /(?<!!)\[\[([^\]\n]+?)\]\]/g;
    const wikilinks = [];
    // target → { match, suggestions }. A note that links the same target
    // several times resolves it (and computes suggestions) only once.
    const resolutionByTarget = new Map();
    for (const m of bodyAfterFm.matchAll(wikilinkRe)) {
        const raw = m[0];
        const inner = (m[1] ?? "").trim();
        if (!inner)
            continue;
        // Strip alias / section / block to get the bare target name.
        const beforePipe = inner.split("|")[0] ?? "";
        const beforeHash = beforePipe.split("#")[0] ?? "";
        const target = beforeHash.split("^")[0]?.trim() ?? "";
        if (!target)
            continue;
        let resolution = resolutionByTarget.get(target);
        if (resolution === undefined) {
            const match = findBestMatch(all, target, normalizedPath);
            resolution = {
                match,
                suggestions: match ? [] : await suggestSimilar(vault, target)
            };
            resolutionByTarget.set(target, resolution);
        }
        if (resolution.match) {
            wikilinks.push({
                raw,
                target,
                status: "resolved",
                resolved_path: resolution.match.relPath,
                suggestions: []
            });
        }
        else {
            // Copy so each reported entry owns its own array.
            const suggestions = [...resolution.suggestions];
            wikilinks.push({
                raw,
                target,
                status: "broken",
                resolved_path: null,
                suggestions
            });
            warnings.push({
                kind: "broken-wikilink",
                message: `[[${target}]] does not resolve to any existing note`,
                suggestion: suggestions.length ? `Closest matches: ${suggestions.join(", ")}` : undefined
            });
        }
    }
    // 4. Tag pre-classification (existing vs new).
    const existingTags = new Set((await listTags(vault, {})).map((t) => t.tag.toLowerCase()));
    const proposedTagsRaw = new Set();
    // Frontmatter tags: accepts a list, or one string split on whitespace/commas.
    const fmTags = fmData.tags ?? fmData.tag;
    if (Array.isArray(fmTags)) {
        for (const t of fmTags)
            if (typeof t === "string" && t)
                proposedTagsRaw.add(t.replace(/^#/, ""));
    }
    else if (typeof fmTags === "string" && fmTags) {
        for (const t of fmTags.split(/[\s,]+/))
            if (t)
                proposedTagsRaw.add(t.replace(/^#/, ""));
    }
    // Inline tags.
    const inlineTagRe = /(?:^|[\s([{>])#([\p{L}][\p{L}\p{N}_/-]*)/gu;
    for (const m of bodyAfterFm.matchAll(inlineTagRe)) {
        if (m[1])
            proposedTagsRaw.add(m[1]);
    }
    const tags = [];
    for (const t of proposedTagsRaw) {
        const status = existingTags.has(t.toLowerCase()) ? "existing" : "new";
        tags.push({ name: t, status });
        if (status === "new") {
            warnings.push({
                kind: "new-tag",
                message: `#${t} is new — won't fork an existing tag (case-insensitive check)`
            });
        }
    }
    // 5. Path collision check. Skipped when the path failed step 1.
    let collision = { kind: "none" };
    if (absPath) {
        try {
            await vault.stat(absPath);
            // Path exists.
            if (mode === "create") {
                errors.push({
                    kind: "path-collision",
                    message: `Note already exists at ${normalizedPath} (mode="create" refuses overwrite)`
                });
            }
            collision = { kind: "path-exists", existing_path: normalizedPath };
        }
        catch {
            // Path doesn't exist — try title collision (an existing note at a different path).
            const titleFromBasename = stripMd(path.basename(normalizedPath));
            const existing = await vault.findByTitle(titleFromBasename);
            if (existing && existing.relPath !== normalizedPath) {
                warnings.push({
                    kind: "title-collision",
                    message: `A note titled "${titleFromBasename}" already exists at ${existing.relPath} — proceeding will create a same-titled file at a different path`,
                    suggestion: existing.relPath
                });
                collision = { kind: "title-exists-elsewhere", existing_path: existing.relPath };
            }
        }
    }
    return {
        ok: errors.length === 0,
        proposed_path: normalizedPath,
        mode,
        errors,
        warnings,
        yaml: yamlReport,
        wikilinks,
        tags,
        collision
    };
}
|
|
1284
|
+
/**
 * Rank the notes in the vault by structural similarity to one target note.
 *
 * Four signals are combined into a weighted score:
 *   3.0 × Jaccard overlap of lower-cased tags
 *   1.5 × Jaccard overlap of title character 3-grams
 *   2.0 × share of the target's resolved outbound links the candidate also has
 *   2.0 × Jaccard overlap of the notes linking to each (co-backlinks)
 *
 * @param vault Vault accessor (`ensureExists`, `listMarkdown`, `readNote`).
 * @param args Target selector passed through to `resolveTarget`, plus
 *   optional `limit` (default 10) and `min_score` (default 0.05).
 * @returns Candidates scoring at least `min_score`, best first, at most
 *   `limit` of them. Empty when the target is not part of the vault listing.
 */
export async function findSimilar(vault, args) {
    await vault.ensureExists();
    const maxResults = args.limit ?? 10;
    const scoreFloor = args.min_score ?? 0.05;
    const target = await resolveTarget(vault, args);
    const notes = await vault.listMarkdown();
    // Four-decimal rounding used for every number that is reported.
    const round4 = (value) => Math.round(value * 10000) / 10000;
    // relPath → { entry, tags, title3grams, outbound } for every listed note.
    const metaByPath = new Map();
    for (const note of notes) {
        const { parsed } = await vault.readNote(note.absPath, note.mtimeMs);
        const tags = new Set(parsed.tags.map((tag) => tag.toLowerCase()));
        const title3grams = ngrams(stripMd(note.basename).toLowerCase(), 3);
        const outbound = new Set();
        for (const link of parsed.wikilinks) {
            const hit = findBestMatch(notes, link.target, note.relPath);
            if (hit) {
                outbound.add(hit.relPath);
            }
        }
        metaByPath.set(note.relPath, { entry: note, tags, title3grams, outbound });
    }
    const targetMeta = metaByPath.get(target.relPath);
    if (!targetMeta) {
        // resolveTarget found the note, but the listing does not contain it
        // (e.g. filtered by --exclude-glob). Report nothing instead of crashing.
        return [];
    }
    // Invert the outbound sets once: destination → set of linking notes.
    const linkersOf = new Map();
    for (const [source, meta] of metaByPath) {
        for (const destination of meta.outbound) {
            let linkers = linkersOf.get(destination);
            if (linkers === undefined) {
                linkers = new Set();
                linkersOf.set(destination, linkers);
            }
            linkers.add(source);
        }
    }
    const targetLinkers = linkersOf.get(target.relPath) ?? new Set();
    const targetOutCount = targetMeta.outbound.size;
    const ranked = [];
    for (const [candidatePath, candidate] of metaByPath) {
        if (candidatePath === target.relPath) {
            continue;
        }
        const tagJ = jaccard(targetMeta.tags, candidate.tags);
        const titleJ = jaccard(targetMeta.title3grams, candidate.title3grams);
        const candidateLinkers = linkersOf.get(candidatePath) ?? new Set();
        // Fraction of the target's outbound links that the candidate shares.
        let sharedOut = 0;
        if (targetOutCount !== 0) {
            sharedOut = intersectionSize(targetMeta.outbound, candidate.outbound) / targetOutCount;
        }
        // Notes linking to both, over notes linking to either.
        const coBack = jaccard(targetLinkers, candidateLinkers);
        const score = 3.0 * tagJ + 1.5 * titleJ + 2.0 * sharedOut + 2.0 * coBack;
        if (score < scoreFloor) {
            continue;
        }
        const sharedTags = [...targetMeta.tags].filter((tag) => candidate.tags.has(tag)).sort();
        ranked.push({
            path: candidate.entry.relPath,
            title: stripMd(candidate.entry.basename),
            score: round4(score),
            signals: {
                tag_jaccard: round4(tagJ),
                title_3gram: round4(titleJ),
                shared_outbound: round4(sharedOut),
                co_backlink: round4(coBack)
            },
            shared_tags: sharedTags,
            mtime: new Date(candidate.entry.mtimeMs).toISOString()
        });
    }
    ranked.sort((left, right) => right.score - left.score);
    return ranked.slice(0, maxResults);
}
|
|
1356
|
+
/**
 * Collect the immediate neighbourhood of one note in three buckets:
 *   - `outbound`: distinct notes the target links to,
 *   - `inbound`: notes that link to the target, most links first,
 *   - `tag_siblings`: notes sharing at least one tag (case-insensitive) with
 *     the target that are neither outbound nor inbound neighbours.
 *
 * Each bucket holds at most `max_per_bucket` items (default 20).
 *
 * @param vault Vault accessor (`ensureExists`, `listMarkdown`, `readNote`).
 * @param args Target selector passed through to `resolveTarget`, plus
 *   optional `max_per_bucket`.
 * @returns `{ center, outbound, inbound, tag_siblings }`.
 */
export async function getNoteNeighbors(vault, args) {
    await vault.ensureExists();
    const cap = args.max_per_bucket ?? 20;
    const target = await resolveTarget(vault, args);
    const entries = await vault.listMarkdown();
    const { parsed: targetParsed } = await vault.readNote(target.absPath, target.mtimeMs);
    const targetTagsLower = new Set(targetParsed.tags.map((t) => t.toLowerCase()));
    // Outbound: resolved unique destinations from the target.
    // `seenOut` records EVERY resolved destination, including those past the
    // cap, because the tag-sibling pass must exclude all of them. Only the
    // first `cap` destinations are read and reported.
    const seenOut = new Set();
    const outbound = [];
    for (const link of targetParsed.wikilinks) {
        const m = findBestMatch(entries, link.target, target.relPath);
        if (!m || seenOut.has(m.relPath))
            continue;
        seenOut.add(m.relPath);
        // Checking before the push keeps the bucket empty when cap is 0.
        if (outbound.length >= cap)
            continue;
        const { parsed: nbrParsed } = await vault.readNote(m.absPath, m.mtimeMs);
        outbound.push({ path: m.relPath, title: stripMd(m.basename), tags: nbrParsed.tags });
    }
    // Inbound: notes that link to target, with backlink count.
    const inboundCounts = new Map();
    for (const e of entries) {
        if (e.absPath === target.absPath)
            continue;
        const { parsed } = await vault.readNote(e.absPath, e.mtimeMs);
        let cnt = 0;
        for (const link of parsed.wikilinks) {
            const m = findBestMatch(entries, link.target, e.relPath);
            if (m && m.absPath === target.absPath)
                cnt += 1;
        }
        if (cnt > 0)
            inboundCounts.set(e.relPath, { entry: e, count: cnt, tags: parsed.tags });
    }
    const inbound = [...inboundCounts.values()]
        .sort((a, b) => b.count - a.count)
        .slice(0, cap)
        .map((x) => ({ path: x.entry.relPath, title: stripMd(x.entry.basename), tags: x.tags, count: x.count }));
    // Tag siblings: notes sharing ≥1 tag with target, excluding outbound/inbound.
    const tag_siblings = [];
    if (targetTagsLower.size > 0) {
        const exclude = new Set([target.relPath, ...seenOut, ...inboundCounts.keys()]);
        const candidates = [];
        for (const e of entries) {
            if (exclude.has(e.relPath))
                continue;
            const { parsed } = await vault.readNote(e.absPath, e.mtimeMs);
            // Shared tags keep the candidate's own spelling.
            const shared = [];
            for (const t of parsed.tags) {
                if (targetTagsLower.has(t.toLowerCase()))
                    shared.push(t);
            }
            if (shared.length > 0) {
                candidates.push({ path: e.relPath, title: stripMd(e.basename), shared });
            }
        }
        // Most shared tags first.
        candidates.sort((a, b) => b.shared.length - a.shared.length);
        for (const c of candidates.slice(0, cap)) {
            tag_siblings.push({ path: c.path, title: c.title, shared_tags: c.shared });
        }
    }
    return {
        center: {
            path: target.relPath,
            title: stripMd(target.basename),
            tags: targetParsed.tags,
            mtime: new Date(target.mtimeMs).toISOString()
        },
        outbound,
        inbound,
        tag_siblings
    };
}
|
|
1430
|
+
/**
 * Compute whole-vault summary statistics in one pass over every note.
 *
 * Size and word counts are taken from the raw note content returned by
 * `vault.readNote`. A note counts as an orphan when no wikilink resolves to
 * it and it contains no wikilinks itself. Tags are counted case-insensitively.
 *
 * @param vault Vault accessor (`ensureExists`, `listMarkdown`, `readNote`).
 * @param args `{ top_tags? }` — how many of the most used tags to return
 *   (default 10).
 * @returns Totals, the `top_tags` list and a `generated_at` ISO timestamp.
 */
export async function getVaultStats(vault, args) {
    await vault.ensureExists();
    const tagLimit = args.top_tags ?? 10;
    const notes = await vault.listMarkdown();
    // Notes modified at or after this instant count as "recent".
    const recentCutoffMs = Date.now() - 7 * 24 * 3600 * 1000;
    const totals = { bytes: 0, words: 0, recent: 0, withFrontmatter: 0, broken: 0 };
    const tagUsage = new Map();
    // relPath → number of wikilinks resolving to that note.
    const inboundCount = new Map();
    // relPaths of notes that contain at least one wikilink (resolved or not).
    const hasOutbound = new Set();
    for (const note of notes) {
        const { content, parsed } = await vault.readNote(note.absPath, note.mtimeMs);
        totals.bytes += Buffer.byteLength(content, "utf8");
        const trimmed = content.trim();
        if (trimmed) {
            totals.words += trimmed.split(/\s+/).length;
        }
        if (note.mtimeMs >= recentCutoffMs) {
            totals.recent += 1;
        }
        if (Object.keys(parsed.frontmatter).length > 0) {
            totals.withFrontmatter += 1;
        }
        if (parsed.wikilinks.length > 0) {
            hasOutbound.add(note.relPath);
        }
        for (const rawTag of parsed.tags) {
            const tagKey = rawTag.toLowerCase();
            tagUsage.set(tagKey, (tagUsage.get(tagKey) ?? 0) + 1);
        }
        for (const link of parsed.wikilinks) {
            const hit = findBestMatch(notes, link.target, note.relPath);
            if (hit) {
                inboundCount.set(hit.relPath, (inboundCount.get(hit.relPath) ?? 0) + 1);
            }
            else {
                totals.broken += 1;
            }
        }
    }
    const orphans = notes.filter((note) => !inboundCount.get(note.relPath) && !hasOutbound.has(note.relPath)).length;
    // Highest count first; ties broken alphabetically by tag.
    const top_tags = Array.from(tagUsage, ([tag, count]) => ({ tag, count }))
        .sort((x, y) => y.count - x.count || x.tag.localeCompare(y.tag))
        .slice(0, tagLimit);
    const noteCount = notes.length;
    return {
        total_notes: noteCount,
        total_size_bytes: totals.bytes,
        avg_note_words: noteCount === 0 ? 0 : Math.round(totals.words / noteCount),
        recently_modified_7d: totals.recent,
        orphans,
        broken_wikilinks: totals.broken,
        total_tags: tagUsage.size,
        top_tags,
        notes_with_frontmatter: totals.withFrontmatter,
        generated_at: new Date().toISOString()
    };
}
|
|
1491
|
+
export async function lintWiki(vault, args) {
|
|
1492
|
+
await vault.ensureExists();
|
|
1493
|
+
const stubThreshold = args.stub_word_threshold ?? 100;
|
|
1494
|
+
const staleDays = args.stale_days ?? 365;
|
|
1495
|
+
const conceptMinMentions = args.concept_min_mentions ?? 3;
|
|
1496
|
+
const cap = args.max_per_bucket ?? 50;
|
|
1497
|
+
const entries = await vault.listMarkdown(args.folder);
|
|
1498
|
+
const allEntries = await vault.listMarkdown();
|
|
1499
|
+
const staleMs = Date.now() - staleDays * 24 * 3600 * 1000;
|
|
1500
|
+
// Single pass: collect inbound counts, outbound presence, broken links,
|
|
1501
|
+
// word counts, last-reviewed times, capitalised-phrase mentions.
|
|
1502
|
+
const inbound = new Map();
|
|
1503
|
+
const outboundPresence = new Set();
|
|
1504
|
+
const broken = [];
|
|
1505
|
+
const stubs = [];
|
|
1506
|
+
const stale = [];
|
|
1507
|
+
const titleSet = new Set();
|
|
1508
|
+
for (const e of allEntries)
|
|
1509
|
+
titleSet.add(stripMd(e.basename).toLowerCase());
|
|
1510
|
+
// Capitalised-phrase mentions across the whole vault. A phrase is 1-3
|
|
1511
|
+
// CapitalCase tokens (e.g. "Reinforcement Learning", "Attention Heads").
|
|
1512
|
+
// Stop-words: dropped when they appear at the start of a phrase.
|
|
1513
|
+
const conceptStopwords = new Set([
|
|
1514
|
+
"The",
|
|
1515
|
+
"A",
|
|
1516
|
+
"An",
|
|
1517
|
+
"This",
|
|
1518
|
+
"That",
|
|
1519
|
+
"These",
|
|
1520
|
+
"Those",
|
|
1521
|
+
"If",
|
|
1522
|
+
"When",
|
|
1523
|
+
"While",
|
|
1524
|
+
"But",
|
|
1525
|
+
"And",
|
|
1526
|
+
"Or"
|
|
1527
|
+
]);
|
|
1528
|
+
const capPhraseRe = /\b((?:[A-Z][a-z][a-z]+(?:\s+[A-Z][a-z][a-z]+){0,2}))\b/g;
|
|
1529
|
+
const conceptMentions = new Map(); // phrase → set of source paths
|
|
1530
|
+
for (const e of entries) {
|
|
1531
|
+
const { parsed, mtimeMs } = await vault.readNote(e.absPath, e.mtimeMs);
|
|
1532
|
+
// Outbound + broken pass.
|
|
1533
|
+
if (parsed.wikilinks.length > 0)
|
|
1534
|
+
outboundPresence.add(e.relPath);
|
|
1535
|
+
for (const link of parsed.wikilinks) {
|
|
1536
|
+
const m = findBestMatch(allEntries, link.target, e.relPath);
|
|
1537
|
+
if (m) {
|
|
1538
|
+
inbound.set(m.relPath, (inbound.get(m.relPath) ?? 0) + 1);
|
|
1539
|
+
}
|
|
1540
|
+
else if (broken.length < cap) {
|
|
1541
|
+
broken.push({
|
|
1542
|
+
kind: "broken-link",
|
|
1543
|
+
path: e.relPath,
|
|
1544
|
+
message: `[[${link.target}]] in ${e.relPath} doesn't resolve`,
|
|
1545
|
+
suggestion: "create the missing note, fix the link, or remove it",
|
|
1546
|
+
details: { target: link.target, raw: link.raw }
|
|
1547
|
+
});
|
|
1548
|
+
}
|
|
1549
|
+
}
|
|
1550
|
+
// Stub pass.
|
|
1551
|
+
const wordCount = parsed.body.trim() ? parsed.body.trim().split(/\s+/).length : 0;
|
|
1552
|
+
if (wordCount < stubThreshold && stubs.length < cap) {
|
|
1553
|
+
stubs.push({
|
|
1554
|
+
kind: "stub",
|
|
1555
|
+
path: e.relPath,
|
|
1556
|
+
message: `${e.relPath} is ${wordCount} words (threshold ${stubThreshold})`,
|
|
1557
|
+
suggestion: "develop, merge into a hub, or archive",
|
|
1558
|
+
details: { word_count: wordCount, mtime: new Date(mtimeMs).toISOString() }
|
|
1559
|
+
});
|
|
1560
|
+
}
|
|
1561
|
+
// Stale pass — frontmatter `last_reviewed` overrides mtime if present.
|
|
1562
|
+
// gray-matter (js-yaml) parses ISO dates into Date objects automatically,
|
|
1563
|
+
// so we accept Date | string | number.
|
|
1564
|
+
const lastReviewedRaw = parsed.frontmatter?.last_reviewed ?? parsed.frontmatter?.["last-reviewed"];
|
|
1565
|
+
let lastTouchedMs = mtimeMs;
|
|
1566
|
+
if (lastReviewedRaw instanceof Date) {
|
|
1567
|
+
const t = lastReviewedRaw.getTime();
|
|
1568
|
+
if (Number.isFinite(t))
|
|
1569
|
+
lastTouchedMs = t;
|
|
1570
|
+
}
|
|
1571
|
+
else if (typeof lastReviewedRaw === "string") {
|
|
1572
|
+
const t = Date.parse(lastReviewedRaw);
|
|
1573
|
+
if (Number.isFinite(t))
|
|
1574
|
+
lastTouchedMs = t;
|
|
1575
|
+
}
|
|
1576
|
+
else if (typeof lastReviewedRaw === "number" && Number.isFinite(lastReviewedRaw)) {
|
|
1577
|
+
lastTouchedMs = lastReviewedRaw;
|
|
1578
|
+
}
|
|
1579
|
+
if (lastTouchedMs < staleMs && stale.length < cap) {
|
|
1580
|
+
stale.push({
|
|
1581
|
+
kind: "stale",
|
|
1582
|
+
path: e.relPath,
|
|
1583
|
+
message: `${e.relPath} not touched since ${new Date(lastTouchedMs).toISOString().slice(0, 10)}`,
|
|
1584
|
+
suggestion: "review for accuracy or archive",
|
|
1585
|
+
details: {
|
|
1586
|
+
last_touched: new Date(lastTouchedMs).toISOString(),
|
|
1587
|
+
source: lastReviewedRaw !== undefined ? "frontmatter.last_reviewed" : "mtime"
|
|
1588
|
+
}
|
|
1589
|
+
});
|
|
1590
|
+
}
|
|
1591
|
+
// Concept-mention pass — capitalised phrases in the body that aren't
|
|
1592
|
+
// already a wikilink target. Cap at 30 unique phrases per source to
|
|
1593
|
+
// bound memory, but loose enough that real concepts in long notes don't
|
|
1594
|
+
// get truncated.
|
|
1595
|
+
const seenInThisNote = new Set();
|
|
1596
|
+
for (const m of parsed.body.matchAll(capPhraseRe)) {
|
|
1597
|
+
const phrase = m[1];
|
|
1598
|
+
if (!phrase)
|
|
1599
|
+
continue;
|
|
1600
|
+
const firstWord = phrase.split(/\s+/)[0];
|
|
1601
|
+
if (firstWord !== undefined && conceptStopwords.has(firstWord))
|
|
1602
|
+
continue;
|
|
1603
|
+
if (seenInThisNote.has(phrase))
|
|
1604
|
+
continue;
|
|
1605
|
+
if (seenInThisNote.size >= 30)
|
|
1606
|
+
break;
|
|
1607
|
+
// Skip phrases that are already a vault note (basename match).
|
|
1608
|
+
if (titleSet.has(phrase.toLowerCase()))
|
|
1609
|
+
continue;
|
|
1610
|
+
seenInThisNote.add(phrase);
|
|
1611
|
+
const set = conceptMentions.get(phrase) ?? new Set();
|
|
1612
|
+
set.add(e.relPath);
|
|
1613
|
+
conceptMentions.set(phrase, set);
|
|
1614
|
+
}
|
|
1615
|
+
}
|
|
1616
|
+
// Orphan findings (no inbound AND no outbound).
|
|
1617
|
+
const orphans = [];
|
|
1618
|
+
for (const e of entries) {
|
|
1619
|
+
if (orphans.length >= cap)
|
|
1620
|
+
break;
|
|
1621
|
+
if (!inbound.get(e.relPath) && !outboundPresence.has(e.relPath)) {
|
|
1622
|
+
orphans.push({
|
|
1623
|
+
kind: "orphan",
|
|
1624
|
+
path: e.relPath,
|
|
1625
|
+
message: `${e.relPath} has no inbound or outbound wikilinks`,
|
|
1626
|
+
suggestion: "link from a hub note, archive, or delete",
|
|
1627
|
+
details: { mtime: new Date(e.mtimeMs).toISOString() }
|
|
1628
|
+
});
|
|
1629
|
+
}
|
|
1630
|
+
}
|
|
1631
|
+
// Concept candidates — phrases mentioned by ≥ N distinct notes.
|
|
1632
|
+
const conceptCandidates = [];
|
|
1633
|
+
const ranked = [...conceptMentions.entries()]
|
|
1634
|
+
.filter(([, sources]) => sources.size >= conceptMinMentions)
|
|
1635
|
+
.sort((a, b) => b[1].size - a[1].size);
|
|
1636
|
+
for (const [phrase, sources] of ranked) {
|
|
1637
|
+
if (conceptCandidates.length >= cap)
|
|
1638
|
+
break;
|
|
1639
|
+
conceptCandidates.push({
|
|
1640
|
+
kind: "concept-without-page",
|
|
1641
|
+
message: `"${phrase}" is mentioned by ${sources.size} notes but has no page of its own`,
|
|
1642
|
+
suggestion: `create a page \`${phrase}.md\` and refile the most-developed mentions into it`,
|
|
1643
|
+
details: { phrase, mention_count: sources.size, sources: [...sources].slice(0, 5) }
|
|
1644
|
+
});
|
|
1645
|
+
}
|
|
1646
|
+
return {
|
|
1647
|
+
scope: args.folder ?? "(whole vault)",
|
|
1648
|
+
scanned: entries.length,
|
|
1649
|
+
generated_at: new Date().toISOString(),
|
|
1650
|
+
summary: {
|
|
1651
|
+
orphans: orphans.length,
|
|
1652
|
+
broken_links: broken.length,
|
|
1653
|
+
stubs: stubs.length,
|
|
1654
|
+
stale: stale.length,
|
|
1655
|
+
concept_candidates: conceptCandidates.length
|
|
1656
|
+
},
|
|
1657
|
+
findings: {
|
|
1658
|
+
orphans,
|
|
1659
|
+
broken_links: broken,
|
|
1660
|
+
stubs,
|
|
1661
|
+
stale,
|
|
1662
|
+
concept_candidates: conceptCandidates
|
|
1663
|
+
}
|
|
1664
|
+
};
|
|
1665
|
+
}
|
|
1666
|
+
/**
 * Collect "open question" markers from note bodies, oldest note first.
 *
 * A line is a hit when it matches the pattern (case-insensitive) and the
 * first capture group is non-empty. The default pattern accepts, after an
 * optional list-bullet / quote / heading prefix:
 *   "Open question" as a whole word, "Q" followed by whitespace or a colon,
 *   "TODO?" or "??" — each optionally followed by ":" or "-" — and captures
 *   the rest of the line as the question text.
 * Markdown heading lines are never hits; the most recent heading above a hit
 * is reported as its context.
 *
 * @param vault Vault accessor (`ensureExists`, `listMarkdown`, `readNote`).
 * @param args Optional `limit` (default 100), `pattern` (regex source whose
 *   first capture group is the question text) and `folder`.
 * @returns At most `limit` hits sorted by descending `age_days`, where age
 *   is derived from the note's mtime.
 * @throws SyntaxError (from `RegExp`) when `args.pattern` is not a valid regex.
 */
export async function getOpenQuestions(vault, args) {
    await vault.ensureExists();
    const limit = args.limit ?? 100;
    // `q(?=\s|:)` and `question\b` keep ordinary words ("Quick", "Q3",
    // "Q-learning", "questions") from being read as markers.
    const defaultPat = "^\\s*(?:[#\\->\\*\\d\\.]+\\s+)?(?:open\\s+question\\b|q(?=\\s|:)|todo\\?|\\?\\?)\\s*[:\\-]?\\s*(.+)$";
    // No `g` flag: the regex is applied line-by-line with exec(), so it
    // carries no lastIndex state between lines.
    const re = new RegExp(args.pattern ?? defaultPat, "i");
    if (limit <= 0)
        return [];
    const entries = await vault.listMarkdown(args.folder);
    const hits = [];
    const now = Date.now();
    for (const e of entries) {
        const { parsed, mtimeMs } = await vault.readNote(e.absPath, e.mtimeMs);
        // Scan parsed.body so frontmatter lines (which can contain "Q:" -ish
        // tokens) don't pollute results. Split on CRLF as well as LF: a
        // trailing "\r" would stop `(.+)$` from reaching the end of the line.
        const lines = parsed.body.split(/\r?\n/);
        let currentHeading = null;
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i] ?? "";
            const headingMatch = /^(#{1,6})\s+(.+?)\s*#*\s*$/.exec(line);
            if (headingMatch?.[2]) {
                currentHeading = headingMatch[2];
                // A heading line itself isn't a question hit — skip the regex match.
                continue;
            }
            const m = re.exec(line);
            if (!m?.[1])
                continue;
            hits.push({
                question: m[1].trim(),
                source_path: e.relPath,
                source_title: stripMd(e.basename),
                context_heading: currentHeading,
                line: i + 1,
                age_days: Math.round((now - mtimeMs) / (24 * 3600 * 1000)),
                mtime: new Date(mtimeMs).toISOString()
            });
        }
    }
    // Sort oldest-first so things aging out surface first, and only then
    // truncate: cutting off before the sort would keep whichever notes were
    // listed first rather than the oldest ones.
    hits.sort((a, b) => b.age_days - a.age_days);
    return hits.slice(0, limit);
}
|
|
1716
|
+
/**
 * Audit notes carrying a given tag (default `paper`) for a citation
 * identifier in their frontmatter.
 *
 * A tagged note is clean when its frontmatter has an `arxiv`, `doi`, `url`
 * or `isbn` key (key names compared case-insensitively). Every other tagged
 * note is flagged; when its body contains an arXiv id, DOI or URL, a
 * frontmatter patch is proposed from the first identifier found.
 *
 * @param vault Vault accessor (`ensureExists`, `listMarkdown`, `readNote`).
 * @param args Optional `tag` (leading `#` stripped, matched
 *   case-insensitively), `limit` (default 100 flagged notes) and `folder`.
 * @returns `{ scanned, flagged }`. `scanned` counts the tagged notes examined
 *   before the loop stopped, so it can be lower than the number of tagged
 *   notes in the vault once `limit` flagged notes have been collected.
 */
export async function paperAudit(vault, args) {
    await vault.ensureExists();
    const tag = (args.tag ?? "paper").replace(/^#+/, "").toLowerCase();
    const limit = args.limit ?? 100;
    const entries = await vault.listMarkdown(args.folder);
    const arxivRe = /\barxiv[:\s]*([0-9]{4}\.[0-9]{4,5}(?:v\d+)?)\b/gi;
    const doiRe = /\bdoi[:\s]*(10\.\d{4,9}\/[\w\-._;()/:]+)/gi;
    const urlRe = /\bhttps?:\/\/[^\s<>")\]]+/g;
    let scanned = 0;
    const flagged = [];
    for (const e of entries) {
        if (flagged.length >= limit)
            break;
        const { parsed } = await vault.readNote(e.absPath, e.mtimeMs);
        const tagsLower = parsed.tags.map((t) => t.toLowerCase());
        if (!tagsLower.includes(tag))
            continue;
        scanned += 1;
        const fm = parsed.frontmatter ?? {};
        const fmKeys = new Set(Object.keys(fm).map((k) => k.toLowerCase()));
        const hasFmCitation = fmKeys.has("arxiv") || fmKeys.has("doi") || fmKeys.has("url") || fmKeys.has("isbn");
        // Scan parsed.body so the frontmatter's own arxiv/doi keys don't get
        // re-detected as "found in body".
        const body = parsed.body;
        const arxivIds = [...body.matchAll(arxivRe)].map((m) => m[1]).filter((v) => !!v);
        const doiIds = [...body.matchAll(doiRe)].map((m) => m[1]).filter((v) => !!v);
        const urls = [...body.matchAll(urlRe)].map((m) => m[0]);
        const foundInBody = {
            arxiv: [...new Set(arxivIds)],
            doi: [...new Set(doiIds)],
            // Only the first three distinct URLs are reported.
            url: [...new Set(urls)].slice(0, 3)
        };
        const bodyHasAnyId = foundInBody.arxiv.length > 0 || foundInBody.doi.length > 0 || foundInBody.url.length > 0;
        // Clean ⇒ has a frontmatter citation. The body might cite OTHER papers,
        // but this note itself is properly identified.
        if (hasFmCitation)
            continue;
        let proposed = null;
        if (bodyHasAnyId) {
            proposed = {};
            if (foundInBody.arxiv[0])
                proposed.arxiv = foundInBody.arxiv[0];
            if (foundInBody.doi[0])
                proposed.doi = foundInBody.doi[0];
            // A bare URL is only proposed when no stronger identifier exists.
            if (foundInBody.url[0] && !proposed.arxiv && !proposed.doi)
                proposed.url = foundInBody.url[0];
        }
        let msg;
        if (bodyHasAnyId) {
            const strongIds = [
                ...foundInBody.arxiv.map((v) => `arxiv:${v}`),
                ...foundInBody.doi.map((v) => `doi:${v}`)
            ];
            // When the body only contains URLs, list those so the message
            // never ends up with empty parentheses.
            const labels = strongIds.length > 0 ? strongIds : foundInBody.url.map((v) => `url:${v}`);
            msg = `${e.relPath} has identifiers in body (${labels.slice(0, 2).join(", ")}) but missing frontmatter`;
        }
        else {
            msg = `${e.relPath} has #${tag} but no arxiv/doi/url anywhere — citation missing`;
        }
        flagged.push({
            path: e.relPath,
            title: stripMd(e.basename),
            has_frontmatter_citation: hasFmCitation,
            found_in_body: foundInBody,
            proposed_frontmatter_patch: proposed,
            message: msg
        });
    }
    return { scanned, flagged };
}
|
|
1782
|
+
export async function findPath(vault, args) {
|
|
1783
|
+
await vault.ensureExists();
|
|
1784
|
+
const maxDepth = args.max_depth ?? 5;
|
|
1785
|
+
const includeAlts = args.include_alternatives === true;
|
|
1786
|
+
const followEmbeds = args.follow_embeds !== false;
|
|
1787
|
+
const fromArgs = {};
|
|
1788
|
+
if (args.from !== undefined)
|
|
1789
|
+
fromArgs.path = args.from;
|
|
1790
|
+
else if (args.from_title !== undefined)
|
|
1791
|
+
fromArgs.title = args.from_title;
|
|
1792
|
+
const fromEntry = await resolveTarget(vault, fromArgs);
|
|
1793
|
+
const toArgs = {};
|
|
1794
|
+
if (args.to !== undefined)
|
|
1795
|
+
toArgs.path = args.to;
|
|
1796
|
+
else if (args.to_title !== undefined)
|
|
1797
|
+
toArgs.title = args.to_title;
|
|
1798
|
+
const toEntry = await resolveTarget(vault, toArgs);
|
|
1799
|
+
if (fromEntry.absPath === toEntry.absPath) {
|
|
1800
|
+
return {
|
|
1801
|
+
from: fromEntry.relPath,
|
|
1802
|
+
to: toEntry.relPath,
|
|
1803
|
+
found: true,
|
|
1804
|
+
hops: 0,
|
|
1805
|
+
path: [{ path: fromEntry.relPath, title: stripMd(fromEntry.basename), via: "" }]
|
|
1806
|
+
};
|
|
1807
|
+
}
|
|
1808
|
+
const entries = await vault.listMarkdown();
|
|
1809
|
+
// BFS layer-by-layer. visited tracks shortest-known-depth so we don't
|
|
1810
|
+
// revisit at greater depths. We continue collecting at the depth where
|
|
1811
|
+
// we first hit the target IF include_alternatives is set.
|
|
1812
|
+
// v1.8.1 perf fix: build a relPath → entry map ONCE before the BFS loop.
|
|
1813
|
+
// Pre-fix: entries.find((e) => e.relPath === node.rel) was O(N) per visited
|
|
1814
|
+
// node, making the whole BFS O(N²) on large vaults.
|
|
1815
|
+
const byRel = new Map();
|
|
1816
|
+
for (const e of entries)
|
|
1817
|
+
byRel.set(e.relPath, e);
|
|
1818
|
+
const visited = new Set([fromEntry.relPath]);
|
|
1819
|
+
let frontier = [
|
|
1820
|
+
{ rel: fromEntry.relPath, trail: [{ path: fromEntry.relPath, title: stripMd(fromEntry.basename), via: "" }] }
|
|
1821
|
+
];
|
|
1822
|
+
const found = [];
|
|
1823
|
+
let foundDepth = -1;
|
|
1824
|
+
for (let depth = 0; depth < maxDepth && frontier.length > 0; depth++) {
|
|
1825
|
+
const next = [];
|
|
1826
|
+
for (const node of frontier) {
|
|
1827
|
+
const entry = byRel.get(node.rel);
|
|
1828
|
+
if (!entry)
|
|
1829
|
+
continue;
|
|
1830
|
+
const { parsed } = await vault.readNote(entry.absPath, entry.mtimeMs);
|
|
1831
|
+
const links = followEmbeds ? [...parsed.wikilinks, ...parsed.embeds] : parsed.wikilinks;
|
|
1832
|
+
for (const link of links) {
|
|
1833
|
+
const m = findBestMatch(entries, link.target, entry.relPath);
|
|
1834
|
+
if (!m)
|
|
1835
|
+
continue;
|
|
1836
|
+
if (visited.has(m.relPath) && m.absPath !== toEntry.absPath)
|
|
1837
|
+
continue;
|
|
1838
|
+
const newTrail = [...node.trail, { path: m.relPath, title: stripMd(m.basename), via: link.raw }];
|
|
1839
|
+
if (m.absPath === toEntry.absPath) {
|
|
1840
|
+
if (foundDepth === -1)
|
|
1841
|
+
foundDepth = depth + 1;
|
|
1842
|
+
if (foundDepth === depth + 1) {
|
|
1843
|
+
found.push(newTrail);
|
|
1844
|
+
if (!includeAlts) {
|
|
1845
|
+
return {
|
|
1846
|
+
from: fromEntry.relPath,
|
|
1847
|
+
to: toEntry.relPath,
|
|
1848
|
+
found: true,
|
|
1849
|
+
hops: foundDepth,
|
|
1850
|
+
path: newTrail
|
|
1851
|
+
};
|
|
1852
|
+
}
|
|
1853
|
+
}
|
|
1854
|
+
}
|
|
1855
|
+
else {
|
|
1856
|
+
visited.add(m.relPath);
|
|
1857
|
+
next.push({ rel: m.relPath, trail: newTrail });
|
|
1858
|
+
}
|
|
1859
|
+
}
|
|
1860
|
+
}
|
|
1861
|
+
if (foundDepth !== -1 && depth + 1 === foundDepth)
|
|
1862
|
+
break;
|
|
1863
|
+
frontier = next;
|
|
1864
|
+
}
|
|
1865
|
+
if (found.length > 0) {
|
|
1866
|
+
found.sort((a, b) => a.length - b.length || (a[0]?.path ?? "").localeCompare(b[0]?.path ?? ""));
|
|
1867
|
+
const first = found[0];
|
|
1868
|
+
if (!first) {
|
|
1869
|
+
return { from: fromEntry.relPath, to: toEntry.relPath, found: false, hops: -1, path: [] };
|
|
1870
|
+
}
|
|
1871
|
+
const result = {
|
|
1872
|
+
from: fromEntry.relPath,
|
|
1873
|
+
to: toEntry.relPath,
|
|
1874
|
+
found: true,
|
|
1875
|
+
hops: foundDepth,
|
|
1876
|
+
path: first
|
|
1877
|
+
};
|
|
1878
|
+
if (includeAlts)
|
|
1879
|
+
result.alternatives = found.slice(0, 10);
|
|
1880
|
+
return result;
|
|
1881
|
+
}
|
|
1882
|
+
return { from: fromEntry.relPath, to: toEntry.relPath, found: false, hops: -1, path: [] };
|
|
1883
|
+
}
|
|
1884
|
+
/**
 * Build an `obsidian://open` deep link for a note in the vault.
 *
 * @param vault Vault handle; `ensureExists()` and `root` are used here, and
 *              the handle is forwarded to `resolveTarget`.
 * @param args  Target selector forwarded unchanged to `resolveTarget`. A
 *              truthy `new_pane` adds `newpane=true` to the link.
 * @returns `{ uri, vault_name, path, title }` describing the resolved note.
 */
export async function openInUi(vault, args) {
    await vault.ensureExists();
    const target = await resolveTarget(vault, args);
    // Vault name = leaf of the vault root path. obsidian:// matches by name OR
    // by the file's absolute path; if the user opened the vault from a
    // different name in Obsidian, the file argument still resolves correctly.
    const vaultName = path.basename(vault.root);
    const noteRel = stripMd(target.relPath);
    const params = new URLSearchParams({ vault: vaultName, file: noteRel });
    if (args.new_pane)
        params.set("newpane", "true");
    // URLSearchParams serializes as application/x-www-form-urlencoded, which
    // writes a space as "+". A consumer that percent-decodes the URI (rather
    // than form-decodes it) would read that "+" literally and fail to match
    // "My Vault" or "Daily Notes/Today". A literal "+" in a value is already
    // emitted as "%2B", so every "+" left in the serialized string stands for
    // a space and can be rewritten to "%20" without ambiguity.
    const query = params.toString().replace(/\+/g, "%20");
    return {
        uri: `obsidian://open?${query}`,
        vault_name: vaultName,
        path: target.relPath,
        title: stripMd(target.basename)
    };
}
|
|
1902
|
+
/**
 * List `.canvas` files, most recently modified first, with node/edge counts.
 *
 * @param vault Vault handle providing `ensureExists()`,
 *              `listFilesByExtension(ext, folder)` and `readBinaryFile(absPath)`.
 * @param args  `folder` is passed through to `listFilesByExtension`;
 *              `limit` caps the number of returned entries (default 100).
 * @returns Array of `{ path, name, size_bytes, mtime, node_count, edge_count }`.
 */
export async function listCanvases(vault, args) {
    await vault.ensureExists();
    const limit = args.limit ?? 100;
    const all = await vault.listFilesByExtension(".canvas", args.folder);
    // Order newest-first BEFORE applying the limit. Limiting in listing order
    // and sorting afterwards would return an arbitrary subset of the canvases
    // rather than the `limit` most recent ones. Sorting a copy keeps the
    // vault's own array untouched; only the first `limit` files are read.
    const newestFirst = [...all].sort((a, b) => b.mtimeMs - a.mtimeMs);
    const out = [];
    for (const e of newestFirst) {
        if (out.length >= limit)
            break;
        let nodeCount = 0;
        let edgeCount = 0;
        // Stays 0 when the file cannot be read, so a timestamp is never
        // reported as a byte count.
        let size = 0;
        try {
            const buf = await vault.readBinaryFile(e.absPath);
            size = buf.byteLength;
            const parsed = JSON.parse(buf.toString("utf8"));
            nodeCount = Array.isArray(parsed.nodes) ? parsed.nodes.length : 0;
            edgeCount = Array.isArray(parsed.edges) ? parsed.edges.length : 0;
        }
        catch {
            // Malformed or unreadable canvas — fall through with 0 counts.
            // Deliberate best-effort: one bad file must not poison the listing.
        }
        out.push({
            path: e.relPath,
            name: e.basename.replace(/\.canvas$/i, ""),
            size_bytes: size,
            mtime: new Date(e.mtimeMs).toISOString(),
            node_count: nodeCount,
            edge_count: edgeCount
        });
    }
    return out;
}
|
|
1936
|
+
/**
 * Read one `.canvas` file and return its nodes and edges in a normalized,
 * snake_case shape, together with per-kind counts and unresolved file refs.
 *
 * @param vault Vault handle providing `ensureExists()`, `resolveInside()`,
 *              `stat()`, `toRel()`, `readBinaryFile()` and `listMarkdown()`.
 * @param args  `path` (required): vault-relative canvas path; `.canvas` is
 *              appended when the path does not already end with it.
 * @returns `{ path, name, size_bytes, mtime, nodes, edges, summary, broken_file_refs }`.
 * @throws Error when `path` is missing, the file is not valid JSON, or the
 *         JSON document is `null`. Errors from the vault calls propagate.
 */
export async function readCanvas(vault, args) {
    await vault.ensureExists();
    if (!args.path)
        throw new Error("path is required");
    const normalized = args.path.toLowerCase().endsWith(".canvas") ? args.path : `${args.path}.canvas`;
    const abs = vault.resolveInside(normalized);
    // Fail fast if the file cannot be stat'ed; the result is reused for the
    // size/mtime fields below instead of stat'ing a second time.
    const stat = await vault.stat(abs);
    const rel = vault.toRel(abs);
    const buf = await vault.readBinaryFile(abs);
    let parsed;
    try {
        parsed = JSON.parse(buf.toString("utf8"));
    }
    catch (err) {
        throw new Error(`Canvas file is not valid JSON: ${rel} — ${err instanceof Error ? err.message : String(err)}`);
    }
    // `null` is valid JSON but has no properties; without this guard the
    // `parsed.nodes` access below would fail with an opaque TypeError. Other
    // non-object documents simply have no `nodes`/`edges` and yield an empty
    // canvas.
    if (parsed === null)
        throw new Error(`Canvas file is not a JSON object: ${rel}`);
    // Resolve each `file:` node's reference against the vault's current
    // markdown index, so references that match no note are reported in
    // `broken_file_refs`.
    const allMarkdown = await vault.listMarkdown();
    // Forward-slash relPath → entry, built once so each file node is an O(1)
    // lookup. First entry wins, matching a first-match linear search.
    const byPosixPath = new Map();
    for (const m of allMarkdown) {
        const key = m.relPath.replace(/\\/g, "/");
        if (!byPosixPath.has(key))
            byPosixPath.set(key, m);
    }
    const nodes = [];
    const summary = { text: 0, file: 0, link: 0, group: 0, unknown: 0 };
    const brokenRefs = [];
    if (Array.isArray(parsed.nodes)) {
        for (const raw of parsed.nodes) {
            if (!raw || typeof raw !== "object")
                continue;
            const n = raw;
            // Fields shared by every known node kind; wrongly-typed values
            // fall back to "" / 0 rather than being passed through.
            const geometry = {
                id: typeof n.id === "string" ? n.id : "",
                x: typeof n.x === "number" ? n.x : 0,
                y: typeof n.y === "number" ? n.y : 0,
                width: typeof n.width === "number" ? n.width : 0,
                height: typeof n.height === "number" ? n.height : 0
            };
            // `color` is only emitted when the canvas provides it as a string.
            const colorProp = typeof n.color === "string" ? { color: n.color } : {};
            const type = typeof n.type === "string" ? n.type : "unknown";
            switch (type) {
                case "text":
                    nodes.push({
                        kind: "text",
                        ...geometry,
                        text: typeof n.text === "string" ? n.text : "",
                        ...colorProp
                    });
                    summary.text += 1;
                    break;
                case "file": {
                    const fileRef = typeof n.file === "string" ? n.file : "";
                    // Strip leading slashes so the reference is vault-relative.
                    const cleaned = fileRef.replace(/^\/+/, "");
                    // Try an exact vault-relative path match first, then fall
                    // back to `findBestMatch` for references that do not match
                    // a full path.
                    const direct = cleaned.length > 0 ? byPosixPath.get(cleaned) : undefined;
                    const resolved = direct ?? (cleaned ? findBestMatch(allMarkdown, cleaned) : null);
                    if (cleaned && !resolved)
                        brokenRefs.push(cleaned);
                    nodes.push({
                        kind: "file",
                        ...geometry,
                        file: fileRef,
                        file_resolved: resolved ? resolved.relPath : null,
                        ...(typeof n.subpath === "string" ? { subpath: n.subpath } : {}),
                        ...colorProp
                    });
                    summary.file += 1;
                    break;
                }
                case "link":
                    nodes.push({
                        kind: "link",
                        ...geometry,
                        url: typeof n.url === "string" ? n.url : "",
                        ...colorProp
                    });
                    summary.link += 1;
                    break;
                case "group":
                    nodes.push({
                        kind: "group",
                        ...geometry,
                        ...(typeof n.label === "string" ? { label: n.label } : {}),
                        ...colorProp
                    });
                    summary.group += 1;
                    break;
                default:
                    // Unrecognized node types are passed through verbatim.
                    nodes.push({ kind: "unknown", id: geometry.id, raw_type: type, raw: n });
                    summary.unknown += 1;
            }
        }
    }
    const edges = [];
    if (Array.isArray(parsed.edges)) {
        for (const raw of parsed.edges) {
            if (!raw || typeof raw !== "object")
                continue;
            const e = raw;
            const id = typeof e.id === "string" ? e.id : "";
            const fromNode = typeof e.fromNode === "string" ? e.fromNode : "";
            const toNode = typeof e.toNode === "string" ? e.toNode : "";
            // An edge without both endpoints is dropped.
            if (!fromNode || !toNode)
                continue;
            edges.push({
                id,
                from_node: fromNode,
                ...(typeof e.fromSide === "string" ? { from_side: e.fromSide } : {}),
                to_node: toNode,
                ...(typeof e.toSide === "string" ? { to_side: e.toSide } : {}),
                ...(typeof e.label === "string" ? { label: e.label } : {}),
                ...(typeof e.color === "string" ? { color: e.color } : {})
            });
        }
    }
    return {
        path: rel,
        name: path.basename(rel).replace(/\.canvas$/i, ""),
        size_bytes: stat.size,
        mtime: new Date(stat.mtimeMs).toISOString(),
        nodes,
        edges,
        summary,
        broken_file_refs: brokenRefs
    };
}
|
|
2077
|
+
const tfidfCache = new WeakMap();
|
|
2078
|
+
// English function words excluded from the TF-IDF vocabulary.
const STOP_WORDS = new Set([
    "a", "an", "and", "are", "as", "at", "be", "but", "by", "for",
    "from", "has", "have", "if", "in", "is", "it", "its", "of", "on",
    "or", "that", "the", "this", "to", "was", "were", "will", "with", "i",
    "you", "we", "they", "he", "she", "not", "no", "do", "does", "did",
    "had", "been", "being", "so", "than", "then", "there", "their", "them", "these",
    "those", "what", "when", "where", "which", "who", "why", "how"
]);
// v2.1.0: detect Chinese / Japanese / Thai / Khmer / Lao via script ranges.
// These languages don't use spaces between words, so the Unicode-regex
// tokenizer would produce huge multi-word tokens, which tanks BM25 + TF-IDF
// precision. Intl.Segmenter (Node 16+ ICU) gives word-break per language.
// Detection is per-document, branching the tokenizer.
//
// Ranges are written as \u escapes so the pattern survives any tool that
// drops unassigned code points from literal characters:
//   \u3040-\u30ff  Hiragana + Katakana
//   \u3400-\u4dbf  CJK Unified Ideographs Extension A
//   \u4e00-\u9fff  CJK Unified Ideographs
//   \uac00-\ud7af  Hangul Syllables
//   \u0e00-\u0e7f  Thai
//   \u0e80-\u0eff  Lao
//   \u0f00-\u0fff  Tibetan
//   \u1780-\u17ff  Khmer
const CJK_OR_THAI_RANGES = /[\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uac00-\ud7af\u0e00-\u0e7f\u0e80-\u0eff\u0f00-\u0fff\u1780-\u17ff]/;
// Lazily-created word segmenter shared across calls; the tokenizer runs once
// per note when the index is built, so constructing one per call is wasted work.
let tfidfWordSegmenter;
/**
 * Split text into lower-cased index terms for TF-IDF.
 *
 * @param text Raw text (a note body or a search query).
 * @returns Tokens in document order, duplicates preserved. Stop words and
 *          tokens longer than 40 UTF-16 units are dropped.
 */
function tokenizeForTfidf(text) {
    // v1.11.1: Unicode-aware tokenizer. The previous ASCII-only regex
    // (`/[a-z0-9][a-z0-9_-]*/g`) silently dropped Cyrillic, Greek, CJK,
    // Hebrew, Arabic, and any non-Latin content from the TF-IDF index.
    // `\p{L}` matches any Unicode letter; `\p{N}` matches any Unicode number.
    //
    // v2.1.0: when the text contains characters from a no-whitespace script
    // (see CJK_OR_THAI_RANGES), Intl.Segmenter does the word-breaking and its
    // word-like segments are used directly as tokens. This produces real word
    // tokens instead of "認可サーバーがアクセストークン" as one run-on token.
    const lower = text.toLowerCase();
    const out = [];
    if (CJK_OR_THAI_RANGES.test(lower) && typeof Intl !== "undefined" && typeof Intl.Segmenter !== "undefined") {
        if (tfidfWordSegmenter === undefined)
            tfidfWordSegmenter = new Intl.Segmenter(undefined, { granularity: "word" });
        for (const seg of tfidfWordSegmenter.segment(lower)) {
            // Skips whitespace and punctuation segments.
            if (!seg.isWordLike)
                continue;
            const t = seg.segment;
            // Single-character tokens are kept on this path: one ideograph
            // can be a whole word.
            if (t.length < 1)
                continue;
            if (t.length > 40)
                continue;
            if (STOP_WORDS.has(t))
                continue;
            out.push(t);
        }
        return out;
    }
    for (const m of lower.matchAll(/[\p{L}\p{N}][\p{L}\p{N}_-]*/gu)) {
        const t = m[0];
        // On this path one-character tokens are dropped.
        if (t.length < 2)
            continue;
        if (t.length > 40)
            continue;
        if (STOP_WORDS.has(t))
            continue;
        out.push(t);
    }
    return out;
}
|
|
2186
|
+
/**
 * Build (or reuse) the in-memory TF-IDF index for a vault.
 *
 * The cached index is reused only when the current markdown listing has the
 * same length and, position by position, the same `relPath` and `mtimeMs` as
 * the listing the cache was built from.
 *
 * @param vault Vault handle providing `listMarkdown()` and `readNote()`.
 * @returns `{ docs, idf, entriesRef }` — per-note L2-normalized term weights,
 *          the corpus IDF table, and the listing used to validate the cache.
 */
async function buildTfidfIndex(vault) {
    const entries = await vault.listMarkdown();
    const prior = tfidfCache.get(vault);
    if (prior && prior.entriesRef.length === entries.length) {
        const unchanged = prior.entriesRef.every((old, i) => {
            const current = entries[i];
            return current?.relPath === old.relPath && current?.mtimeMs === old.mtimeMs;
        });
        if (unchanged)
            return prior;
    }
    // Pass 1: raw term counts per note plus document frequency per term.
    const docFreq = new Map();
    const rawDocs = [];
    for (const entry of entries) {
        const { parsed } = await vault.readNote(entry.absPath, entry.mtimeMs);
        const tf = new Map();
        for (const token of tokenizeForTfidf(parsed.body)) {
            tf.set(token, (tf.get(token) ?? 0) + 1);
        }
        rawDocs.push({ entry, tf });
        for (const term of tf.keys()) {
            docFreq.set(term, (docFreq.get(term) ?? 0) + 1);
        }
    }
    // Smoothed IDF: ln(1 + N / (1 + df)). Smoothing keeps every-doc terms
    // non-zero and tames inflation on small vaults.
    const N = rawDocs.length || 1;
    const idf = new Map();
    docFreq.forEach((df, term) => {
        idf.set(term, Math.log(1 + N / (1 + df)));
    });
    // Pass 2: sublinear-TF × IDF weights, scaled to unit length per note.
    const docs = rawDocs.map(({ entry, tf }) => {
        const weights = new Map();
        let normSq = 0;
        for (const [term, count] of tf) {
            const weight = (1 + Math.log(count)) * (idf.get(term) ?? 0);
            if (weight === 0)
                continue;
            weights.set(term, weight);
            normSq += weight * weight;
        }
        const norm = Math.sqrt(normSq);
        if (norm > 0) {
            for (const [term, weight] of weights) {
                weights.set(term, weight / norm);
            }
        }
        return {
            relPath: entry.relPath,
            basename: entry.basename,
            mtimeMs: entry.mtimeMs,
            weights
        };
    });
    const result = { docs, idf, entriesRef: entries };
    tfidfCache.set(vault, result);
    return result;
}
|
|
2240
|
+
/**
 * Rank notes against a query by TF-IDF cosine similarity.
 *
 * @param vault Vault handle providing `ensureExists()`, `resolveInside()` and
 *              `readNote()`; the handle is also passed to `buildTfidfIndex`.
 * @param args  `query` (required, non-blank); `limit` (default 10);
 *              `min_score` (default 0.05); optional `folder` restricting
 *              results to paths under that folder.
 * @returns `{ query, total_docs, method: "tfidf-cosine", matches }`, where each
 *          match has `path`, `title`, `score`, `snippet`, `matched_terms`
 *          (at most 8, highest IDF first) and `mtime`.
 * @throws Error when `query` is empty or whitespace-only.
 */
export async function semanticSearch(vault, args) {
    await vault.ensureExists();
    const limit = args.limit ?? 10;
    const minScore = args.min_score ?? 0.05;
    if (!args.query.trim())
        throw new Error("query must not be empty");
    const { docs, idf } = await buildTfidfIndex(vault);
    // Vectorize query: same tokenization, IDF from the corpus, L2 normalize.
    const qTokens = tokenizeForTfidf(args.query);
    const qTf = new Map();
    for (const t of qTokens)
        qTf.set(t, (qTf.get(t) ?? 0) + 1);
    const qWeights = new Map();
    let qNormSq = 0;
    for (const [t, count] of qTf) {
        // Terms absent from the corpus have no IDF and drop out here.
        const w = (1 + Math.log(count)) * (idf.get(t) ?? 0);
        if (w === 0)
            continue;
        qWeights.set(t, w);
        qNormSq += w * w;
    }
    const qNorm = Math.sqrt(qNormSq);
    if (qNorm > 0) {
        for (const [t, w] of qWeights)
            qWeights.set(t, w / qNorm);
    }
    // Cosine = Σ q[t]·d[t] over shared terms (both vectors are L2-normed).
    const folderPrefix = args.folder ? `${args.folder.replace(/\/+$/, "")}/` : null;
    const scored = [];
    for (const doc of docs) {
        if (folderPrefix && !doc.relPath.startsWith(folderPrefix) && doc.relPath !== args.folder)
            continue;
        let s = 0;
        const matched = [];
        for (const [t, qw] of qWeights) {
            const dw = doc.weights.get(t);
            if (dw !== undefined) {
                s += qw * dw;
                matched.push(t);
            }
        }
        if (s < minScore)
            continue;
        scored.push({ doc, score: s, matchedTerms: matched });
    }
    scored.sort((a, b) => b.score - a.score);
    const matches = [];
    for (const { doc, score, matchedTerms } of scored.slice(0, limit)) {
        // Rarest (highest-IDF) term first: it anchors the snippet and leads
        // `matched_terms`.
        matchedTerms.sort((a, b) => (idf.get(b) ?? 0) - (idf.get(a) ?? 0));
        // v1.8.1 fix: the snippet is built from `parsed.body`, not the full
        // file, so a term that also appears in the YAML frontmatter cannot
        // leak YAML keys/values into the response. The index is built from
        // the body too, so matched terms are expected to be found below.
        const { parsed } = await vault.readNote(vault.resolveInside(doc.relPath), doc.mtimeMs);
        const body = parsed.body;
        // Lower-case once per document rather than once per matched term.
        const lowerBody = body.toLowerCase();
        let snippetText = "";
        for (const t of matchedTerms) {
            const idx = lowerBody.indexOf(t);
            if (idx >= 0) {
                const { snippet } = sliceSnippet(body, idx, t.length);
                snippetText = snippet;
                break;
            }
        }
        matches.push({
            path: doc.relPath,
            title: stripMd(doc.basename),
            score: Math.round(score * 10000) / 10000,
            snippet: snippetText,
            matched_terms: matchedTerms.slice(0, 8),
            mtime: new Date(doc.mtimeMs).toISOString()
        });
    }
    return { query: args.query, total_docs: docs.length, method: "tfidf-cosine", matches };
}
|
|
2316
|
+
/**
 * v3.1.0 — choose the text to embed for an embeddings-search call.
 *
 * HyDE-augmented retrieval (Gao et al 2023) embeds the agent-supplied
 * `hypothetical_answer` instead of the query. When that field is absent,
 * empty, or whitespace-only, the raw query is used.
 *
 * Kept as a pure helper so the decision can be unit-tested without loading
 * the embedder.
 *
 * @param args Object with `query` and an optional `hypothetical_answer`.
 * @returns `{ text, usedHyde }` — `text` is the trimmed hypothetical answer
 *          when `usedHyde` is true, otherwise `args.query` unchanged.
 */
export function pickEmbedTextForHyde(args) {
    const hypothetical = args.hypothetical_answer?.trim() ?? "";
    return hypothetical.length > 0
        ? { text: hypothetical, usedHyde: true }
        : { text: args.query, usedHyde: false };
}
|
|
2332
|
+
/**
 * Search the persistent embedding index for chunks similar to the query.
 *
 * @param vault     Vault handle; `ensureExists()`, `root` and `isExcluded()`
 *                  are used here.
 * @param args      `query` (required, non-blank); optional
 *                  `hypothetical_answer`, `limit` (default 10), `min_score`
 *                  (default 0.3), `folder` and `model`.
 * @param embedFile Path of the embedding database file.
 * @param hnsw      Optional HNSW handle (`index`, `rowByLabel`, optional `ef`);
 *                  when absent, `db.search` is used instead.
 * @returns `{ query, method: "embeddings-cosine", model, total_chunks, matches }`,
 *          plus `hyde: true` when the hypothetical answer was embedded.
 * @throws Error when the query is blank, the index file does not exist, or
 *         the embedder returns no vector.
 */
export async function embeddingsSearch(vault, args, embedFile, hnsw) {
    await vault.ensureExists();
    if (!args.query.trim())
        throw new Error("query must not be empty");
    // v3.1.0 — HyDE prefers the hypothetical answer when present; otherwise
    // the query itself is embedded.
    const picked = pickEmbedTextForHyde(args);
    const limit = args.limit ?? 10;
    const minScore = args.min_score ?? 0.3;
    // Lazy-load embed-db + embeddings only when the tool is actually called.
    const [embedDbModule, embeddingsModule] = await Promise.all([
        import("./embed-db.js"),
        import("./embeddings.js")
    ]);
    const { EmbedDb } = embedDbModule;
    const { loadEmbedder, resolveModel } = embeddingsModule;
    // Check for the index file before doing anything heavy, so "index not
    // built yet" is reported separately from "model failed to load".
    const { existsSync } = await import("node:fs");
    if (!existsSync(embedFile)) {
        throw new Error(`Embedding index not found at ${embedFile}. Run: enquire-mcp build-embeddings --vault ${vault.root} (first-time setup also needs: enquire-mcp install-model multilingual)`);
    }
    const model = resolveModel(args.model);
    const db = new EmbedDb({
        file: embedFile,
        vaultRoot: vault.root,
        modelAlias: model.alias,
        dim: model.dim
    });
    await db.open();
    try {
        const total = db.totalChunks();
        if (total === 0) {
            return { query: args.query, method: "embeddings-cosine", model: model.alias, total_chunks: 0, matches: [] };
        }
        const embedder = await loadEmbedder(args.model);
        const vectors = await embedder.embed([picked.text]);
        const qVec = vectors[0];
        if (!qVec)
            throw new Error("Embedder returned no vectors for the query");
        // v2.0.0-beta.2 P0 fix: the persistent index is built once and may
        // hold rows for paths that are excluded now (exclusion flags added
        // after the build). Those rows are filtered out below before anything
        // is returned. Fetching 2× `limit` keeps the top-K stable when some
        // hits are filtered away.
        const overFetch = limit * 2;
        let candidates;
        if (hnsw) {
            // v2.13.0 — HNSW path. Ask for max(2 × overFetch, 30) neighbours,
            // capped at the number of indexed rows (and never below 1), because
            // an approximate index can miss a true nearest neighbour.
            const indexedRows = Math.max(hnsw.rowByLabel.size, 1);
            const k = Math.min(Math.max(overFetch * 2, 30), indexedRows);
            const searchOptions = hnsw.ef !== undefined ? { ef: hnsw.ef } : undefined;
            const knn = hnsw.index.searchKnn(qVec, k, searchOptions);
            const { hnswResultsToHits } = await import("./hnsw.js");
            // Apply the folder filter and the min_score floor here, since
            // this path does not go through `db.search`.
            const prefix = args.folder ? `${args.folder.replace(/\/+$/, "")}/` : null;
            candidates = hnswResultsToHits(knn, hnsw.rowByLabel).filter((hit) => {
                if (prefix !== null && !hit.rel_path.startsWith(prefix))
                    return false;
                return hit.score >= minScore;
            });
        }
        else {
            candidates = db.search(qVec, overFetch, { folder: args.folder, minScore });
        }
        const matches = [];
        for (const hit of candidates) {
            if (matches.length >= limit)
                break;
            // Privacy filter: never surface a chunk from an excluded path.
            if (vault.isExcluded(hit.rel_path))
                continue;
            matches.push({
                path: hit.rel_path,
                title: stripMd(path.basename(hit.rel_path)),
                score: Math.round(hit.score * 10000) / 10000,
                snippet: hit.text_preview.slice(0, 240),
                chunk_index: hit.chunk_index,
                line_start: hit.line_start,
                line_end: hit.line_end,
                kind: hit.kind
            });
        }
        const response = {
            query: args.query,
            method: "embeddings-cosine",
            model: model.alias,
            total_chunks: total,
            matches
        };
        if (picked.usedHyde)
            response.hyde = true;
        return response;
    }
    finally {
        // Always release the database handle, including on early return/throw.
        db.close();
    }
}
|
|
2423
|
+
export async function searchHybrid(vault, args, ctx) {
|
|
2424
|
+
await vault.ensureExists();
|
|
2425
|
+
if (!args.query.trim())
|
|
2426
|
+
throw new Error("query must not be empty");
|
|
2427
|
+
const limit = args.limit ?? 10;
|
|
2428
|
+
const minSignals = args.min_signals ?? 1;
|
|
2429
|
+
const granularity = args.granularity ?? "note";
|
|
2430
|
+
// Fan-out per-ranker top-K. Bigger than user's `limit` so RRF has room
|
|
2431
|
+
// to surface a doc that's mid-rank in one signal but top in another.
|
|
2432
|
+
const fanOutK = Math.max(50, limit * 5);
|
|
2433
|
+
const [{ reciprocalRankFusion, RRF_K }, { existsSync }] = await Promise.all([import("./rrf.js"), import("node:fs")]);
|
|
2434
|
+
// v2.0.0-beta.2 P1 fix: collect per-signal errors for response-side observability.
|
|
2435
|
+
const signalErrors = {};
|
|
2436
|
+
const signalsUsed = [];
|
|
2437
|
+
// ─── BM25 (FTS5) ────────────────────────────────────────────────────────
|
|
2438
|
+
// Note-level: collapse multi-chunk hits to the best rank per note.
|
|
2439
|
+
let bm25Ranked = [];
|
|
2440
|
+
if (ctx.ftsIndex) {
|
|
2441
|
+
try {
|
|
2442
|
+
// v2.0.0-beta.2 P0 fix: filter excluded paths from FTS5 hits BEFORE
|
|
2443
|
+
// chunk-collapse + RRF. The .fts5.db can contain entries from when the
|
|
2444
|
+
// index was built without exclusion flags (or with different flags).
|
|
2445
|
+
// Pre-fix, BM25 search returned excluded chunks via the hybrid pipeline.
|
|
2446
|
+
const rawFtsHits = ctx.ftsIndex.search(args.query, { limit: fanOutK, folder: args.folder });
|
|
2447
|
+
const ftsHits = rawFtsHits.filter((h) => !vault.isExcluded(h.rel_path));
|
|
2448
|
+
// v2.2.0: granularity branch.
|
|
2449
|
+
// "note" → collapse multi-chunk hits per note (best-rank wins),
|
|
2450
|
+
// RRF fuses on path key.
|
|
2451
|
+
// "block" → keep each chunk distinct, RRF fuses on `path#chunk_index`.
|
|
2452
|
+
if (granularity === "block") {
|
|
2453
|
+
bm25Ranked = ftsHits.map((h, i) => ({
|
|
2454
|
+
id: `${h.rel_path}#${h.chunk_index}`,
|
|
2455
|
+
rank: i + 1,
|
|
2456
|
+
score: h.score,
|
|
2457
|
+
snippet: h.snippet,
|
|
2458
|
+
chunk_index: h.chunk_index,
|
|
2459
|
+
line_start: h.line_start,
|
|
2460
|
+
line_end: h.line_end,
|
|
2461
|
+
kind: h.kind
|
|
2462
|
+
}));
|
|
2463
|
+
}
|
|
2464
|
+
else {
|
|
2465
|
+
const bestPerNote = new Map();
|
|
2466
|
+
ftsHits.forEach((h, i) => {
|
|
2467
|
+
const existing = bestPerNote.get(h.rel_path);
|
|
2468
|
+
if (!existing || i < existing.rank) {
|
|
2469
|
+
bestPerNote.set(h.rel_path, {
|
|
2470
|
+
score: h.score,
|
|
2471
|
+
rank: i + 1,
|
|
2472
|
+
snippet: h.snippet,
|
|
2473
|
+
chunk_index: h.chunk_index,
|
|
2474
|
+
line_start: h.line_start,
|
|
2475
|
+
line_end: h.line_end,
|
|
2476
|
+
kind: h.kind
|
|
2477
|
+
});
|
|
2478
|
+
}
|
|
2479
|
+
});
|
|
2480
|
+
bm25Ranked = Array.from(bestPerNote.entries()).map(([id, b]) => ({
|
|
2481
|
+
id,
|
|
2482
|
+
rank: b.rank,
|
|
2483
|
+
score: b.score,
|
|
2484
|
+
snippet: b.snippet,
|
|
2485
|
+
chunk_index: b.chunk_index,
|
|
2486
|
+
line_start: b.line_start,
|
|
2487
|
+
line_end: b.line_end,
|
|
2488
|
+
kind: b.kind
|
|
2489
|
+
}));
|
|
2490
|
+
// Re-sort to ensure 1-based ranks are consecutive after dedup.
|
|
2491
|
+
bm25Ranked.sort((a, b) => a.rank - b.rank);
|
|
2492
|
+
for (let i = 0; i < bm25Ranked.length; i++) {
|
|
2493
|
+
const hit = bm25Ranked[i];
|
|
2494
|
+
if (hit)
|
|
2495
|
+
hit.rank = i + 1;
|
|
2496
|
+
}
|
|
2497
|
+
}
|
|
2498
|
+
if (bm25Ranked.length > 0)
|
|
2499
|
+
signalsUsed.push("bm25");
|
|
2500
|
+
}
|
|
2501
|
+
catch (err) {
|
|
2502
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
2503
|
+
signalErrors.bm25 = msg;
|
|
2504
|
+
process.stderr.write(`obsidian_search: BM25 ranker failed — ${msg}\n`);
|
|
2505
|
+
}
|
|
2506
|
+
}
|
|
2507
|
+
// ─── TF-IDF ─────────────────────────────────────────────────────────────
|
|
2508
|
+
// Always available (in-memory, no native deps).
|
|
2509
|
+
let tfidfRanked = [];
|
|
2510
|
+
try {
|
|
2511
|
+
const tfidf = await semanticSearch(vault, {
|
|
2512
|
+
query: args.query,
|
|
2513
|
+
folder: args.folder,
|
|
2514
|
+
limit: fanOutK,
|
|
2515
|
+
min_score: 0.05
|
|
2516
|
+
});
|
|
2517
|
+
tfidfRanked = tfidf.matches.map((m, i) => ({
|
|
2518
|
+
id: m.path,
|
|
2519
|
+
rank: i + 1,
|
|
2520
|
+
score: m.score,
|
|
2521
|
+
snippet: m.snippet
|
|
2522
|
+
}));
|
|
2523
|
+
if (tfidfRanked.length > 0)
|
|
2524
|
+
signalsUsed.push("tfidf");
|
|
2525
|
+
}
|
|
2526
|
+
catch (err) {
|
|
2527
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
2528
|
+
signalErrors.tfidf = msg;
|
|
2529
|
+
process.stderr.write(`obsidian_search: TF-IDF ranker failed — ${msg}\n`);
|
|
2530
|
+
}
|
|
2531
|
+
// ─── ML embeddings (if .embed.db exists) ────────────────────────────────
|
|
2532
|
+
let embedRanked = [];
|
|
2533
|
+
if (existsSync(ctx.embedFile)) {
|
|
2534
|
+
try {
|
|
2535
|
+
// v2.0.0-beta.1 P1 fix: pass `min_score: 0` to fan-out the embeddings
|
|
2536
|
+
// ranker uniformly with BM25 (no floor) and TF-IDF (0.05 floor). The
|
|
2537
|
+
// user-facing precision filter happens AFTER fusion via `min_signals`,
|
|
2538
|
+
// not before — pre-fix, embeddings used the standalone tool's 0.3
|
|
2539
|
+
// default which silently shrank the embedding-side candidate pool and
|
|
2540
|
+
// starved RRF of cross-signal evidence.
|
|
2541
|
+
const embed = await embeddingsSearch(vault, { query: args.query, folder: args.folder, limit: fanOutK, model: args.embedding_model, min_score: 0 }, ctx.embedFile, ctx.hnsw);
|
|
2542
|
+
// v2.2.0: granularity branch — same shape as BM25 above.
|
|
2543
|
+
if (granularity === "block") {
|
|
2544
|
+
embedRanked = embed.matches.map((m, i) => ({
|
|
2545
|
+
id: `${m.path}#${m.chunk_index ?? 0}`,
|
|
2546
|
+
rank: i + 1,
|
|
2547
|
+
score: m.score,
|
|
2548
|
+
snippet: m.snippet,
|
|
2549
|
+
chunk_index: m.chunk_index,
|
|
2550
|
+
line_start: m.line_start,
|
|
2551
|
+
line_end: m.line_end,
|
|
2552
|
+
kind: m.kind
|
|
2553
|
+
}));
|
|
2554
|
+
}
|
|
2555
|
+
else {
|
|
2556
|
+
const bestPerNote = new Map();
|
|
2557
|
+
embed.matches.forEach((m, i) => {
|
|
2558
|
+
const existing = bestPerNote.get(m.path);
|
|
2559
|
+
if (!existing || i < existing.rank) {
|
|
2560
|
+
bestPerNote.set(m.path, {
|
|
2561
|
+
score: m.score,
|
|
2562
|
+
rank: i + 1,
|
|
2563
|
+
snippet: m.snippet,
|
|
2564
|
+
chunk_index: m.chunk_index,
|
|
2565
|
+
line_start: m.line_start,
|
|
2566
|
+
line_end: m.line_end,
|
|
2567
|
+
kind: m.kind
|
|
2568
|
+
});
|
|
2569
|
+
}
|
|
2570
|
+
});
|
|
2571
|
+
embedRanked = Array.from(bestPerNote.entries()).map(([id, b]) => ({
|
|
2572
|
+
id,
|
|
2573
|
+
rank: b.rank,
|
|
2574
|
+
score: b.score,
|
|
2575
|
+
snippet: b.snippet,
|
|
2576
|
+
chunk_index: b.chunk_index,
|
|
2577
|
+
line_start: b.line_start,
|
|
2578
|
+
line_end: b.line_end,
|
|
2579
|
+
kind: b.kind
|
|
2580
|
+
}));
|
|
2581
|
+
embedRanked.sort((a, b) => a.rank - b.rank);
|
|
2582
|
+
for (let i = 0; i < embedRanked.length; i++) {
|
|
2583
|
+
const hit = embedRanked[i];
|
|
2584
|
+
if (hit)
|
|
2585
|
+
hit.rank = i + 1;
|
|
2586
|
+
}
|
|
2587
|
+
}
|
|
2588
|
+
if (embedRanked.length > 0)
|
|
2589
|
+
signalsUsed.push("embeddings");
|
|
2590
|
+
}
|
|
2591
|
+
catch (err) {
|
|
2592
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
2593
|
+
signalErrors.embeddings = msg;
|
|
2594
|
+
process.stderr.write(`obsidian_search: embeddings ranker failed — ${msg}\n`);
|
|
2595
|
+
}
|
|
2596
|
+
}
|
|
2597
|
+
// ─── RRF fusion ─────────────────────────────────────────────────────────
|
|
2598
|
+
const fused = reciprocalRankFusion({
|
|
2599
|
+
bm25: bm25Ranked.map((h) => ({ id: h.id, rank: h.rank, score: h.score })),
|
|
2600
|
+
tfidf: tfidfRanked.map((h) => ({ id: h.id, rank: h.rank, score: h.score })),
|
|
2601
|
+
embeddings: embedRanked.map((h) => ({ id: h.id, rank: h.rank, score: h.score }))
|
|
2602
|
+
}, { topK: Math.max(limit * 4, 30) } // overshoot — graph boost may rerank
|
|
2603
|
+
);
|
|
2604
|
+
// ─── v2.3.0: Wikilink graph-boost ───────────────────────────────────────
|
|
2605
|
+
// Re-rank top-K by counting how many *other* top-K hits link to each one.
|
|
2606
|
+
// Equivalent to a 1-step personalised PageRank seeded by the fused top-K.
|
|
2607
|
+
// Boost is small (α=0.005) — enough to break ties but won't override
|
|
2608
|
+
// strong single-ranker signals. Requires no new index — uses already-
|
|
2609
|
+
// cached parsed wikilinks per note.
|
|
2610
|
+
// This is the "only enquire-mcp does this" feature: generic vector stores
|
|
2611
|
+
// can't do this without an Obsidian-aware layer; Smart Connections doesn't
|
|
2612
|
+
// do it either. Wikilinks ARE the differentiating Obsidian primitive.
|
|
2613
|
+
const graphBoost = args.graph_boost !== false; // default ON
|
|
2614
|
+
if (graphBoost && fused.length > 1) {
|
|
2615
|
+
const candidatePaths = new Set();
|
|
2616
|
+
for (const f of fused) {
|
|
2617
|
+
candidatePaths.add(f.id.includes("#") ? (f.id.split("#")[0] ?? f.id) : f.id);
|
|
2618
|
+
}
|
|
2619
|
+
const outLinks = new Map();
|
|
2620
|
+
for (const candidatePath of candidatePaths) {
|
|
2621
|
+
try {
|
|
2622
|
+
const note = await vault.readNote(vault.resolveInside(candidatePath));
|
|
2623
|
+
const targets = new Set();
|
|
2624
|
+
for (const wl of note.parsed.wikilinks) {
|
|
2625
|
+
if (!wl.target)
|
|
2626
|
+
continue;
|
|
2627
|
+
// Wikilinks can be by basename ("Foo") or relative path ("Sub/Foo").
|
|
2628
|
+
// Normalize both forms so the membership test catches either.
|
|
2629
|
+
targets.add(wl.target);
|
|
2630
|
+
targets.add(stripMd(wl.target));
|
|
2631
|
+
}
|
|
2632
|
+
outLinks.set(candidatePath, targets);
|
|
2633
|
+
}
|
|
2634
|
+
catch {
|
|
2635
|
+
// skip unreadable notes
|
|
2636
|
+
}
|
|
2637
|
+
}
|
|
2638
|
+
const ALPHA = 0.005;
|
|
2639
|
+
for (const f of fused) {
|
|
2640
|
+
const fPath = f.id.includes("#") ? (f.id.split("#")[0] ?? f.id) : f.id;
|
|
2641
|
+
const fBasename = stripMd(path.basename(fPath));
|
|
2642
|
+
let inDegree = 0;
|
|
2643
|
+
for (const [otherPath, targets] of outLinks) {
|
|
2644
|
+
if (otherPath === fPath)
|
|
2645
|
+
continue;
|
|
2646
|
+
if (targets.has(fPath) || targets.has(stripMd(fPath)) || targets.has(fBasename)) {
|
|
2647
|
+
inDegree += 1;
|
|
2648
|
+
}
|
|
2649
|
+
}
|
|
2650
|
+
if (inDegree > 0)
|
|
2651
|
+
f.score += ALPHA * inDegree;
|
|
2652
|
+
}
|
|
2653
|
+
fused.sort((a, b) => b.score - a.score);
|
|
2654
|
+
}
|
|
2655
|
+
// Build snippet/chunk lookup tables for attaching the best evidence per
|
|
2656
|
+
// note in the final response.
|
|
2657
|
+
const bm25Map = new Map(bm25Ranked.map((h) => [h.id, h]));
|
|
2658
|
+
const tfidfMap = new Map(tfidfRanked.map((h) => [h.id, h]));
|
|
2659
|
+
const embedMap = new Map(embedRanked.map((h) => [h.id, h]));
|
|
2660
|
+
// ─── v2.9.0: Cross-encoder reranking (post-RRF, post-graph-boost) ────────
|
|
2661
|
+
// Take the top-N fused candidates, score each (query, snippet) pair with a
|
|
2662
|
+
// BGE-style cross-encoder, and re-sort. Cross-encoder is far more accurate
|
|
2663
|
+
// than bi-encoder cosine for relevance ranking — it sees query+document
|
|
2664
|
+
// interaction directly. ~30-50ms per query overhead on M1 CPU at N=50.
|
|
2665
|
+
//
|
|
2666
|
+
// Failures are caught and surfaced as `signal_errors.reranker` so a model
|
|
2667
|
+
// load problem doesn't poison the whole search response. The fused order
|
|
2668
|
+
// (RRF + graph-boost) is preserved if reranking fails.
|
|
2669
|
+
let rerankerScores = null;
|
|
2670
|
+
if ((ctx.reranker || ctx.rerankerOverride) && fused.length > 0) {
|
|
2671
|
+
const topN = ctx.reranker?.topN ?? 50;
|
|
2672
|
+
const rerankBatch = fused.slice(0, topN);
|
|
2673
|
+
try {
|
|
2674
|
+
// Prefer the test-injected reranker when present; otherwise lazy-load.
|
|
2675
|
+
let reranker;
|
|
2676
|
+
if (ctx.rerankerOverride) {
|
|
2677
|
+
reranker = ctx.rerankerOverride;
|
|
2678
|
+
}
|
|
2679
|
+
else {
|
|
2680
|
+
const { loadReranker } = await import("./embeddings.js");
|
|
2681
|
+
reranker = await loadReranker(ctx.reranker?.alias);
|
|
2682
|
+
}
|
|
2683
|
+
// For each candidate, find the best snippet (BM25 > embeddings > TF-IDF)
|
|
2684
|
+
// and pair it with the query. Empty-snippet candidates go to the bottom
|
|
2685
|
+
// by getting a -Infinity score (sort below scored candidates).
|
|
2686
|
+
const passages = rerankBatch.map((f) => {
|
|
2687
|
+
const bm = bm25Map.get(f.id);
|
|
2688
|
+
const emb = embedMap.get(f.id);
|
|
2689
|
+
const tf = tfidfMap.get(f.id);
|
|
2690
|
+
const snippet = bm?.snippet ?? emb?.snippet ?? tf?.snippet ?? "";
|
|
2691
|
+
// Strip FTS5 «…» highlight markers — they're cosmetic and the
|
|
2692
|
+
// reranker should see clean prose. Limit to ~600 chars to stay
|
|
2693
|
+
// safely under the model's 512-token budget (rough char/token ratio
|
|
2694
|
+
// varies by language; 600 chars ≈ 200 tokens for English / Cyrillic
|
|
2695
|
+
// per the multilingual model's tokenizer, well under 512).
|
|
2696
|
+
return snippet.replace(/[«»]/g, "").slice(0, 600);
|
|
2697
|
+
});
|
|
2698
|
+
const scores = await reranker.score(args.query, passages);
|
|
2699
|
+
rerankerScores = new Map();
|
|
2700
|
+
for (let i = 0; i < rerankBatch.length; i++) {
|
|
2701
|
+
const f = rerankBatch[i];
|
|
2702
|
+
const s = scores[i];
|
|
2703
|
+
if (f && typeof s === "number")
|
|
2704
|
+
rerankerScores.set(f.id, s);
|
|
2705
|
+
}
|
|
2706
|
+
// Sort the top-N by reranker score; everything below top-N keeps RRF
|
|
2707
|
+
// order. We do this by re-ordering fused[0..topN] in place.
|
|
2708
|
+
const reordered = [...rerankBatch].sort((a, b) => {
|
|
2709
|
+
const sa = rerankerScores?.get(a.id) ?? -Infinity;
|
|
2710
|
+
const sb = rerankerScores?.get(b.id) ?? -Infinity;
|
|
2711
|
+
return sb - sa;
|
|
2712
|
+
});
|
|
2713
|
+
for (let i = 0; i < reordered.length; i++) {
|
|
2714
|
+
fused[i] = reordered[i];
|
|
2715
|
+
}
|
|
2716
|
+
}
|
|
2717
|
+
catch (err) {
|
|
2718
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
2719
|
+
// Add to signalErrors so it surfaces in the response. Reranker is not
|
|
2720
|
+
// a "signal" per se but the existing dict is the right home.
|
|
2721
|
+
signalErrors.reranker = msg;
|
|
2722
|
+
process.stderr.write(`obsidian_search: reranker failed — ${msg}\n`);
|
|
2723
|
+
}
|
|
2724
|
+
}
|
|
2725
|
+
const matches = [];
|
|
2726
|
+
for (const f of fused) {
|
|
2727
|
+
const numSignals = Object.keys(f.per_signal).length;
|
|
2728
|
+
if (numSignals < minSignals)
|
|
2729
|
+
continue;
|
|
2730
|
+
// Snippet preference: BM25 > embeddings > TF-IDF (BM25 snippets bracket
|
|
2731
|
+
// the matched terms with «…», highest signal-to-noise).
|
|
2732
|
+
const bm = bm25Map.get(f.id);
|
|
2733
|
+
const emb = embedMap.get(f.id);
|
|
2734
|
+
const tf = tfidfMap.get(f.id);
|
|
2735
|
+
const bestEvidence = bm ?? emb ?? tf;
|
|
2736
|
+
// Build per_signal as a Partial — only include keys that actually
|
|
2737
|
+
// contributed. Setting `key: undefined` keeps the key visible in
|
|
2738
|
+
// Object.keys() and JSON.stringify, which leaks "this signal exists
|
|
2739
|
+
// but didn't match" instead of "this signal wasn't even running".
|
|
2740
|
+
const perSignal = {};
|
|
2741
|
+
if (f.per_signal.bm25)
|
|
2742
|
+
perSignal.bm25 = { rank: f.per_signal.bm25.rank, score: f.per_signal.bm25.score };
|
|
2743
|
+
if (f.per_signal.tfidf)
|
|
2744
|
+
perSignal.tfidf = { rank: f.per_signal.tfidf.rank, score: f.per_signal.tfidf.score };
|
|
2745
|
+
if (f.per_signal.embeddings) {
|
|
2746
|
+
perSignal.embeddings = { rank: f.per_signal.embeddings.rank, score: f.per_signal.embeddings.score };
|
|
2747
|
+
}
|
|
2748
|
+
// v2.2.0: when granularity is "block", f.id is "path#chunk_index" — split
|
|
2749
|
+
// back into path + chunk_index for the response. When "note", f.id is
|
|
2750
|
+
// just the path.
|
|
2751
|
+
let pathPart = f.id;
|
|
2752
|
+
let chunkFromId;
|
|
2753
|
+
if (granularity === "block") {
|
|
2754
|
+
const hashIdx = f.id.lastIndexOf("#");
|
|
2755
|
+
if (hashIdx > 0) {
|
|
2756
|
+
pathPart = f.id.slice(0, hashIdx);
|
|
2757
|
+
const parsed = Number.parseInt(f.id.slice(hashIdx + 1), 10);
|
|
2758
|
+
if (Number.isInteger(parsed) && parsed >= 0)
|
|
2759
|
+
chunkFromId = parsed;
|
|
2760
|
+
}
|
|
2761
|
+
}
|
|
2762
|
+
// v2.8.0: derive content-source kind. BM25 / embeddings hits carry it
|
|
2763
|
+
// explicitly; TF-IDF doesn't (it only runs over markdown). Either
|
|
2764
|
+
// ranker reporting "pdf" wins; otherwise fall back to "md".
|
|
2765
|
+
const kind = bm?.kind === "pdf" || emb?.kind === "pdf" ? "pdf" : "md";
|
|
2766
|
+
// For PDFs, the title is best derived from the filename without
|
|
2767
|
+
// `.md`-stripping (PDFs don't have that extension); use the .pdf-stripped
|
|
2768
|
+
// form so titles read naturally in agent output.
|
|
2769
|
+
const baseName = path.basename(pathPart);
|
|
2770
|
+
const title = kind === "pdf" ? baseName.replace(/\.pdf$/i, "") : stripMd(baseName);
|
|
2771
|
+
const rerankerScore = rerankerScores?.get(f.id);
|
|
2772
|
+
matches.push({
|
|
2773
|
+
path: pathPart,
|
|
2774
|
+
title,
|
|
2775
|
+
score: Math.round(f.score * 100000) / 100000,
|
|
2776
|
+
snippet: bestEvidence?.snippet ?? "",
|
|
2777
|
+
chunk_index: chunkFromId ?? bm?.chunk_index ?? emb?.chunk_index,
|
|
2778
|
+
line_start: bm?.line_start ?? emb?.line_start,
|
|
2779
|
+
line_end: bm?.line_end ?? emb?.line_end,
|
|
2780
|
+
kind,
|
|
2781
|
+
per_signal: perSignal,
|
|
2782
|
+
...(typeof rerankerScore === "number" && Number.isFinite(rerankerScore)
|
|
2783
|
+
? { reranker_score: Math.round(rerankerScore * 100000) / 100000 }
|
|
2784
|
+
: {})
|
|
2785
|
+
});
|
|
2786
|
+
if (matches.length >= limit)
|
|
2787
|
+
break;
|
|
2788
|
+
}
|
|
2789
|
+
// v2.0.0-beta.2 P1 fix: surface signal_errors only when at least one
|
|
2790
|
+
// ranker actually failed. Omit the key when all signals ran cleanly so
|
|
2791
|
+
// happy-path responses stay narrow.
|
|
2792
|
+
const response = {
|
|
2793
|
+
query: args.query,
|
|
2794
|
+
method: "rrf",
|
|
2795
|
+
k: RRF_K,
|
|
2796
|
+
signals_used: signalsUsed,
|
|
2797
|
+
total_candidates: fused.length,
|
|
2798
|
+
matches
|
|
2799
|
+
};
|
|
2800
|
+
if (Object.keys(signalErrors).length > 0) {
|
|
2801
|
+
response.signal_errors = signalErrors;
|
|
2802
|
+
}
|
|
2803
|
+
return response;
|
|
2804
|
+
}
|
|
2805
|
+
/**
 * Build a markdown "context bundle" for a query, bounded by an approximate
 * token budget (estimated at ~4 characters per token).
 *
 * The bundle is assembled in this order:
 *   1. a `# Context for: <query>` title,
 *   2. `## Top notes` — bodies of the hybrid-search hits, each truncated to
 *      at most max(500, half of the remaining budget) characters,
 *   3. `## Backlinks` — one-line backlink summaries for the first three
 *      included notes (skipped when `args.include_backlinks === false`),
 *   4. `## Recent` — recently edited files whose path contains a
 *      `YYYY-MM-DD` date (only when `args.recent_dailies > 0`).
 *
 * @param vault Vault handle; must provide `ensureExists`, `resolveInside`
 *   and `readNote`.
 * @param args `{ query, folder?, budget_tokens?, include_backlinks?, recent_dailies? }`.
 *   `budget_tokens` defaults to 4000.
 * @param ctx Search context; `ftsIndex` and `embedFile` are forwarded to
 *   `searchHybrid`.
 * @returns `{ query, bundle, estimated_tokens, budget_tokens, sections, included_notes }`
 *   where `sections` holds the character count contributed by each part.
 * @throws Error when `args.query` is missing or blank.
 */
export async function contextPack(vault, args, ctx) {
    await vault.ensureExists();
    if (!args.query?.trim())
        throw new Error("context_pack: `query` is required");
    const budget = args.budget_tokens ?? 4000;
    const charBudget = budget * 4; // ~4 chars/token
    const includeBacklinks = args.include_backlinks !== false;
    const recentN = Math.max(0, args.recent_dailies ?? 0);
    // 1) Hybrid retrieval — top-K notes
    const search = await searchHybrid(vault, { query: args.query, folder: args.folder, limit: 10 }, { ftsIndex: ctx.ftsIndex, embedFile: ctx.embedFile });
    const sections = [`# Context for: ${args.query}\n`];
    const includedNotes = [];
    let charsUsed = sections[0]?.length ?? 0;
    let notesBytes = 0;
    let backlinksBytes = 0;
    let dailiesBytes = 0;
    // 2) Pack note bodies until budget exhausted
    sections.push("## Top notes");
    for (const m of search.matches) {
        if (charsUsed >= charBudget)
            break;
        try {
            const note = await vault.readNote(vault.resolveInside(m.path), undefined);
            const body = note.parsed.body.trim();
            const remaining = charBudget - charsUsed;
            // Truncate body to fit remaining budget for THIS note (~50% of remainder
            // so we leave room for backlinks + dailies). The 500-char floor keeps a
            // late note from being cut down to a useless stub.
            const noteCap = Math.min(body.length, Math.max(500, Math.floor(remaining * 0.5)));
            const trimmed = body.length <= noteCap ? body : `${body.slice(0, noteCap)}\n\n[…truncated…]`;
            const block = `### ${m.path}\n\n${trimmed}\n`;
            sections.push(block);
            // `block` already contains the `### <path>` header, so its length is
            // the full cost of this note. Adding the header length again would
            // charge it twice and end packing earlier than the budget requires.
            charsUsed += block.length;
            notesBytes += block.length;
            includedNotes.push(m.path);
        }
        catch {
            // skip unreadable notes
        }
    }
    // 3) 1-line backlink summaries for top-3
    if (includeBacklinks && includedNotes.length > 0 && charsUsed < charBudget) {
        sections.push("## Backlinks");
        let backlinksAdded = 0;
        for (const notePath of includedNotes.slice(0, 3)) {
            if (charsUsed >= charBudget)
                break;
            try {
                const links = await getBacklinks(vault, { path: notePath, limit: 5 });
                if (links.length > 0) {
                    const block = `### → ${notePath}\n${links.map((l) => `- ${l.path} : ${(l.snippets[0] ?? "").slice(0, 80)}`).join("\n")}\n`;
                    sections.push(block);
                    charsUsed += block.length;
                    backlinksBytes += block.length;
                    backlinksAdded += links.length;
                }
            }
            catch {
                // skip — backlinks are best-effort and must not fail the bundle
            }
        }
        if (backlinksAdded === 0)
            sections.pop(); // remove empty heading
    }
    // 4) Recent daily notes
    if (recentN > 0 && charsUsed < charBudget) {
        try {
            const recent = await getRecentEdits(vault, { since_minutes: 60 * 24 * 7, limit: recentN, folder: args.folder });
            // A "daily" is any recently edited file whose path contains a date.
            const dailies = recent.filter((r) => /\d{4}-\d{2}-\d{2}/.test(r.path));
            if (dailies.length > 0) {
                sections.push(`## Recent (${dailies.length} dailies, last 7 days)`);
                for (const d of dailies) {
                    if (charsUsed >= charBudget)
                        break;
                    const block = `- ${d.path} (${d.mtime})`;
                    sections.push(block);
                    charsUsed += block.length;
                    dailiesBytes += block.length;
                }
            }
        }
        catch {
            // skip — the recent-edits section is best-effort
        }
    }
    const bundle = sections.join("\n");
    return {
        query: args.query,
        bundle,
        estimated_tokens: Math.ceil(bundle.length / 4),
        budget_tokens: budget,
        sections: { notes: notesBytes, backlinks: backlinksBytes, dailies: dailiesBytes },
        included_notes: includedNotes
    };
}
|
|
2900
|
+
// ─── small set / string helpers shared by find_similar / get_note_neighbors ─
|
|
2901
|
+
/**
 * Jaccard similarity |a ∩ b| / |a ∪ b| of two sets.
 *
 * @param a Set-like collection (iterable, with `size`).
 * @param b Set-like collection (with `has` and `size`).
 * @returns A value in [0, 1]; 0 when both collections are empty.
 */
function jaccard(a, b) {
    const bothEmpty = a.size === 0 && b.size === 0;
    if (bothEmpty) {
        return 0;
    }
    const shared = [...a].reduce((count, item) => (b.has(item) ? count + 1 : count), 0);
    const unionSize = a.size + b.size - shared;
    if (unionSize === 0) {
        return 0;
    }
    return shared / unionSize;
}
|
|
2911
|
+
/**
 * Count how many items yielded by `a` are members of `b`.
 *
 * @param a Any iterable.
 * @param b Collection exposing `has(item)`.
 * @returns Number of items of `a` for which `b.has(item)` is truthy.
 */
function intersectionSize(a, b) {
    return Array.from(a).filter((item) => b.has(item)).length;
}
|
|
2918
|
+
/**
 * Collect the distinct character n-grams of a string.
 *
 * @param s Input string.
 * @param n Gram length.
 * @returns Set of every length-`n` substring of `s`. When `s` is shorter than
 *   `n`, the set holds `s` itself (or is empty for an empty string).
 */
function ngrams(s, n) {
    const grams = new Set();
    if (s.length < n) {
        // Too short to window: fall back to the whole string, if any.
        return s ? grams.add(s) : grams;
    }
    const lastStart = s.length - n;
    let start = 0;
    while (start <= lastStart) {
        grams.add(s.slice(start, start + n));
        start += 1;
    }
    return grams;
}
|
|
2929
|
+
// Lookup indexes memoised per `entries` object. Keyed weakly, so an index is
// dropped together with the entry list it was built from.
const entryIndexCache = new WeakMap();
/**
 * Return (building on first use) the lookup indexes for a list of entries.
 *
 * @param entries Iterable of entries exposing `basename` and `relPath`.
 * @returns `{ byBasename, byRelPath }`:
 *   - `byBasename`: lower-cased, `.md`-stripped basename → array of entries
 *     sharing it, in input order;
 *   - `byRelPath`: lower-cased, `.md`-stripped relative path → entry (the
 *     last entry wins when two paths collide).
 */
function indexFor(entries) {
    const memoised = entryIndexCache.get(entries);
    if (memoised) {
        return memoised;
    }
    const byBasename = new Map();
    const byRelPath = new Map();
    for (const entry of entries) {
        const baseKey = stripMd(entry.basename).toLowerCase();
        if (byBasename.has(baseKey)) {
            byBasename.get(baseKey).push(entry);
        }
        else {
            byBasename.set(baseKey, [entry]);
        }
        const pathKey = stripMd(entry.relPath).toLowerCase();
        byRelPath.set(pathKey, entry);
    }
    const built = { byBasename, byRelPath };
    entryIndexCache.set(entries, built);
    return built;
}
|
|
2949
|
+
/**
 * Resolve a link target to the best-matching entry.
 *
 * Resolution order:
 *   1. explicitly relative targets (`./`, `../`, or containing `/../`) are
 *      resolved against the directory of `fromNote` and looked up by path;
 *   2. otherwise the target is matched by basename — a unique hit wins, and
 *      among several hits the one in the same directory as `fromNote` is
 *      preferred, falling back to the first;
 *   3. path-qualified targets (containing `/`) are then tried as an exact
 *      relative path and finally as a path suffix.
 *
 * All comparisons ignore case and a trailing `.md`.
 *
 * @param entries Entries exposing `basename` and `relPath`.
 * @param target Link target text.
 * @param fromNote Optional path of the note containing the link.
 * @returns The matching entry, or `null` when nothing matches.
 */
function findBestMatch(entries, target, fromNote) {
    const { byBasename, byRelPath } = indexFor(entries);
    const looksRelative = target.startsWith("./") || target.startsWith("../") || target.includes("/../");
    if (looksRelative && fromNote) {
        // Normalise the note's directory to forward slashes before joining.
        const baseDir = path.dirname(fromNote).split(path.sep).join("/");
        const resolved = path.posix.normalize(path.posix.join(baseDir, target));
        const viaRelative = byRelPath.get(stripMd(resolved).toLowerCase());
        if (viaRelative) {
            return viaRelative;
        }
    }
    const normalized = stripMd(target).toLowerCase();
    const candidates = byBasename.get(normalized) ?? [];
    if (candidates.length > 0) {
        if (candidates.length > 1 && fromNote) {
            const noteDir = path.dirname(fromNote);
            const sibling = candidates.find((candidate) => path.dirname(candidate.relPath) === noteDir);
            if (sibling) {
                return sibling;
            }
        }
        return candidates[0] ?? null;
    }
    if (!target.includes("/")) {
        return null;
    }
    const exactPath = byRelPath.get(normalized);
    if (exactPath) {
        return exactPath;
    }
    // Suffix match — a linear scan, reached only for path-qualified targets
    // that have no exact path match.
    const suffix = `/${normalized}`;
    for (const entry of entries) {
        if (stripMd(entry.relPath).toLowerCase().endsWith(suffix)) {
            return entry;
        }
    }
    return null;
}
|
|
2987
|
+
/**
 * Cut a one-line snippet around a match and report the match's line number.
 *
 * @param text Full text that was searched.
 * @param idx Offset of the match in `text`; negative means "no match".
 * @param qLen Length of the matched query.
 * @returns `{ snippet, line }`. The snippet spans up to 60 characters either
 *   side of the match, has whitespace runs collapsed to single spaces, and
 *   gets a `…` on each side that was cut. `line` is 1-based. A negative
 *   `idx` yields `{ snippet: "", line: 0 }`.
 */
function sliceSnippet(text, idx, qLen) {
    if (idx < 0) {
        return { snippet: "", line: 0 };
    }
    const start = Math.max(0, idx - 60);
    const end = Math.min(text.length, idx + qLen + 60);
    const core = text.slice(start, end).replace(/\s+/g, " ").trim();
    const leading = start > 0 ? "…" : "";
    const trailing = end < text.length ? "…" : "";
    // Line number = count of newline-separated segments before the match.
    const line = text.slice(0, idx).split("\n").length;
    return { snippet: `${leading}${core}${trailing}`, line };
}
|
|
3000
|
+
/**
 * Drop a trailing `.md` extension (matched case-insensitively) from a name.
 *
 * @param name File name or path.
 * @returns `name` without its final `.md`; unchanged when there is none.
 */
function stripMd(name) {
    const markdownSuffix = /\.md$/i;
    return name.replace(markdownSuffix, "");
}
|
|
3003
|
+
/**
 * Canonicalise a tag: remove any leading `#` characters and lower-case it.
 *
 * @param t Raw tag text, with or without leading `#`.
 * @returns The lower-cased tag without leading hashes.
 */
function normalizeTag(t) {
    const withoutHashes = t.replace(/^#+/, "");
    return withoutHashes.toLowerCase();
}
|
|
3006
|
+
/**
 * List the vault's PDF files, newest first.
 *
 * @param vault Vault handle; must provide `ensureExists`,
 *   `listFilesByExtension` and `readBinaryFile`.
 * @param args `{ folder?, limit? }` — `limit` defaults to 100.
 * @returns Up to `limit` records `{ path, name, size_bytes, mtime }`, ordered
 *   by modification time (most recent first). `name` is the basename without
 *   `.pdf`; `mtime` is an ISO-8601 string. PDFs that cannot be read are
 *   omitted.
 */
export async function listPdfs(vault, args) {
    await vault.ensureExists();
    const limit = args.limit ?? 100;
    const all = await vault.listFilesByExtension(".pdf", args.folder);
    // Order by recency BEFORE truncating. Applying the limit in listing order
    // and sorting afterwards can silently drop the newest PDFs whenever the
    // vault holds more than `limit` of them.
    const newestFirst = [...all].sort((a, b) => b.mtimeMs - a.mtimeMs);
    const out = [];
    for (const e of newestFirst) {
        if (out.length >= limit)
            break;
        let size = 0;
        try {
            // NOTE(review): the whole file is read only to learn its size; a
            // stat-based size would be cheaper if the vault exposes one — confirm.
            const buf = await vault.readBinaryFile(e.absPath);
            size = buf.byteLength;
        }
        catch {
            // Unreadable PDF — skip without poisoning the listing.
            continue;
        }
        out.push({
            path: e.relPath,
            name: e.basename.replace(/\.pdf$/i, ""),
            size_bytes: size,
            mtime: new Date(e.mtimeMs).toISOString()
        });
    }
    return out;
}
|
|
3033
|
+
/**
 * Extract the text of a PDF stored in the vault.
 *
 * @param vault Vault handle; must provide `ensureExists`, `resolveInside`,
 *   `stat`, `toRel` and `readBinaryFile`.
 * @param args `{ path, pages?, include_metadata? }`. `.pdf` is appended to
 *   `path` when missing. `pages` is an optional 1-indexed inclusive
 *   `[from, to]` range; a malformed range is ignored and all pages are
 *   returned. Metadata is included unless `include_metadata === false`.
 * @returns Record with `path`, `name`, `size_bytes`, `mtime`, `page_count`
 *   (pages returned), `has_text`, `pages`, `full_text`, `total_page_count`
 *   (pages in the document) and, when available, `metadata`.
 * @throws Error when `args.path` is missing.
 */
export async function readPdf(vault, args) {
    await vault.ensureExists();
    if (!args.path) {
        throw new Error("path is required");
    }
    const hasPdfSuffix = args.path.toLowerCase().endsWith(".pdf");
    const abs = vault.resolveInside(hasPdfSuffix ? args.path : `${args.path}.pdf`);
    const stat = await vault.stat(abs); // throws if missing or excluded
    const rel = vault.toRel(abs);
    const buf = await vault.readBinaryFile(abs);
    // Lazy import — keeps the markdown-only path zero-cost when pdfjs-dist
    // isn't installed (--omit=optional users).
    const { extractPdfText } = await import("./pdf.js");
    const result = await extractPdfText(buf);
    // Optional page-range slice (1-indexed inclusive). Out-of-range bounds are
    // clamped by `slice()` rather than rejected.
    const [from, to] = args.pages?.length === 2 ? args.pages : [];
    const rangeIsValid = typeof from === "number" && typeof to === "number" && from > 0 && to >= from;
    const pages = rangeIsValid ? result.pages.slice(from - 1, to) : result.pages;
    const fileName = rel.split("/").pop();
    const name = fileName?.replace(/\.pdf$/i, "") ?? rel;
    const mtime = new Date(stat.mtimeMs).toISOString();
    const pageRecords = pages.map((page) => ({
        page_number: page.pageNumber,
        text: page.text,
        is_empty: page.isEmpty,
        char_count: page.charCount
    }));
    const nonEmptyTexts = pages.map((page) => page.text).filter((text) => text.length > 0);
    const out = {
        path: rel,
        name,
        size_bytes: buf.byteLength,
        mtime,
        page_count: pages.length,
        has_text: pages.some((page) => !page.isEmpty),
        pages: pageRecords,
        full_text: nonEmptyTexts.join("\n\n"),
        total_page_count: result.pageCount
    };
    const wantsMetadata = args.include_metadata !== false;
    if (wantsMetadata && Object.keys(result.metadata).length > 0) {
        const meta = result.metadata;
        out.metadata = {
            title: meta.title,
            author: meta.author,
            subject: meta.subject,
            keywords: meta.keywords,
            creator: meta.creator,
            producer: meta.producer,
            creation_date: meta.creationDate,
            mod_date: meta.modDate
        };
    }
    return out;
}
|
|
3092
|
+
/**
 * Run OCR over a PDF stored in the vault.
 *
 * @param vault Vault handle; must provide `ensureExists`, `resolveInside`,
 *   `stat`, `toRel` and `readBinaryFile`.
 * @param args `{ path, lang?, pages?, scale? }`. `.pdf` is appended to `path`
 *   when missing. `lang`, `pages` and a numeric `scale` are forwarded to the
 *   OCR extractor only when provided.
 * @returns Record with `path`, `name`, `size_bytes`, `mtime`, `page_count`,
 *   `total_page_count`, `has_text`, `pages` (each with a `confidence` rounded
 *   to one decimal), `full_text`, `mean_confidence` (NaN when the extractor
 *   reports a non-finite mean) and `langs`.
 * @throws Error when `args.path` is missing.
 */
export async function ocrPdf(vault, args) {
    await vault.ensureExists();
    if (!args.path) {
        throw new Error("path is required");
    }
    const hasPdfSuffix = args.path.toLowerCase().endsWith(".pdf");
    const abs = vault.resolveInside(hasPdfSuffix ? args.path : `${args.path}.pdf`);
    const stat = await vault.stat(abs); // throws if missing or excluded
    const rel = vault.toRel(abs);
    const buf = await vault.readBinaryFile(abs);
    // Lazy import — keeps the markdown-only path zero-cost when tesseract /
    // canvas optionalDeps aren't installed.
    const { extractPdfWithOcr } = await import("./ocr.js");
    // Forward only the options the caller actually supplied.
    const ocrOptions = {};
    if (args.lang) {
        ocrOptions.langs = args.lang;
    }
    if (args.pages) {
        ocrOptions.pages = args.pages;
    }
    if (typeof args.scale === "number") {
        ocrOptions.scale = args.scale;
    }
    const result = await extractPdfWithOcr(buf, ocrOptions);
    const roundToTenth = (value) => Math.round(value * 10) / 10;
    const fileName = rel.split("/").pop();
    const name = fileName?.replace(/\.pdf$/i, "") ?? rel;
    const mtime = new Date(stat.mtimeMs).toISOString();
    const pageRecords = result.pages.map((page) => ({
        page_number: page.pageNumber,
        text: page.text,
        is_empty: page.isEmpty,
        char_count: page.charCount,
        confidence: roundToTenth(page.confidence)
    }));
    let meanConfidence = Number.NaN;
    if (Number.isFinite(result.meanConfidence)) {
        meanConfidence = roundToTenth(result.meanConfidence);
    }
    return {
        path: rel,
        name,
        size_bytes: buf.byteLength,
        mtime,
        page_count: result.pages.length,
        total_page_count: result.pageCount,
        has_text: result.hasText,
        pages: pageRecords,
        full_text: result.fullText,
        mean_confidence: meanConfidence,
        langs: result.langs
    };
}
|
|
3132
|
+
//# sourceMappingURL=tools.js.map
|