@oomkapwn/enquire-mcp 3.5.13 → 3.6.0-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +106 -0
- package/dist/eval.js +1 -1
- package/dist/eval.js.map +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/tools/index.d.ts +6 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +6 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/tools/media.d.ts +182 -0
- package/dist/tools/media.d.ts.map +1 -0
- package/dist/tools/media.js +304 -0
- package/dist/tools/media.js.map +1 -0
- package/dist/tools/meta.d.ts +201 -0
- package/dist/tools/meta.d.ts.map +1 -0
- package/dist/tools/meta.js +752 -0
- package/dist/tools/meta.js.map +1 -0
- package/dist/tools/read.d.ts +251 -0
- package/dist/tools/read.d.ts.map +1 -0
- package/dist/tools/read.js +643 -0
- package/dist/tools/read.js.map +1 -0
- package/dist/tools/search.d.ts +279 -0
- package/dist/tools/search.d.ts.map +1 -0
- package/dist/tools/search.js +891 -0
- package/dist/tools/search.js.map +1 -0
- package/dist/tools/write.d.ts +145 -0
- package/dist/tools/write.d.ts.map +1 -0
- package/dist/tools/write.js +560 -0
- package/dist/tools/write.js.map +1 -0
- package/package.json +1 -1
- package/dist/tools.d.ts +0 -980
- package/dist/tools.d.ts.map +0 -1
- package/dist/tools.js +0 -3132
- package/dist/tools.js.map +0 -1
|
@@ -0,0 +1,752 @@
|
|
|
1
|
+
import * as path from "node:path";
|
|
2
|
+
import matter from "gray-matter";
|
|
3
|
+
import { getBacklinks, getRecentEdits, listTags } from "./read.js";
|
|
4
|
+
import { searchHybrid } from "./search.js";
|
|
5
|
+
import { resolveTarget, suggestSimilar } from "./write.js";
|
|
6
|
+
/**
 * Dry-run check of a proposed note write. Nothing is written; the function
 * only reads the vault and returns a structured report.
 *
 * Checks performed, in order: path resolution inside the vault, YAML
 * frontmatter parsing, wikilink resolution against existing notes, tag
 * classification (existing vs. new) and path/title collisions.
 *
 * @param {object} vault - Vault handle; this function uses `ensureExists`,
 *   `resolveInside`, `toRel`, `listMarkdown`, `stat` and `findByTitle`.
 * @param {object} args
 * @param {string} args.path - Proposed note path; `.md` is appended when missing.
 * @param {string} args.content - Full proposed note text (frontmatter + body).
 * @param {string} [args.mode="create"] - With "create", an existing file at the path is an error.
 * @returns {Promise<object>} `{ ok, proposed_path, mode, errors, warnings, yaml, wikilinks, tags, collision }`;
 *   `ok` is true exactly when `errors` is empty.
 */
export async function validateNoteProposal(vault, args) {
    await vault.ensureExists();
    const mode = args.mode ?? "create";
    const errors = [];
    const warnings = [];
    // 1. Path sanity. resolveInside throws on traversal — capture as error,
    // don't let it propagate as a generic exception (the validator should
    // return a structured result for ANY input).
    let normalizedPath = args.path.toLowerCase().endsWith(".md") ? args.path : `${args.path}.md`;
    let absPath = null;
    try {
        absPath = vault.resolveInside(normalizedPath);
        normalizedPath = vault.toRel(absPath);
    }
    catch (err) {
        errors.push({
            kind: "path-traversal",
            message: err instanceof Error ? err.message : String(err)
        });
    }
    // 2. YAML parse via gray-matter (the same parser used at write time).
    // The parsed frontmatter is kept so the tag step below can reuse it
    // instead of parsing the content a second time.
    const yamlReport = { parsed: false, error: null, keys: [] };
    let bodyAfterFm = args.content;
    let fmData = {};
    try {
        const parsed = matter(args.content);
        fmData = parsed.data ?? {};
        yamlReport.parsed = true;
        yamlReport.keys = Object.keys(fmData);
        bodyAfterFm = parsed.content;
    }
    catch (err) {
        yamlReport.error = err instanceof Error ? err.message : String(err);
        errors.push({ kind: "yaml-invalid", message: `YAML frontmatter could not be parsed: ${yamlReport.error}` });
    }
    // 3. Wikilink resolution against the live vault. The lookbehind skips
    // embeds (`![[...]]`).
    const all = await vault.listMarkdown();
    const wikilinkRe = /(?<!!)\[\[([^\]\n]+?)\]\]/g;
    const wikilinks = [];
    for (const m of bodyAfterFm.matchAll(wikilinkRe)) {
        const raw = m[0];
        const inner = (m[1] ?? "").trim();
        if (!inner)
            continue;
        // Strip alias / section / block to get the bare target name.
        const beforePipe = inner.split("|")[0] ?? "";
        const beforeHash = beforePipe.split("#")[0] ?? "";
        const target = beforeHash.split("^")[0]?.trim() ?? "";
        if (!target)
            continue;
        const match = findBestMatch(all, target, normalizedPath);
        if (match) {
            wikilinks.push({
                raw,
                target,
                status: "resolved",
                resolved_path: match.relPath,
                suggestions: []
            });
        }
        else {
            const suggestions = await suggestSimilar(vault, target);
            wikilinks.push({
                raw,
                target,
                status: "broken",
                resolved_path: null,
                suggestions
            });
            warnings.push({
                kind: "broken-wikilink",
                message: `[[${target}]] does not resolve to any existing note`,
                suggestion: suggestions.length ? `Closest matches: ${suggestions.join(", ")}` : undefined
            });
        }
    }
    // 4. Tag pre-classification (existing vs new), compared case-insensitively.
    const existingTags = new Set((await listTags(vault, {})).map((t) => t.tag.toLowerCase()));
    const proposedTagsRaw = new Set();
    // Frontmatter tags: either a list or a single whitespace/comma separated string.
    const fmTags = fmData.tags ?? fmData.tag;
    if (Array.isArray(fmTags)) {
        for (const t of fmTags)
            if (typeof t === "string" && t)
                proposedTagsRaw.add(t.replace(/^#/, ""));
    }
    else if (typeof fmTags === "string" && fmTags) {
        for (const t of fmTags.split(/[\s,]+/))
            if (t)
                proposedTagsRaw.add(t.replace(/^#/, ""));
    }
    // Inline tags.
    const inlineTagRe = /(?:^|[\s([{>])#([\p{L}][\p{L}\p{N}_/-]*)/gu;
    for (const m of bodyAfterFm.matchAll(inlineTagRe)) {
        if (m[1])
            proposedTagsRaw.add(m[1]);
    }
    const tags = [];
    for (const t of proposedTagsRaw) {
        const status = existingTags.has(t.toLowerCase()) ? "existing" : "new";
        tags.push({ name: t, status });
        if (status === "new") {
            warnings.push({
                kind: "new-tag",
                message: `#${t} is new — won't fork an existing tag (case-insensitive check)`
            });
        }
    }
    // 5. Path collision check. Skipped when the path could not be resolved.
    let collision = { kind: "none" };
    if (absPath) {
        try {
            await vault.stat(absPath);
            // Path exists.
            if (mode === "create") {
                errors.push({
                    kind: "path-collision",
                    message: `Note already exists at ${normalizedPath} (mode="create" refuses overwrite)`
                });
            }
            collision = { kind: "path-exists", existing_path: normalizedPath };
        }
        catch {
            // Path doesn't exist — try title collision (an existing note at a different path).
            const titleFromBasename = stripMd(path.basename(normalizedPath));
            const existing = await vault.findByTitle(titleFromBasename);
            if (existing && existing.relPath !== normalizedPath) {
                warnings.push({
                    kind: "title-collision",
                    message: `A note titled "${titleFromBasename}" already exists at ${existing.relPath} — proceeding will create a same-titled file at a different path`,
                    suggestion: existing.relPath
                });
                collision = { kind: "title-exists-elsewhere", existing_path: existing.relPath };
            }
        }
    }
    return {
        ok: errors.length === 0,
        proposed_path: normalizedPath,
        mode,
        errors,
        warnings,
        yaml: yamlReport,
        wikilinks,
        tags,
        collision
    };
}
|
|
153
|
+
/**
 * Health report for the vault (or one folder): orphans, broken wikilinks,
 * stubs, stale notes and frequently mentioned concepts that have no page.
 *
 * @param {object} vault - Vault handle; uses `ensureExists`, `listMarkdown` and `readNote`.
 * @param {object} args
 * @param {string} [args.folder] - Restrict the scan to this folder.
 * @param {number} [args.stub_word_threshold=100] - Notes with fewer body words are stubs.
 * @param {number} [args.stale_days=365] - Notes not touched for this many days are stale.
 * @param {number} [args.concept_min_mentions=3] - Minimum distinct notes mentioning a phrase.
 * @param {number} [args.max_per_bucket=50] - Maximum findings returned per category.
 * @returns {Promise<object>} `{ scope, scanned, generated_at, summary, findings }`.
 */
export async function lintWiki(vault, args) {
    await vault.ensureExists();
    const stubThreshold = args.stub_word_threshold ?? 100;
    const staleDays = args.stale_days ?? 365;
    const conceptMinMentions = args.concept_min_mentions ?? 3;
    const cap = args.max_per_bucket ?? 50;
    // `entries` is the scanned scope; `allEntries` is used for link resolution
    // and title lookup so links may point anywhere in the vault.
    const entries = await vault.listMarkdown(args.folder);
    const allEntries = await vault.listMarkdown();
    const staleMs = Date.now() - staleDays * 24 * 3600 * 1000;
    // Single pass: collect inbound counts, outbound presence, broken links,
    // word counts, last-reviewed times, capitalised-phrase mentions.
    // NOTE(review): inbound counts only come from notes inside `entries`; with
    // `args.folder` set, links from notes outside the folder are not counted —
    // confirm that is the intended orphan definition.
    const inbound = new Map();
    const outboundPresence = new Set();
    const broken = [];
    const stubs = [];
    const stale = [];
    const titleSet = new Set();
    for (const e of allEntries)
        titleSet.add(stripMd(e.basename).toLowerCase());
    // Capitalised-phrase mentions. A phrase is 1-3 CapitalCase tokens
    // (e.g. "Reinforcement Learning", "Attention Heads").
    // Stop-words: dropped when they appear at the start of a phrase.
    const conceptStopwords = new Set([
        "The",
        "A",
        "An",
        "This",
        "That",
        "These",
        "Those",
        "If",
        "When",
        "While",
        "But",
        "And",
        "Or"
    ]);
    const capPhraseRe = /\b((?:[A-Z][a-z][a-z]+(?:\s+[A-Z][a-z][a-z]+){0,2}))\b/g;
    const conceptMentions = new Map(); // phrase → set of source paths
    for (const e of entries) {
        const { parsed, mtimeMs } = await vault.readNote(e.absPath, e.mtimeMs);
        // Outbound + broken pass.
        if (parsed.wikilinks.length > 0)
            outboundPresence.add(e.relPath);
        for (const link of parsed.wikilinks) {
            const m = findBestMatch(allEntries, link.target, e.relPath);
            if (m) {
                inbound.set(m.relPath, (inbound.get(m.relPath) ?? 0) + 1);
            }
            else if (broken.length < cap) {
                broken.push({
                    kind: "broken-link",
                    path: e.relPath,
                    message: `[[${link.target}]] in ${e.relPath} doesn't resolve`,
                    suggestion: "create the missing note, fix the link, or remove it",
                    details: { target: link.target, raw: link.raw }
                });
            }
        }
        // Stub pass: whitespace-separated word count of the body.
        const wordCount = parsed.body.trim() ? parsed.body.trim().split(/\s+/).length : 0;
        if (wordCount < stubThreshold && stubs.length < cap) {
            stubs.push({
                kind: "stub",
                path: e.relPath,
                message: `${e.relPath} is ${wordCount} words (threshold ${stubThreshold})`,
                suggestion: "develop, merge into a hub, or archive",
                details: { word_count: wordCount, mtime: new Date(mtimeMs).toISOString() }
            });
        }
        // Stale pass — a usable frontmatter `last_reviewed` overrides mtime.
        const lastTouched = resolveLastTouched(parsed.frontmatter, mtimeMs);
        if (lastTouched.ms < staleMs && stale.length < cap) {
            stale.push({
                kind: "stale",
                path: e.relPath,
                message: `${e.relPath} not touched since ${new Date(lastTouched.ms).toISOString().slice(0, 10)}`,
                suggestion: "review for accuracy or archive",
                details: {
                    last_touched: new Date(lastTouched.ms).toISOString(),
                    source: lastTouched.source
                }
            });
        }
        // Concept-mention pass — capitalised phrases in the body that aren't
        // already a note title. Cap at 30 unique phrases per source to bound
        // memory, but loose enough that real concepts in long notes don't
        // get truncated.
        const seenInThisNote = new Set();
        for (const m of parsed.body.matchAll(capPhraseRe)) {
            const phrase = m[1];
            if (!phrase)
                continue;
            const firstWord = phrase.split(/\s+/)[0];
            if (firstWord !== undefined && conceptStopwords.has(firstWord))
                continue;
            if (seenInThisNote.has(phrase))
                continue;
            if (seenInThisNote.size >= 30)
                break;
            // Skip phrases that are already a vault note (basename match).
            if (titleSet.has(phrase.toLowerCase()))
                continue;
            seenInThisNote.add(phrase);
            const set = conceptMentions.get(phrase) ?? new Set();
            set.add(e.relPath);
            conceptMentions.set(phrase, set);
        }
    }
    // Orphan findings (no inbound AND no outbound).
    const orphans = [];
    for (const e of entries) {
        if (orphans.length >= cap)
            break;
        if (!inbound.get(e.relPath) && !outboundPresence.has(e.relPath)) {
            orphans.push({
                kind: "orphan",
                path: e.relPath,
                message: `${e.relPath} has no inbound or outbound wikilinks`,
                suggestion: "link from a hub note, archive, or delete",
                details: { mtime: new Date(e.mtimeMs).toISOString() }
            });
        }
    }
    // Concept candidates — phrases mentioned by ≥ N distinct notes, most-mentioned first.
    const conceptCandidates = [];
    const ranked = [...conceptMentions.entries()]
        .filter(([, sources]) => sources.size >= conceptMinMentions)
        .sort((a, b) => b[1].size - a[1].size);
    for (const [phrase, sources] of ranked) {
        if (conceptCandidates.length >= cap)
            break;
        conceptCandidates.push({
            kind: "concept-without-page",
            message: `"${phrase}" is mentioned by ${sources.size} notes but has no page of its own`,
            suggestion: `create a page \`${phrase}.md\` and refile the most-developed mentions into it`,
            details: { phrase, mention_count: sources.size, sources: [...sources].slice(0, 5) }
        });
    }
    return {
        scope: args.folder ?? "(whole vault)",
        scanned: entries.length,
        generated_at: new Date().toISOString(),
        summary: {
            orphans: orphans.length,
            broken_links: broken.length,
            stubs: stubs.length,
            stale: stale.length,
            concept_candidates: conceptCandidates.length
        },
        findings: {
            orphans,
            broken_links: broken,
            stubs,
            stale,
            concept_candidates: conceptCandidates
        }
    };
}
/**
 * Pick the "last touched" timestamp for a note. A frontmatter `last_reviewed`
 * (or `last-reviewed`) value is used when it is a Date, a parseable date
 * string or a finite number; otherwise the file mtime is used. `source`
 * names whichever value was actually used, so an unparseable frontmatter
 * value is reported as "mtime".
 */
function resolveLastTouched(frontmatter, mtimeMs) {
    const raw = frontmatter?.last_reviewed ?? frontmatter?.["last-reviewed"];
    // gray-matter (js-yaml) parses ISO dates into Date objects automatically,
    // so Date | string | number are all accepted.
    let candidate = Number.NaN;
    if (raw instanceof Date)
        candidate = raw.getTime();
    else if (typeof raw === "string")
        candidate = Date.parse(raw);
    else if (typeof raw === "number")
        candidate = raw;
    return Number.isFinite(candidate)
        ? { ms: candidate, source: "frontmatter.last_reviewed" }
        : { ms: mtimeMs, source: "mtime" };
}
|
|
328
|
+
/**
 * Collect "open question" markers from note bodies.
 *
 * @param {object} vault - Vault handle; uses `ensureExists`, `listMarkdown` and `readNote`.
 * @param {object} args
 * @param {string} [args.folder] - Restrict the scan to this folder.
 * @param {number} [args.limit=100] - Stop scanning once this many hits are collected.
 * @param {string} [args.pattern] - Custom regex source, applied per line with the `i`
 *   flag; capture group 1 is the question text. A line whose group 1 is empty or
 *   missing is skipped.
 * @returns {Promise<object[]>} Hits `{ question, source_path, source_title,
 *   context_heading, line, age_days, mtime }`, sorted by `age_days` descending.
 */
export async function getOpenQuestions(vault, args) {
    await vault.ensureExists();
    const limit = args.limit ?? 100;
    // Default pattern: "Open question:" / "Open question -" / "Q:" / "TODO?" / "??"
    // followed by the question text, anchored at line start (with optional
    // list-bullet / quote / heading prefix). The `:` or `-` separator is
    // REQUIRED after "open question" and "q": with an optional separator every
    // line that merely starts with the letter q ("Quick…", "questions…") was
    // reported as a question. "TODO?" and "??" are distinctive on their own,
    // so their separator stays optional.
    const defaultPat = "^\\s*(?:[#\\->\\*\\d\\.]+\\s+)?(?:(?:open\\s+question|q)\\s*[:\\-]|(?:todo\\?|\\?\\?)\\s*[:\\-]?)\\s*(.+)$";
    // No `g` flag: the regex is applied line-by-line below, so exec() is stateless.
    const re = new RegExp(args.pattern ?? defaultPat, "i");
    const entries = await vault.listMarkdown(args.folder);
    const out = [];
    const now = Date.now();
    for (const e of entries) {
        if (out.length >= limit)
            break;
        const { parsed, mtimeMs } = await vault.readNote(e.absPath, e.mtimeMs);
        // Scan parsed.body so frontmatter lines (which can contain "Q:" -ish
        // tokens) don't pollute results.
        const lines = parsed.body.split("\n");
        let currentHeading = null;
        for (let i = 0; i < lines.length; i++) {
            const line = lines[i] ?? "";
            const headingMatch = /^(#{1,6})\s+(.+?)\s*#*\s*$/.exec(line);
            if (headingMatch?.[2]) {
                currentHeading = headingMatch[2];
                // A heading line itself isn't a question hit — skip the regex match.
                continue;
            }
            const m = re.exec(line);
            if (!m?.[1])
                continue;
            out.push({
                question: m[1].trim(),
                source_path: e.relPath,
                source_title: stripMd(e.basename),
                context_heading: currentHeading,
                // 1-based line number within the body (frontmatter excluded).
                line: i + 1,
                age_days: Math.round((now - mtimeMs) / (24 * 3600 * 1000)),
                mtime: new Date(mtimeMs).toISOString()
            });
            if (out.length >= limit)
                break;
        }
    }
    // Sort oldest-first so things aging out surface first.
    out.sort((a, b) => b.age_days - a.age_days);
    return out;
}
|
|
378
|
+
/**
 * Audit notes carrying a given tag (default `paper`) for a citation in
 * frontmatter. A note counts as cited when its frontmatter has an `arxiv`,
 * `doi`, `url` or `isbn` key (case-insensitive key match; the value is not
 * inspected). Uncited notes are flagged, with identifiers found in the body
 * offered as a proposed frontmatter patch.
 *
 * @param {object} vault - Vault handle; uses `ensureExists`, `listMarkdown` and `readNote`.
 * @param {object} args
 * @param {string} [args.tag="paper"] - Tag to audit; leading `#` characters are ignored.
 * @param {string} [args.folder] - Restrict the scan to this folder.
 * @param {number} [args.limit=100] - Stop once this many notes are flagged.
 * @returns {Promise<{scanned: number, flagged: object[]}>} `scanned` counts tagged
 *   notes visited before the limit was reached.
 */
export async function paperAudit(vault, args) {
    await vault.ensureExists();
    const tag = (args.tag ?? "paper").replace(/^#+/, "").toLowerCase();
    const limit = args.limit ?? 100;
    const entries = await vault.listMarkdown(args.folder);
    const arxivRe = /\barxiv[:\s]*([0-9]{4}\.[0-9]{4,5}(?:v\d+)?)\b/gi;
    const doiRe = /\bdoi[:\s]*(10\.\d{4,9}\/[\w\-._;()/:]+)/gi;
    const urlRe = /\bhttps?:\/\/[^\s<>")\]]+/g;
    let scanned = 0;
    const flagged = [];
    for (const e of entries) {
        if (flagged.length >= limit)
            break;
        const { parsed } = await vault.readNote(e.absPath, e.mtimeMs);
        const tagsLower = parsed.tags.map((t) => t.toLowerCase());
        if (!tagsLower.includes(tag))
            continue;
        scanned += 1;
        const fm = parsed.frontmatter ?? {};
        const fmKeys = new Set(Object.keys(fm).map((k) => k.toLowerCase()));
        const hasFmCitation = fmKeys.has("arxiv") || fmKeys.has("doi") || fmKeys.has("url") || fmKeys.has("isbn");
        // Clean ⇒ has a frontmatter citation. The body might cite OTHER papers,
        // but this note itself is properly identified.
        if (hasFmCitation)
            continue;
        // Scan parsed.body so the frontmatter's own arxiv/doi keys don't get
        // re-detected as "found in body".
        const body = parsed.body;
        const arxivIds = [...body.matchAll(arxivRe)].map((m) => m[1]).filter((v) => !!v);
        const doiIds = [...body.matchAll(doiRe)].map((m) => m[1]).filter((v) => !!v);
        const urls = [...body.matchAll(urlRe)].map((m) => m[0]);
        const foundInBody = {
            arxiv: [...new Set(arxivIds)],
            doi: [...new Set(doiIds)],
            url: [...new Set(urls)].slice(0, 3)
        };
        const bodyHasAnyId = foundInBody.arxiv.length > 0 || foundInBody.doi.length > 0 || foundInBody.url.length > 0;
        // Patch proposal: prefer arxiv/doi; fall back to the first URL only
        // when neither is present.
        let proposed = null;
        if (bodyHasAnyId) {
            proposed = {};
            if (foundInBody.arxiv[0])
                proposed.arxiv = foundInBody.arxiv[0];
            if (foundInBody.doi[0])
                proposed.doi = foundInBody.doi[0];
            if (foundInBody.url[0] && !proposed.arxiv && !proposed.doi)
                proposed.url = foundInBody.url[0];
        }
        // Identifiers named in the message mirror the patch preference. Without
        // the URL fallback a body containing only URLs produced the message
        // "has identifiers in body () but missing frontmatter".
        const strongIds = [
            ...foundInBody.arxiv.map((v) => `arxiv:${v}`),
            ...foundInBody.doi.map((v) => `doi:${v}`)
        ];
        const shownIds = strongIds.length > 0 ? strongIds : foundInBody.url.map((v) => `url:${v}`);
        const msg = bodyHasAnyId
            ? `${e.relPath} has identifiers in body (${shownIds.slice(0, 2).join(", ")}) but missing frontmatter`
            : `${e.relPath} has #${tag} but no arxiv/doi/url anywhere — citation missing`;
        flagged.push({
            path: e.relPath,
            title: stripMd(e.basename),
            has_frontmatter_citation: hasFmCitation,
            found_in_body: foundInBody,
            proposed_frontmatter_patch: proposed,
            message: msg
        });
    }
    return { scanned, flagged };
}
|
|
444
|
+
/**
 * Shortest chain of wikilinks (and, unless disabled, embeds) leading from one
 * note to another, found by a layer-by-layer breadth-first search.
 *
 * @param {object} vault - Vault handle; uses `ensureExists`, `listMarkdown` and `readNote`.
 * @param {object} args
 * @param {string} [args.from] / [args.from_title] - Start note by path or title (path wins).
 * @param {string} [args.to] / [args.to_title] - Target note by path or title (path wins).
 * @param {number} [args.max_depth=5] - Maximum number of hops explored.
 * @param {boolean} [args.include_alternatives] - When exactly `true`, collect every
 *   path of the shortest length (at most 10 returned as `alternatives`).
 * @param {boolean} [args.follow_embeds] - Embeds are followed unless this is exactly `false`.
 * @returns {Promise<object>} `{ from, to, found, hops, path }`; `hops` is -1 and `path`
 *   is empty when no route exists within `max_depth`.
 */
export async function findPath(vault, args) {
    await vault.ensureExists();
    const depthLimit = args.max_depth ?? 5;
    const wantAlternatives = args.include_alternatives === true;
    const traverseEmbeds = args.follow_embeds !== false;
    // Build the selector handed to resolveTarget: path beats title, else empty.
    const selectorFor = (notePath, noteTitle) => {
        if (notePath !== undefined)
            return { path: notePath };
        if (noteTitle !== undefined)
            return { title: noteTitle };
        return {};
    };
    const origin = await resolveTarget(vault, selectorFor(args.from, args.from_title));
    const goal = await resolveTarget(vault, selectorFor(args.to, args.to_title));
    const originStep = () => ({ path: origin.relPath, title: stripMd(origin.basename), via: "" });
    const success = (hops, trail) => ({
        from: origin.relPath,
        to: goal.relPath,
        found: true,
        hops,
        path: trail
    });
    const failure = () => ({ from: origin.relPath, to: goal.relPath, found: false, hops: -1, path: [] });
    // Trivial case: start and target are the same file.
    if (origin.absPath === goal.absPath) {
        return success(0, [originStep()]);
    }
    const notes = await vault.listMarkdown();
    // relPath → entry lookup built once, so expanding a node is a map hit
    // rather than a scan of the whole listing.
    const noteByRel = new Map();
    for (const note of notes)
        noteByRel.set(note.relPath, note);
    // `seen` holds every non-target note already queued; the target itself is
    // never marked so several equally short trails can reach it.
    const seen = new Set([origin.relPath]);
    let layer = [{ rel: origin.relPath, trail: [originStep()] }];
    const trails = [];
    let hitDepth = -1;
    let depth = 0;
    while (depth < depthLimit && layer.length > 0) {
        const hops = depth + 1;
        const upcoming = [];
        for (const { rel, trail } of layer) {
            const current = noteByRel.get(rel);
            if (!current)
                continue;
            const { parsed } = await vault.readNote(current.absPath, current.mtimeMs);
            const outgoing = traverseEmbeds ? [...parsed.wikilinks, ...parsed.embeds] : parsed.wikilinks;
            for (const link of outgoing) {
                const neighbour = findBestMatch(notes, link.target, current.relPath);
                if (!neighbour)
                    continue;
                const reachesGoal = neighbour.absPath === goal.absPath;
                if (!reachesGoal && seen.has(neighbour.relPath))
                    continue;
                const extended = [...trail, { path: neighbour.relPath, title: stripMd(neighbour.basename), via: link.raw }];
                if (!reachesGoal) {
                    seen.add(neighbour.relPath);
                    upcoming.push({ rel: neighbour.relPath, trail: extended });
                    continue;
                }
                if (hitDepth === -1)
                    hitDepth = hops;
                if (hitDepth !== hops)
                    continue;
                trails.push(extended);
                // Without alternatives the first hit is the answer.
                if (!wantAlternatives) {
                    return success(hitDepth, extended);
                }
            }
        }
        // The layer that reached the target is fully scanned; deeper layers
        // could only yield longer paths.
        if (hitDepth !== -1 && hops === hitDepth)
            break;
        layer = upcoming;
        depth += 1;
    }
    if (trails.length === 0) {
        return failure();
    }
    trails.sort((a, b) => a.length - b.length || (a[0]?.path ?? "").localeCompare(b[0]?.path ?? ""));
    const best = trails[0];
    if (!best) {
        return failure();
    }
    const outcome = success(hitDepth, best);
    if (wantAlternatives)
        outcome.alternatives = trails.slice(0, 10);
    return outcome;
}
|
|
546
|
+
/**
 * Build an `obsidian://open` URI for a note. Nothing is launched; the URI is
 * only returned.
 *
 * @param {object} vault - Vault handle; uses `ensureExists` and `root`.
 * @param {object} args - Passed to `resolveTarget` as-is; `args.new_pane`
 *   (truthy) adds `newpane=true` to the URI.
 * @returns {Promise<{uri: string, vault_name: string, path: string, title: string}>}
 */
export async function openInUi(vault, args) {
    await vault.ensureExists();
    const note = await resolveTarget(vault, args);
    // Vault name = leaf of the vault root path. obsidian:// matches by name OR
    // by the file's absolute path; if the user opened the vault from a
    // different name in Obsidian, the file argument still resolves correctly.
    const vaultLeaf = path.basename(vault.root);
    const query = new URLSearchParams();
    query.set("vault", vaultLeaf);
    // The `file` parameter is the vault-relative path without the .md suffix.
    query.set("file", stripMd(note.relPath));
    if (args.new_pane) {
        query.set("newpane", "true");
    }
    const uri = `obsidian://open?${query.toString()}`;
    return {
        uri,
        vault_name: vaultLeaf,
        path: note.relPath,
        title: stripMd(note.basename)
    };
}
|
|
564
|
+
/**
 * Assemble a markdown "context bundle" for a query: the top hybrid-search
 * notes (bodies truncated to fit), optional one-line backlink summaries for
 * the first three included notes, and optionally a list of recently edited
 * date-named notes.
 *
 * The budget is approximate: it is measured in characters (~4 per token),
 * section headings and the newlines that join sections are not counted, and
 * a note may use up to 500 characters even when less budget remains.
 *
 * @param {object} vault - Vault handle; uses `ensureExists`, `resolveInside` and `readNote`.
 * @param {object} args
 * @param {string} args.query - Required, non-blank search query.
 * @param {number} [args.budget_tokens=4000] - Approximate token budget.
 * @param {string} [args.folder] - Restrict search and recent edits to this folder.
 * @param {boolean} [args.include_backlinks] - Backlinks are included unless exactly `false`.
 * @param {number} [args.recent_dailies=0] - Maximum recent edits to consider.
 * @param {object} ctx - Supplies `ftsIndex` and `embedFile` for the hybrid search.
 * @returns {Promise<object>} `{ query, bundle, estimated_tokens, budget_tokens, sections, included_notes }`.
 * @throws {Error} When `args.query` is missing or blank.
 */
export async function contextPack(vault, args, ctx) {
    await vault.ensureExists();
    if (!args.query?.trim())
        throw new Error("context_pack: `query` is required");
    const budget = args.budget_tokens ?? 4000;
    const charBudget = budget * 4; // ~4 chars/token
    const includeBacklinks = args.include_backlinks !== false;
    const recentN = Math.max(0, args.recent_dailies ?? 0);
    // 1) Hybrid retrieval — top-K notes
    const search = await searchHybrid(vault, { query: args.query, folder: args.folder, limit: 10 }, { ftsIndex: ctx.ftsIndex, embedFile: ctx.embedFile });
    const sections = [`# Context for: ${args.query}\n`];
    const includedNotes = [];
    let charsUsed = sections[0]?.length ?? 0;
    let notesBytes = 0;
    let backlinksBytes = 0;
    let dailiesBytes = 0;
    // 2) Pack note bodies until budget exhausted
    sections.push("## Top notes");
    for (const m of search.matches) {
        if (charsUsed >= charBudget)
            break;
        try {
            const note = await vault.readNote(vault.resolveInside(m.path), undefined);
            const body = note.parsed.body.trim();
            const remaining = charBudget - charsUsed;
            // Truncate body to fit remaining budget for THIS note (~50% of remainder
            // so we leave room for backlinks + dailies), but never below 500 chars.
            const noteCap = Math.min(body.length, Math.max(500, Math.floor(remaining * 0.5)));
            const trimmed = body.length <= noteCap ? body : `${body.slice(0, noteCap)}\n\n[…truncated…]`;
            const block = `### ${m.path}\n\n${trimmed}\n`;
            sections.push(block);
            // `block` already contains its `### path` heading, so its length is
            // the full cost; adding the header length again double-counted it
            // and ended packing early.
            charsUsed += block.length;
            notesBytes += block.length;
            includedNotes.push(m.path);
        }
        catch {
            // Best effort: skip notes that cannot be resolved or read.
        }
    }
    // 3) 1-line backlink summaries for top-3
    if (includeBacklinks && includedNotes.length > 0 && charsUsed < charBudget) {
        sections.push("## Backlinks");
        let backlinksAdded = 0;
        for (const notePath of includedNotes.slice(0, 3)) {
            if (charsUsed >= charBudget)
                break;
            try {
                const links = await getBacklinks(vault, { path: notePath, limit: 5 });
                if (links.length > 0) {
                    const block = `### → ${notePath}\n${links.map((l) => `- ${l.path} : ${(l.snippets[0] ?? "").slice(0, 80)}`).join("\n")}\n`;
                    sections.push(block);
                    charsUsed += block.length;
                    backlinksBytes += block.length;
                    backlinksAdded += links.length;
                }
            }
            catch {
                // Best effort: a failed backlink lookup just omits that note's summary.
            }
        }
        if (backlinksAdded === 0)
            sections.pop(); // remove empty heading
    }
    // 4) Recent daily notes: recent edits (last 7 days) whose path contains a YYYY-MM-DD date.
    if (recentN > 0 && charsUsed < charBudget) {
        try {
            const recent = await getRecentEdits(vault, { since_minutes: 60 * 24 * 7, limit: recentN, folder: args.folder });
            const dailies = recent.filter((r) => /\d{4}-\d{2}-\d{2}/.test(r.path));
            if (dailies.length > 0) {
                sections.push(`## Recent (${dailies.length} dailies, last 7 days)`);
                for (const d of dailies) {
                    if (charsUsed >= charBudget)
                        break;
                    const block = `- ${d.path} (${d.mtime})`;
                    sections.push(block);
                    charsUsed += block.length;
                    dailiesBytes += block.length;
                }
            }
        }
        catch {
            // Best effort: the recent-notes section is optional.
        }
    }
    const bundle = sections.join("\n");
    return {
        query: args.query,
        bundle,
        estimated_tokens: Math.ceil(bundle.length / 4),
        budget_tokens: budget,
        sections: { notes: notesBytes, backlinks: backlinksBytes, dailies: dailiesBytes },
        included_notes: includedNotes
    };
}
|
|
659
|
+
// ─── small set / string helpers shared by find_similar / get_note_neighbors ─
|
|
660
|
+
/**
 * Jaccard similarity of two sets: |a ∩ b| / |a ∪ b|.
 *
 * @param {Set} a
 * @param {Set} b
 * @returns {number} A value in [0, 1]; 0 when both sets are empty.
 */
export function jaccard(a, b) {
    if (a.size === 0 && b.size === 0) {
        return 0;
    }
    const shared = [...a].filter((item) => b.has(item)).length;
    // Inclusion–exclusion gives the union size without building the union.
    const combined = a.size + b.size - shared;
    if (combined === 0) {
        return 0;
    }
    return shared / combined;
}
|
|
670
|
+
/**
 * Count the elements of `a` that are also members of `b`.
 *
 * @param {Iterable} a - Values to test.
 * @param {{has: function}} b - Collection queried with `has`.
 * @returns {number}
 */
export function intersectionSize(a, b) {
    let shared = 0;
    const cursor = a[Symbol.iterator]();
    for (let step = cursor.next(); !step.done; step = cursor.next()) {
        shared += b.has(step.value) ? 1 : 0;
    }
    return shared;
}
|
|
677
|
+
/**
 * Set of all length-`n` substrings of `s`. A non-empty string shorter than
 * `n` is returned as the single element; an empty one gives an empty set.
 *
 * @param {string} s
 * @param {number} n
 * @returns {Set<string>}
 */
export function ngrams(s, n) {
    if (s.length < n) {
        return new Set(s ? [s] : []);
    }
    // One window per start index 0 … s.length - n.
    const windowCount = s.length - n + 1;
    const windows = Array.from({ length: windowCount }, (_unused, start) => s.slice(start, start + n));
    return new Set(windows);
}
|
|
688
|
+
// Lookup indexes memoised per entries array; keyed weakly so an index goes
// away together with its listing.
const entryIndexCache = new WeakMap();
/**
 * Build (or fetch from cache) lookup maps for a note listing.
 *
 * @param {object[]} entries - Listing items with `basename` and `relPath`.
 * @returns {{byBasename: Map<string, object[]>, byRelPath: Map<string, object>}}
 *   `byBasename` maps the lower-cased basename without `.md` to every entry
 *   sharing it; `byRelPath` maps the lower-cased relPath without `.md` to one
 *   entry (the last one wins on a clash).
 */
export function indexFor(entries) {
    const memoised = entryIndexCache.get(entries);
    if (memoised) {
        return memoised;
    }
    const index = { byBasename: new Map(), byRelPath: new Map() };
    for (const entry of entries) {
        const nameKey = stripMd(entry.basename).toLowerCase();
        const bucket = index.byBasename.get(nameKey);
        if (bucket === undefined) {
            index.byBasename.set(nameKey, [entry]);
        }
        else {
            bucket.push(entry);
        }
        const pathKey = stripMd(entry.relPath).toLowerCase();
        index.byRelPath.set(pathKey, entry);
    }
    entryIndexCache.set(entries, index);
    return index;
}
|
|
708
|
+
/**
 * Resolve a wikilink target to a vault entry. All comparisons are
 * case-insensitive and ignore a trailing `.md`. Tried in order:
 *   1. relative targets (`./`, `../`, or containing `/../`) resolved against
 *      the linking note's folder;
 *   2. basename match — a unique match wins; among several, one in the linking
 *      note's folder is preferred, otherwise the first listed;
 *   3. for targets containing `/`: exact vault-relative path, then the first
 *      entry whose path ends with `/<target>`.
 *
 * @param {object[]} entries - Vault listing (items with `basename` and `relPath`).
 * @param {string} target - Link target without alias/section/block parts.
 * @param {string} [fromNote] - relPath of the linking note.
 * @returns {object|null} The matching entry, or null.
 */
export function findBestMatch(entries, target, fromNote) {
    const { byBasename, byRelPath } = indexFor(entries);
    const looksRelative = target.startsWith("./") || target.startsWith("../") || target.includes("/../");
    if (looksRelative && fromNote) {
        // Normalise the linking folder to forward slashes before joining.
        const baseDir = path.dirname(fromNote).split(path.sep).join("/");
        const resolved = path.posix.normalize(path.posix.join(baseDir, target));
        const viaRelative = byRelPath.get(stripMd(resolved).toLowerCase());
        if (viaRelative)
            return viaRelative;
    }
    const wanted = stripMd(target).toLowerCase();
    const sameName = byBasename.get(wanted) ?? [];
    if (sameName.length === 1) {
        return sameName[0] ?? null;
    }
    if (sameName.length > 1) {
        if (fromNote) {
            const linkingDir = path.dirname(fromNote);
            const neighbour = sameName.find((candidate) => path.dirname(candidate.relPath) === linkingDir);
            if (neighbour)
                return neighbour;
        }
        return sameName[0] ?? null;
    }
    if (!target.includes("/")) {
        return null;
    }
    const qualified = byRelPath.get(wanted);
    if (qualified)
        return qualified;
    // endsWith match — falls back to a scan, but only for path-qualified
    // targets that don't exact-match (rare).
    const suffix = `/${wanted}`;
    for (const candidate of entries) {
        if (stripMd(candidate.relPath).toLowerCase().endsWith(suffix))
            return candidate;
    }
    return null;
}
|
|
746
|
+
/**
 * Remove a trailing `.md` extension (any letter case) from a file name or path.
 *
 * @param {string} name
 * @returns {string} `name` without the extension; unchanged when it has none.
 */
export function stripMd(name) {
    const markdownSuffix = /\.md$/i;
    const withoutSuffix = name.replace(markdownSuffix, "");
    return withoutSuffix;
}
|
|
749
|
+
/**
 * Canonical form of a tag: leading `#` characters removed, lower-cased.
 *
 * @param {string} t
 * @returns {string}
 */
export function normalizeTag(t) {
    const withoutHashes = t.replace(/^#+/, "");
    return withoutHashes.toLowerCase();
}
|
|
752
|
+
//# sourceMappingURL=meta.js.map
|