@oomkapwn/enquire-mcp 2.0.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +187 -0
- package/README.md +13 -5
- package/dist/fts5.d.ts +11 -0
- package/dist/fts5.d.ts.map +1 -1
- package/dist/fts5.js +77 -11
- package/dist/fts5.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +406 -4
- package/dist/index.js.map +1 -1
- package/dist/tools.d.ts +128 -0
- package/dist/tools.d.ts.map +1 -1
- package/dist/tools.js +523 -67
- package/dist/tools.js.map +1 -1
- package/docs/api.md +1 -1
- package/package.json +1 -1
package/dist/tools.js
CHANGED
|
@@ -78,7 +78,7 @@ function extractHeadings(body) {
|
|
|
78
78
|
if (inFence)
|
|
79
79
|
continue;
|
|
80
80
|
const m = /^(#{1,6})\s+(.+?)\s*#*\s*$/.exec(line);
|
|
81
|
-
if (m
|
|
81
|
+
if (m?.[1] && m[2]) {
|
|
82
82
|
out.push({ level: m[1].length, text: m[2], line: i + 1 });
|
|
83
83
|
}
|
|
84
84
|
}
|
|
@@ -515,6 +515,245 @@ export async function renameNote(vault, args) {
|
|
|
515
515
|
total_links_rewritten: totalRewrites
|
|
516
516
|
};
|
|
517
517
|
}
|
|
518
|
+
const CHAT_HEADING_RE = /^### (user|assistant|system) · (.+?)\s*$/;
// Multi-line flag: `## Chat:` heading can appear anywhere in the body, not
// only at string start. The append codepath uses .test(body); the read
// codepath uses .exec(line) per-line so the flag is harmless there.
const CHAT_THREAD_TITLE_RE = /^## Chat: (.+?)\s*$/m;
/**
 * Append a message to a note's chat thread.
 *
 * Three cases are handled:
 *  - the note cannot be read: it is created with `# <title>` and
 *    `## Chat: <title>` headings followed by the message;
 *  - the note exists but has no `## Chat:` heading: the heading is appended
 *    first, then the message;
 *  - the note already has a `## Chat:` heading: only the message is appended.
 *
 * Every call adds a fresh `### <role> · <timestamp>` block; existing
 * messages are never rewritten.
 *
 * @param vault Vault object providing `ensureExists`, `resolveInside`,
 *   `readFile`, `writeNote` and `toRel`.
 * @param args `note_path` (required, `.md` is added when missing), `content`
 *   (required), `role` (`user` | `assistant` | `system`) and an optional
 *   `thread_title` used only when a new `## Chat:` heading is written.
 * @returns `{ note_path, line_start, line_end }` where `line_start` is the
 *   1-based line of the new `### <role> · <timestamp>` heading and
 *   `line_end` is the 1-based line of the last line of the message content.
 * @throws Error when `note_path` or `content` is blank, or `role` is invalid.
 */
export async function chatThreadAppend(vault, args) {
    await vault.ensureExists();
    if (!args.note_path?.trim())
        throw new Error("chat_thread_append: `note_path` is required");
    if (!args.content?.trim())
        throw new Error("chat_thread_append: `content` is required");
    const role = args.role;
    if (role !== "user" && role !== "assistant" && role !== "system") {
        throw new Error(`chat_thread_append: invalid role "${role}" (must be user|assistant|system)`);
    }
    const targetRel = args.note_path.toLowerCase().endsWith(".md") ? args.note_path : `${args.note_path}.md`;
    const abs = vault.resolveInside(targetRel);
    const timestamp = new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
    const content = args.content.trim();
    const messageBlock = `\n### ${role} · ${timestamp}\n\n${content}\n`;
    // Line range of the message inside `doc`, which must END with
    // `messageBlock`. Derived from the final document text so the result does
    // not depend on how many trailing newlines the previous body had.
    const lineRange = (doc) => {
        // +1 skips the leading "\n" of messageBlock and lands on the heading.
        const headingOffset = doc.length - messageBlock.length + 1;
        const lineStart = (doc.slice(0, headingOffset).match(/\n/g) ?? []).length + 1;
        // heading line + blank separator line + N content lines
        return { line_start: lineStart, line_end: lineStart + 1 + content.split("\n").length };
    };
    const defaultTitle = () => args.thread_title?.trim() || `chat — ${timestamp.slice(0, 10)}`;
    // Read existing or create new with thread heading. Any read failure is
    // treated as "note does not exist"; the create path below uses
    // `overwrite: false`.
    let existed = true;
    let body = "";
    try {
        body = await vault.readFile(abs);
    }
    catch {
        existed = false;
    }
    if (!existed) {
        // New note from scratch.
        const title = defaultTitle();
        const initial = `# ${title}\n\n## Chat: ${title}\n${messageBlock}`;
        const result = await vault.writeNote(targetRel, initial, { overwrite: false });
        return { note_path: result.relPath, ...lineRange(initial) };
    }
    // Existing thread: just the message. Existing note without a chat
    // heading: add the heading first.
    const toAppend = CHAT_THREAD_TITLE_RE.test(body)
        ? messageBlock
        : `\n\n## Chat: ${defaultTitle()}\n${messageBlock}`;
    const newBody = body.replace(/\n+$/, "") + toAppend;
    await vault.writeNote(targetRel, newBody, { overwrite: true });
    return { note_path: vault.toRel(abs), ...lineRange(newBody) };
}
|
|
581
|
+
/**
 * Parse a note's chat thread into structured messages. Non-chat content
 * (anything outside a `## Chat: <title>` block) is ignored.
 *
 * The note is scanned line by line:
 *  - a `## Chat: <title>` line opens a thread and records its title (when a
 *    note has several such headings the last title wins and messages from
 *    all of them are collected);
 *  - any other `# ` or `## ` heading closes the thread;
 *  - inside a thread, a `### <role> · <timestamp>` line starts a new
 *    message; the lines that follow, up to the next boundary, are its content.
 *
 * @param vault Vault object providing `ensureExists`, `resolveInside`,
 *   `readFile` and `toRel`.
 * @param args `note_path` (required; `.md` is added when missing).
 * @returns `{ note_path, thread_title, messages, message_count }`. Each
 *   message has `role`, `timestamp`, trimmed `content`, and a 1-based
 *   `line_start` (its heading line) / `line_end` (last line before the next
 *   boundary, or the last line of the file).
 * @throws Error when `note_path` is blank; read errors from the vault
 *   propagate unchanged.
 */
export async function chatThreadRead(vault, args) {
    await vault.ensureExists();
    if (!args.note_path?.trim())
        throw new Error("chat_thread_read: `note_path` is required");
    const targetRel = args.note_path.toLowerCase().endsWith(".md") ? args.note_path : `${args.note_path}.md`;
    const abs = vault.resolveInside(targetRel);
    const body = await vault.readFile(abs);
    const lines = body.split("\n");
    // A trailing newline leaves an empty final element that is not a real
    // line; exclude it so the last message's `line_end` stays inside the file.
    const lastLine = lines.length > 1 && lines[lines.length - 1] === "" ? lines.length - 1 : lines.length;
    let threadTitle = null;
    let inThread = false;
    const messages = [];
    let current = null;
    // Close the message being collected (if any), ending at 1-based `lineEnd`.
    const flush = (lineEnd) => {
        if (!current)
            return;
        messages.push({
            role: current.role,
            timestamp: current.timestamp,
            content: current.lines.join("\n").trim(),
            line_start: current.line_start,
            line_end: lineEnd
        });
        current = null;
    };
    for (let i = 0; i < lines.length; i++) {
        const ln = lines[i] ?? "";
        const titleMatch = CHAT_THREAD_TITLE_RE.exec(ln);
        if (titleMatch) {
            // `i` is the 0-based index of this line, i.e. the 1-based number
            // of the line just before it.
            flush(i);
            threadTitle = (titleMatch[1] ?? "").trim();
            inThread = true;
            continue;
        }
        if (!inThread)
            continue;
        // Any other level-1 or level-2 heading ends the thread (`## Chat:`
        // lines were already handled above).
        if (/^#{1,2} /.test(ln)) {
            flush(i);
            inThread = false;
            continue;
        }
        const headingMatch = CHAT_HEADING_RE.exec(ln);
        if (headingMatch?.[1] && headingMatch[2]) {
            flush(i);
            current = {
                role: headingMatch[1],
                timestamp: headingMatch[2].trim(),
                line_start: i + 1,
                lines: []
            };
            continue;
        }
        if (current)
            current.lines.push(ln);
    }
    flush(lastLine);
    return {
        note_path: vault.toRel(abs),
        thread_title: threadTitle,
        messages,
        message_count: messages.length
    };
}
|
|
666
|
+
// ─── obsidian_frontmatter_{get,set,search} (v2.3.0 — atomic YAML ops) ──────
|
|
667
|
+
// Surgical YAML manipulation. Pre-fix, agents wanting to set `status:
|
|
668
|
+
// published` on 12 notes had to find/replace text — error-prone (multi-line
|
|
669
|
+
// strings, special chars, key-collision). Now: parse via gray-matter, edit,
|
|
670
|
+
// rewrite. Code-fence-aware via gray-matter (frontmatter is delimited
|
|
671
|
+
// strictly by leading `---`, so no fence ambiguity).
|
|
672
|
+
//
|
|
673
|
+
// _get is read-only; _set + _delete are write-gated.
|
|
674
|
+
/**
 * Read a note's parsed frontmatter, optionally picking out one key.
 *
 * @param vault Vault object providing `ensureExists` and `readNote`.
 * @param args Target selector passed through to `resolveTarget`, plus an
 *   optional `key`.
 * @returns `{ path, frontmatter }`, and additionally `value` when `args.key`
 *   is given. `value` is `undefined` when the key is not an own property of
 *   the frontmatter object.
 */
export async function frontmatterGet(vault, args) {
    await vault.ensureExists();
    const target = await resolveTarget(vault, args);
    const note = await vault.readNote(target.absPath, target.mtimeMs);
    const frontmatter = note.parsed.frontmatter;
    if (args.key) {
        // Own-property lookup only: a key such as "constructor" or "toString"
        // must not resolve to a built-in inherited from Object.prototype.
        const value = Object.prototype.hasOwnProperty.call(frontmatter, args.key)
            ? frontmatter[args.key]
            : undefined;
        return { path: target.relPath, frontmatter, value };
    }
    return { path: target.relPath, frontmatter };
}
|
|
687
|
+
/**
 * Set and/or delete frontmatter keys on a note, then rewrite the note.
 *
 * For each entry of `args.set`: a `null` value deletes the key (when
 * present); any other value replaces the key when its JSON serialisation
 * differs from the current one.
 *
 * @param vault Vault object providing `ensureExists`, `readNote` and
 *   `writeNote`.
 * @param args Target selector passed through to `resolveTarget`, a non-empty
 *   `set` object, and an optional `dry_run` flag.
 * @returns `{ path, changed_keys, before, after, dry_run }`. `changed_keys`
 *   entries are prefixed `+` (added), `~` (modified) or `-` (deleted).
 *   Nothing is written when there are no changes or `dry_run === true`.
 * @throws Error when `set` is missing or empty.
 */
export async function frontmatterSet(vault, args) {
    await vault.ensureExists();
    if (!args.set || Object.keys(args.set).length === 0) {
        throw new Error("frontmatter_set: `set` must be a non-empty object");
    }
    const target = await resolveTarget(vault, args);
    const note = await vault.readNote(target.absPath, target.mtimeMs);
    const before = { ...note.parsed.frontmatter };
    const after = { ...before };
    const changed = [];
    // Own-property checks only: `k in obj` also sees Object.prototype members
    // ("toString", "constructor", ...), which would report phantom deletes and
    // mislabel genuinely new keys as modified.
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    for (const [k, v] of Object.entries(args.set)) {
        if (v === null) {
            if (hasOwn(after, k)) {
                delete after[k];
                changed.push(`-${k}`);
            }
            continue;
        }
        const prev = hasOwn(after, k) ? after[k] : undefined;
        if (JSON.stringify(prev) !== JSON.stringify(v)) {
            // defineProperty (not `after[k] = v`) so a key named "__proto__"
            // becomes ordinary data instead of swapping the object's prototype.
            Object.defineProperty(after, k, { value: v, writable: true, enumerable: true, configurable: true });
            changed.push(`${hasOwn(before, k) ? "~" : "+"}${k}`);
        }
    }
    if (changed.length === 0 || args.dry_run === true) {
        return { path: target.relPath, changed_keys: changed, before, after, dry_run: args.dry_run === true };
    }
    // Round-trip via gray-matter — same writer pattern as createNote.
    const newDoc = matter.stringify(note.parsed.body, after);
    await vault.writeNote(target.relPath, newDoc, { overwrite: true });
    return { path: target.relPath, changed_keys: changed, before, after, dry_run: false };
}
|
|
720
|
+
/**
 * Find notes whose frontmatter `key` satisfies exactly one predicate.
 *
 * Predicates (exactly one of them must be supplied):
 *  - `exists: true`  — the key is present with a value other than `undefined`;
 *  - `exists: false` — the key is absent;
 *  - `equals`        — the value's JSON serialisation equals that of `equals`;
 *  - `contains`      — the value is an array with an element whose JSON
 *                      serialisation equals that of `contains`.
 *
 * Notes that fail to read or parse are skipped. Scanning stops once `limit`
 * matches (default 100) are collected, so `total_matches` never exceeds
 * `limit`.
 *
 * @param vault Vault object providing `ensureExists`, `listMarkdown` and
 *   `readNote`.
 * @param args `key` (required), one predicate, optional `folder` and `limit`.
 * @returns `{ key, total_matches, matches }` with `matches` entries of the
 *   form `{ path, value, mtime }`.
 * @throws Error when `key` is missing or the predicate count is not exactly 1.
 */
export async function frontmatterSearch(vault, args) {
    await vault.ensureExists();
    if (!args.key)
        throw new Error("frontmatter_search: `key` is required");
    const predicates = [args.equals !== undefined, args.exists !== undefined, args.contains !== undefined].filter(Boolean);
    if (predicates.length !== 1) {
        throw new Error("frontmatter_search: exactly one of `equals` / `exists` / `contains` must be set");
    }
    const limit = args.limit ?? 100;
    const entries = await vault.listMarkdown(args.folder);
    const matches = [];
    // Serialise the comparison operand once instead of once per note/element.
    const wantedEquals = args.equals !== undefined ? JSON.stringify(args.equals) : undefined;
    const wantedContains = args.contains !== undefined ? JSON.stringify(args.contains) : undefined;
    for (const e of entries) {
        if (matches.length >= limit)
            break;
        try {
            const note = await vault.readNote(e.absPath, e.mtimeMs);
            const frontmatter = note.parsed.frontmatter;
            // Own properties only, so keys like "toString" do not appear to
            // exist on every note via Object.prototype.
            const value = Object.prototype.hasOwnProperty.call(frontmatter, args.key)
                ? frontmatter[args.key]
                : undefined;
            let hit = false;
            if (args.exists === true)
                hit = value !== undefined;
            else if (args.exists === false)
                // Previously accepted by validation but could never match.
                hit = value === undefined;
            else if (wantedEquals !== undefined)
                hit = JSON.stringify(value) === wantedEquals;
            else if (wantedContains !== undefined) {
                if (Array.isArray(value)) {
                    hit = value.some((v) => JSON.stringify(v) === wantedContains);
                }
            }
            if (hit) {
                matches.push({ path: e.relPath, value, mtime: new Date(e.mtimeMs).toISOString() });
            }
        }
        catch {
            // skip unparseable notes
        }
    }
    return { key: args.key, total_matches: matches.length, matches };
}
|
|
518
757
|
export async function archiveNote(vault, args) {
|
|
519
758
|
await vault.ensureExists();
|
|
520
759
|
if (!args.path)
|
|
@@ -1455,7 +1694,7 @@ export async function getOpenQuestions(vault, args) {
|
|
|
1455
1694
|
continue;
|
|
1456
1695
|
}
|
|
1457
1696
|
const m = re.exec(line);
|
|
1458
|
-
if (!m
|
|
1697
|
+
if (!m?.[1])
|
|
1459
1698
|
continue;
|
|
1460
1699
|
out.push({
|
|
1461
1700
|
question: m[1].trim(),
|
|
@@ -1896,16 +2135,42 @@ const STOP_WORDS = new Set([
|
|
|
1896
2135
|
"why",
|
|
1897
2136
|
"how"
|
|
1898
2137
|
]);
|
|
2138
|
+
// v2.1.0: detect Chinese / Japanese / Thai / Khmer / Lao via script ranges.
|
|
2139
|
+
// These languages don't use spaces between words, so the Unicode-regex
|
|
2140
|
+
// tokenizer falls back to character-level (or huge multi-word tokens),
|
|
2141
|
+
// which tanks BM25 + TF-IDF precision. Intl.Segmenter (Node 16+ ICU)
|
|
2142
|
+
// gives word-break per language. Detection is per-document, branching the
|
|
2143
|
+
// tokenizer.
|
|
2144
|
+
// Written with \u escapes so the ranges survive any re-encoding of the file:
//   \u3040-\u30ff  Hiragana + Katakana
//   \u3400-\u4dbf  CJK Unified Ideographs Extension A
//   \u4e00-\u9fff  CJK Unified Ideographs
//   \uac00-\ud7af  Hangul Syllables
//   \u0e00-\u0eff  Thai + Lao
//   \u0f00-\u0fff  Tibetan
//   \u1780-\u17ff  Khmer
const CJK_OR_THAI_RANGES = /[\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uac00-\ud7af\u0e00-\u0eff\u0f00-\u0fff\u1780-\u17ff]/;
|
|
1899
2145
|
function tokenizeForTfidf(text) {
|
|
1900
2146
|
// v1.11.1: Unicode-aware tokenizer. The previous ASCII-only regex
|
|
1901
2147
|
// (`/[a-z0-9][a-z0-9_-]*/g`) silently dropped Cyrillic, Greek, CJK,
|
|
1902
|
-
// Hebrew, Arabic, and any non-Latin content from the TF-IDF index
|
|
1903
|
-
//
|
|
1904
|
-
//
|
|
1905
|
-
//
|
|
1906
|
-
//
|
|
2148
|
+
// Hebrew, Arabic, and any non-Latin content from the TF-IDF index.
|
|
2149
|
+
// `\p{L}` matches any Unicode letter; `\p{N}` matches any Unicode number.
|
|
2150
|
+
//
|
|
2151
|
+
// v2.1.0: when the text contains CJK / Thai / Khmer / Lao chars (no-
|
|
2152
|
+
// whitespace scripts), use Intl.Segmenter for proper word-break first,
|
|
2153
|
+
// then run the Unicode regex per-segment. This produces real word tokens
|
|
2154
|
+
// instead of "認可サーバーがアクセストークン" as a single 12-char token
|
|
2155
|
+
// that the length filter would drop.
|
|
1907
2156
|
const lower = text.toLowerCase();
|
|
1908
2157
|
const out = [];
|
|
2158
|
+
if (CJK_OR_THAI_RANGES.test(lower) && typeof Intl !== "undefined" && typeof Intl.Segmenter !== "undefined") {
|
|
2159
|
+
const segmenter = new Intl.Segmenter(undefined, { granularity: "word" });
|
|
2160
|
+
for (const seg of segmenter.segment(lower)) {
|
|
2161
|
+
if (!seg.isWordLike)
|
|
2162
|
+
continue;
|
|
2163
|
+
const t = seg.segment;
|
|
2164
|
+
if (t.length < 1)
|
|
2165
|
+
continue;
|
|
2166
|
+
if (t.length > 40)
|
|
2167
|
+
continue;
|
|
2168
|
+
if (STOP_WORDS.has(t))
|
|
2169
|
+
continue;
|
|
2170
|
+
out.push(t);
|
|
2171
|
+
}
|
|
2172
|
+
return out;
|
|
2173
|
+
}
|
|
1909
2174
|
for (const m of lower.matchAll(/[\p{L}\p{N}][\p{L}\p{N}_-]*/gu)) {
|
|
1910
2175
|
const t = m[0];
|
|
1911
2176
|
if (t.length < 2)
|
|
@@ -2115,6 +2380,7 @@ export async function searchHybrid(vault, args, ctx) {
|
|
|
2115
2380
|
throw new Error("query must not be empty");
|
|
2116
2381
|
const limit = args.limit ?? 10;
|
|
2117
2382
|
const minSignals = args.min_signals ?? 1;
|
|
2383
|
+
const granularity = args.granularity ?? "note";
|
|
2118
2384
|
// Fan-out per-ranker top-K. Bigger than user's `limit` so RRF has room
|
|
2119
2385
|
// to surface a doc that's mid-rank in one signal but top in another.
|
|
2120
2386
|
const fanOutK = Math.max(50, limit * 5);
|
|
@@ -2133,35 +2399,52 @@ export async function searchHybrid(vault, args, ctx) {
|
|
|
2133
2399
|
// Pre-fix, BM25 search returned excluded chunks via the hybrid pipeline.
|
|
2134
2400
|
const rawFtsHits = ctx.ftsIndex.search(args.query, { limit: fanOutK, folder: args.folder });
|
|
2135
2401
|
const ftsHits = rawFtsHits.filter((h) => !vault.isExcluded(h.rel_path));
|
|
2136
|
-
|
|
2137
|
-
|
|
2138
|
-
|
|
2139
|
-
|
|
2140
|
-
|
|
2141
|
-
|
|
2142
|
-
|
|
2143
|
-
|
|
2144
|
-
|
|
2145
|
-
|
|
2146
|
-
|
|
2147
|
-
|
|
2402
|
+
// v2.2.0: granularity branch.
|
|
2403
|
+
// "note" → collapse multi-chunk hits per note (best-rank wins),
|
|
2404
|
+
// RRF fuses on path key.
|
|
2405
|
+
// "block" → keep each chunk distinct, RRF fuses on `path#chunk_index`.
|
|
2406
|
+
if (granularity === "block") {
|
|
2407
|
+
bm25Ranked = ftsHits.map((h, i) => ({
|
|
2408
|
+
id: `${h.rel_path}#${h.chunk_index}`,
|
|
2409
|
+
rank: i + 1,
|
|
2410
|
+
score: h.score,
|
|
2411
|
+
snippet: h.snippet,
|
|
2412
|
+
chunk_index: h.chunk_index,
|
|
2413
|
+
line_start: h.line_start,
|
|
2414
|
+
line_end: h.line_end
|
|
2415
|
+
}));
|
|
2416
|
+
}
|
|
2417
|
+
else {
|
|
2418
|
+
const bestPerNote = new Map();
|
|
2419
|
+
ftsHits.forEach((h, i) => {
|
|
2420
|
+
const existing = bestPerNote.get(h.rel_path);
|
|
2421
|
+
if (!existing || i < existing.rank) {
|
|
2422
|
+
bestPerNote.set(h.rel_path, {
|
|
2423
|
+
score: h.score,
|
|
2424
|
+
rank: i + 1,
|
|
2425
|
+
snippet: h.snippet,
|
|
2426
|
+
chunk_index: h.chunk_index,
|
|
2427
|
+
line_start: h.line_start,
|
|
2428
|
+
line_end: h.line_end
|
|
2429
|
+
});
|
|
2430
|
+
}
|
|
2431
|
+
});
|
|
2432
|
+
bm25Ranked = Array.from(bestPerNote.entries()).map(([id, b]) => ({
|
|
2433
|
+
id,
|
|
2434
|
+
rank: b.rank,
|
|
2435
|
+
score: b.score,
|
|
2436
|
+
snippet: b.snippet,
|
|
2437
|
+
chunk_index: b.chunk_index,
|
|
2438
|
+
line_start: b.line_start,
|
|
2439
|
+
line_end: b.line_end
|
|
2440
|
+
}));
|
|
2441
|
+
// Re-sort to ensure 1-based ranks are consecutive after dedup.
|
|
2442
|
+
bm25Ranked.sort((a, b) => a.rank - b.rank);
|
|
2443
|
+
for (let i = 0; i < bm25Ranked.length; i++) {
|
|
2444
|
+
const hit = bm25Ranked[i];
|
|
2445
|
+
if (hit)
|
|
2446
|
+
hit.rank = i + 1;
|
|
2148
2447
|
}
|
|
2149
|
-
});
|
|
2150
|
-
bm25Ranked = Array.from(bestPerNote.entries()).map(([id, b]) => ({
|
|
2151
|
-
id,
|
|
2152
|
-
rank: b.rank,
|
|
2153
|
-
score: b.score,
|
|
2154
|
-
snippet: b.snippet,
|
|
2155
|
-
chunk_index: b.chunk_index,
|
|
2156
|
-
line_start: b.line_start,
|
|
2157
|
-
line_end: b.line_end
|
|
2158
|
-
}));
|
|
2159
|
-
// Re-sort to ensure 1-based ranks are consecutive after dedup.
|
|
2160
|
-
bm25Ranked.sort((a, b) => a.rank - b.rank);
|
|
2161
|
-
for (let i = 0; i < bm25Ranked.length; i++) {
|
|
2162
|
-
const hit = bm25Ranked[i];
|
|
2163
|
-
if (hit)
|
|
2164
|
-
hit.rank = i + 1;
|
|
2165
2448
|
}
|
|
2166
2449
|
if (bm25Ranked.length > 0)
|
|
2167
2450
|
signalsUsed.push("bm25");
|
|
@@ -2207,35 +2490,48 @@ export async function searchHybrid(vault, args, ctx) {
|
|
|
2207
2490
|
// default which silently shrank the embedding-side candidate pool and
|
|
2208
2491
|
// starved RRF of cross-signal evidence.
|
|
2209
2492
|
const embed = await embeddingsSearch(vault, { query: args.query, folder: args.folder, limit: fanOutK, model: args.embedding_model, min_score: 0 }, ctx.embedFile);
|
|
2210
|
-
//
|
|
2211
|
-
|
|
2212
|
-
|
|
2213
|
-
|
|
2214
|
-
|
|
2215
|
-
|
|
2216
|
-
|
|
2217
|
-
|
|
2218
|
-
|
|
2219
|
-
|
|
2220
|
-
|
|
2221
|
-
|
|
2222
|
-
|
|
2493
|
+
// v2.2.0: granularity branch — same shape as BM25 above.
|
|
2494
|
+
if (granularity === "block") {
|
|
2495
|
+
embedRanked = embed.matches.map((m, i) => ({
|
|
2496
|
+
id: `${m.path}#${m.chunk_index ?? 0}`,
|
|
2497
|
+
rank: i + 1,
|
|
2498
|
+
score: m.score,
|
|
2499
|
+
snippet: m.snippet,
|
|
2500
|
+
chunk_index: m.chunk_index,
|
|
2501
|
+
line_start: m.line_start,
|
|
2502
|
+
line_end: m.line_end
|
|
2503
|
+
}));
|
|
2504
|
+
}
|
|
2505
|
+
else {
|
|
2506
|
+
const bestPerNote = new Map();
|
|
2507
|
+
embed.matches.forEach((m, i) => {
|
|
2508
|
+
const existing = bestPerNote.get(m.path);
|
|
2509
|
+
if (!existing || i < existing.rank) {
|
|
2510
|
+
bestPerNote.set(m.path, {
|
|
2511
|
+
score: m.score,
|
|
2512
|
+
rank: i + 1,
|
|
2513
|
+
snippet: m.snippet,
|
|
2514
|
+
chunk_index: m.chunk_index,
|
|
2515
|
+
line_start: m.line_start,
|
|
2516
|
+
line_end: m.line_end
|
|
2517
|
+
});
|
|
2518
|
+
}
|
|
2519
|
+
});
|
|
2520
|
+
embedRanked = Array.from(bestPerNote.entries()).map(([id, b]) => ({
|
|
2521
|
+
id,
|
|
2522
|
+
rank: b.rank,
|
|
2523
|
+
score: b.score,
|
|
2524
|
+
snippet: b.snippet,
|
|
2525
|
+
chunk_index: b.chunk_index,
|
|
2526
|
+
line_start: b.line_start,
|
|
2527
|
+
line_end: b.line_end
|
|
2528
|
+
}));
|
|
2529
|
+
embedRanked.sort((a, b) => a.rank - b.rank);
|
|
2530
|
+
for (let i = 0; i < embedRanked.length; i++) {
|
|
2531
|
+
const hit = embedRanked[i];
|
|
2532
|
+
if (hit)
|
|
2533
|
+
hit.rank = i + 1;
|
|
2223
2534
|
}
|
|
2224
|
-
});
|
|
2225
|
-
embedRanked = Array.from(bestPerNote.entries()).map(([id, b]) => ({
|
|
2226
|
-
id,
|
|
2227
|
-
rank: b.rank,
|
|
2228
|
-
score: b.score,
|
|
2229
|
-
snippet: b.snippet,
|
|
2230
|
-
chunk_index: b.chunk_index,
|
|
2231
|
-
line_start: b.line_start,
|
|
2232
|
-
line_end: b.line_end
|
|
2233
|
-
}));
|
|
2234
|
-
embedRanked.sort((a, b) => a.rank - b.rank);
|
|
2235
|
-
for (let i = 0; i < embedRanked.length; i++) {
|
|
2236
|
-
const hit = embedRanked[i];
|
|
2237
|
-
if (hit)
|
|
2238
|
-
hit.rank = i + 1;
|
|
2239
2535
|
}
|
|
2240
2536
|
if (embedRanked.length > 0)
|
|
2241
2537
|
signalsUsed.push("embeddings");
|
|
@@ -2251,8 +2547,59 @@ export async function searchHybrid(vault, args, ctx) {
|
|
|
2251
2547
|
bm25: bm25Ranked.map((h) => ({ id: h.id, rank: h.rank, score: h.score })),
|
|
2252
2548
|
tfidf: tfidfRanked.map((h) => ({ id: h.id, rank: h.rank, score: h.score })),
|
|
2253
2549
|
embeddings: embedRanked.map((h) => ({ id: h.id, rank: h.rank, score: h.score }))
|
|
2254
|
-
}, { topK: limit *
|
|
2550
|
+
}, { topK: Math.max(limit * 4, 30) } // overshoot — graph boost may rerank
|
|
2255
2551
|
);
|
|
2552
|
+
// ─── v2.3.0: Wikilink graph-boost ───────────────────────────────────────
|
|
2553
|
+
// Re-rank top-K by counting how many *other* top-K hits link to each one.
|
|
2554
|
+
// Equivalent to a 1-step personalised PageRank seeded by the fused top-K.
|
|
2555
|
+
// Boost is small (α=0.005) — enough to break ties but won't override
|
|
2556
|
+
// strong single-ranker signals. Requires no new index — uses already-
|
|
2557
|
+
// cached parsed wikilinks per note.
|
|
2558
|
+
// This is the "only enquire-mcp does this" feature: generic vector stores
|
|
2559
|
+
// can't do this without an Obsidian-aware layer; Smart Connections doesn't
|
|
2560
|
+
// do it either. Wikilinks ARE the differentiating Obsidian primitive.
|
|
2561
|
+
const graphBoost = args.graph_boost !== false; // default ON
|
|
2562
|
+
if (graphBoost && fused.length > 1) {
|
|
2563
|
+
const candidatePaths = new Set();
|
|
2564
|
+
for (const f of fused) {
|
|
2565
|
+
candidatePaths.add(f.id.includes("#") ? (f.id.split("#")[0] ?? f.id) : f.id);
|
|
2566
|
+
}
|
|
2567
|
+
const outLinks = new Map();
|
|
2568
|
+
for (const candidatePath of candidatePaths) {
|
|
2569
|
+
try {
|
|
2570
|
+
const note = await vault.readNote(vault.resolveInside(candidatePath));
|
|
2571
|
+
const targets = new Set();
|
|
2572
|
+
for (const wl of note.parsed.wikilinks) {
|
|
2573
|
+
if (!wl.target)
|
|
2574
|
+
continue;
|
|
2575
|
+
// Wikilinks can be by basename ("Foo") or relative path ("Sub/Foo").
|
|
2576
|
+
// Normalize both forms so the membership test catches either.
|
|
2577
|
+
targets.add(wl.target);
|
|
2578
|
+
targets.add(stripMd(wl.target));
|
|
2579
|
+
}
|
|
2580
|
+
outLinks.set(candidatePath, targets);
|
|
2581
|
+
}
|
|
2582
|
+
catch {
|
|
2583
|
+
// skip unreadable notes
|
|
2584
|
+
}
|
|
2585
|
+
}
|
|
2586
|
+
const ALPHA = 0.005;
|
|
2587
|
+
for (const f of fused) {
|
|
2588
|
+
const fPath = f.id.includes("#") ? (f.id.split("#")[0] ?? f.id) : f.id;
|
|
2589
|
+
const fBasename = stripMd(path.basename(fPath));
|
|
2590
|
+
let inDegree = 0;
|
|
2591
|
+
for (const [otherPath, targets] of outLinks) {
|
|
2592
|
+
if (otherPath === fPath)
|
|
2593
|
+
continue;
|
|
2594
|
+
if (targets.has(fPath) || targets.has(stripMd(fPath)) || targets.has(fBasename)) {
|
|
2595
|
+
inDegree += 1;
|
|
2596
|
+
}
|
|
2597
|
+
}
|
|
2598
|
+
if (inDegree > 0)
|
|
2599
|
+
f.score += ALPHA * inDegree;
|
|
2600
|
+
}
|
|
2601
|
+
fused.sort((a, b) => b.score - a.score);
|
|
2602
|
+
}
|
|
2256
2603
|
// Build snippet/chunk lookup tables for attaching the best evidence per
|
|
2257
2604
|
// note in the final response.
|
|
2258
2605
|
const bm25Map = new Map(bm25Ranked.map((h) => [h.id, h]));
|
|
@@ -2281,12 +2628,26 @@ export async function searchHybrid(vault, args, ctx) {
|
|
|
2281
2628
|
if (f.per_signal.embeddings) {
|
|
2282
2629
|
perSignal.embeddings = { rank: f.per_signal.embeddings.rank, score: f.per_signal.embeddings.score };
|
|
2283
2630
|
}
|
|
2631
|
+
// v2.2.0: when granularity is "block", f.id is "path#chunk_index" — split
|
|
2632
|
+
// back into path + chunk_index for the response. When "note", f.id is
|
|
2633
|
+
// just the path.
|
|
2634
|
+
let pathPart = f.id;
|
|
2635
|
+
let chunkFromId;
|
|
2636
|
+
if (granularity === "block") {
|
|
2637
|
+
const hashIdx = f.id.lastIndexOf("#");
|
|
2638
|
+
if (hashIdx > 0) {
|
|
2639
|
+
pathPart = f.id.slice(0, hashIdx);
|
|
2640
|
+
const parsed = Number.parseInt(f.id.slice(hashIdx + 1), 10);
|
|
2641
|
+
if (Number.isInteger(parsed) && parsed >= 0)
|
|
2642
|
+
chunkFromId = parsed;
|
|
2643
|
+
}
|
|
2644
|
+
}
|
|
2284
2645
|
matches.push({
|
|
2285
|
-
path:
|
|
2286
|
-
title: stripMd(path.basename(
|
|
2646
|
+
path: pathPart,
|
|
2647
|
+
title: stripMd(path.basename(pathPart)),
|
|
2287
2648
|
score: Math.round(f.score * 100000) / 100000,
|
|
2288
2649
|
snippet: bestEvidence?.snippet ?? "",
|
|
2289
|
-
chunk_index: bm?.chunk_index ?? emb?.chunk_index,
|
|
2650
|
+
chunk_index: chunkFromId ?? bm?.chunk_index ?? emb?.chunk_index,
|
|
2290
2651
|
line_start: bm?.line_start ?? emb?.line_start,
|
|
2291
2652
|
line_end: bm?.line_end ?? emb?.line_end,
|
|
2292
2653
|
per_signal: perSignal
|
|
@@ -2310,6 +2671,101 @@ export async function searchHybrid(vault, args, ctx) {
|
|
|
2310
2671
|
}
|
|
2311
2672
|
return response;
|
|
2312
2673
|
}
|
|
2674
|
+
/**
 * Build a single markdown "context bundle" for a query within an approximate
 * token budget.
 *
 * Steps:
 *  1. run `searchHybrid` for the query (top 10 notes);
 *  2. append note bodies under `## Top notes`, truncating each to roughly
 *     half of the remaining budget (but never below 500 characters);
 *  3. unless `include_backlinks` is `false`, append one-line backlink
 *     summaries for the first three included notes;
 *  4. when `recent_dailies > 0`, list recently edited notes whose path
 *     contains a `YYYY-MM-DD` date.
 *
 * The budget is enforced in characters at ~4 chars/token and is checked
 * before each item is added, so the final bundle can overshoot slightly;
 * `estimated_tokens` reports the actual size.
 *
 * @param vault Vault object providing `ensureExists`, `resolveInside` and
 *   `readNote`.
 * @param args `query` (required), optional `budget_tokens` (default 4000),
 *   `folder`, `include_backlinks` (default on) and `recent_dailies`
 *   (default 0).
 * @param ctx Provides `ftsIndex` and `embedFile`, forwarded to `searchHybrid`.
 * @returns `{ query, bundle, estimated_tokens, budget_tokens, sections,
 *   included_notes }` where `sections` holds the character counts
 *   contributed by notes, backlinks and dailies.
 * @throws Error when `query` is blank.
 */
export async function contextPack(vault, args, ctx) {
    await vault.ensureExists();
    if (!args.query?.trim())
        throw new Error("context_pack: `query` is required");
    const budget = args.budget_tokens ?? 4000;
    const charBudget = budget * 4; // ~4 chars/token
    const includeBacklinks = args.include_backlinks !== false;
    const recentN = Math.max(0, args.recent_dailies ?? 0);
    // 1) Hybrid retrieval — top-K notes
    const search = await searchHybrid(vault, { query: args.query, folder: args.folder, limit: 10 }, { ftsIndex: ctx.ftsIndex, embedFile: ctx.embedFile });
    const sections = [`# Context for: ${args.query}\n`];
    const includedNotes = [];
    let charsUsed = sections[0]?.length ?? 0;
    let notesBytes = 0;
    let backlinksBytes = 0;
    let dailiesBytes = 0;
    // 2) Pack note bodies until budget exhausted
    sections.push("## Top notes");
    for (const m of search.matches) {
        if (charsUsed >= charBudget)
            break;
        try {
            const note = await vault.readNote(vault.resolveInside(m.path), undefined);
            const body = note.parsed.body.trim();
            const remaining = charBudget - charsUsed;
            // Truncate body to fit remaining budget for THIS note (~50% of remainder
            // so we leave room for backlinks + dailies).
            const noteCap = Math.min(body.length, Math.max(500, Math.floor(remaining * 0.5)));
            let trimmed = body;
            if (body.length > noteCap) {
                // Do not cut between the two halves of a surrogate pair.
                const lastUnit = body.charCodeAt(noteCap - 1);
                const cut = lastUnit >= 0xd800 && lastUnit <= 0xdbff ? noteCap - 1 : noteCap;
                trimmed = `${body.slice(0, cut)}\n\n[…truncated…]`;
            }
            const block = `### ${m.path}\n\n${trimmed}\n`;
            sections.push(block);
            // `block` already contains its `### <path>` header, so its length
            // is the full cost; adding the header length again would exhaust
            // the budget early.
            charsUsed += block.length;
            notesBytes += block.length;
            includedNotes.push(m.path);
        }
        catch {
            // skip unreadable notes
        }
    }
    // 3) 1-line backlink summaries for top-3
    if (includeBacklinks && includedNotes.length > 0 && charsUsed < charBudget) {
        sections.push("## Backlinks");
        let backlinksAdded = 0;
        for (const notePath of includedNotes.slice(0, 3)) {
            if (charsUsed >= charBudget)
                break;
            try {
                const links = await getBacklinks(vault, { path: notePath, limit: 5 });
                if (links.length > 0) {
                    const block = `### → ${notePath}\n${links.map((l) => `- ${l.path} : ${(l.snippets[0] ?? "").slice(0, 80)}`).join("\n")}\n`;
                    sections.push(block);
                    charsUsed += block.length;
                    backlinksBytes += block.length;
                    backlinksAdded += links.length;
                }
            }
            catch {
                // skip
            }
        }
        // No block was pushed after the heading, so the heading is still the
        // last section and can be popped.
        if (backlinksAdded === 0)
            sections.pop(); // remove empty heading
    }
    // 4) Recent daily notes
    if (recentN > 0 && charsUsed < charBudget) {
        try {
            const recent = await getRecentEdits(vault, { since_minutes: 60 * 24 * 7, limit: recentN, folder: args.folder });
            const dailies = recent.filter((r) => /\d{4}-\d{2}-\d{2}/.test(r.path));
            if (dailies.length > 0) {
                sections.push(`## Recent (${dailies.length} dailies, last 7 days)`);
                for (const d of dailies) {
                    if (charsUsed >= charBudget)
                        break;
                    const block = `- ${d.path} (${d.mtime})`;
                    sections.push(block);
                    charsUsed += block.length;
                    dailiesBytes += block.length;
                }
            }
        }
        catch {
            // skip
        }
    }
    const bundle = sections.join("\n");
    return {
        query: args.query,
        bundle,
        estimated_tokens: Math.ceil(bundle.length / 4),
        budget_tokens: budget,
        sections: { notes: notesBytes, backlinks: backlinksBytes, dailies: dailiesBytes },
        included_notes: includedNotes
    };
}
|
|
2313
2769
|
// ─── small set / string helpers shared by find_similar / get_note_neighbors ─
|
|
2314
2770
|
function jaccard(a, b) {
|
|
2315
2771
|
if (a.size === 0 && b.size === 0)
|