@theglitchking/semantic-pages 0.4.3 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -11
- package/dist/{chunk-TDC45FQJ.js → chunk-VAPQ4NA3.js} +39 -4
- package/dist/chunk-VAPQ4NA3.js.map +1 -0
- package/dist/cli/index.js +1 -1
- package/dist/core/index.d.ts +18 -0
- package/dist/core/index.js +1 -1
- package/dist/indexer-55PTBSTU.js +7 -0
- package/dist/mcp/server.js +124 -25
- package/dist/mcp/server.js.map +1 -1
- package/package.json +1 -1
- package/dist/chunk-TDC45FQJ.js.map +0 -1
- package/dist/indexer-HSCSXWIO.js +0 -7
- /package/dist/{indexer-HSCSXWIO.js.map → indexer-55PTBSTU.js.map} +0 -0
package/README.md
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
[](https://opensource.org/licenses/MIT)
|
|
7
7
|
|
|
8
8
|
> [!IMPORTANT]
|
|
9
|
-
> Semantic Pages runs a local embedding model (~
|
|
9
|
+
> Semantic Pages runs a local embedding model (~22MB) on first launch. This download happens once and is cached at `~/.semantic-pages/models/`. No API key required. No data leaves your machine.
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
@@ -18,7 +18,7 @@ When you have markdown notes scattered across a project — a `vault/`, `docs/`,
|
|
|
18
18
|
|
|
19
19
|
## Operational Summary
|
|
20
20
|
|
|
21
|
-
The server indexes all `.md` files in a directory you point it at. Each file is parsed for YAML frontmatter, `[[wikilinks]]`, `#tags`, and headings. The text content is split into
|
|
21
|
+
The server indexes all `.md` files in a directory you point it at. Each file is parsed for YAML frontmatter, `[[wikilinks]]`, `#tags`, and headings. The text content is split into chunks and embedded locally using `all-MiniLM-L6-v2` — a 22MB model that runs natively in Node.js via ONNX. These embeddings are stored in an HNSW index for fast approximate nearest neighbor search. Simultaneously, a directed graph is built from wikilinks and shared tags using graphology.
|
|
22
22
|
|
|
23
23
|
When Claude calls `search_semantic`, the query is embedded and compared against all chunks via cosine similarity. When Claude calls `search_graph`, it does a breadth-first traversal from matching nodes. `search_hybrid` combines both — semantic results re-ranked by graph proximity. Beyond search, Claude can create, read, update, delete, and move notes, manage YAML frontmatter fields, add/remove/rename tags vault-wide, and query the knowledge graph for backlinks, forwardlinks, shortest paths, and connectivity statistics.
|
|
24
24
|
|
|
@@ -223,7 +223,7 @@ semantic-pages --notes ./vault --reindex
|
|
|
223
223
|
- If the index seems stale or corrupted
|
|
224
224
|
- After changing the embedding model
|
|
225
225
|
|
|
226
|
-
**What to expect**: Full re-parse, re-embed, and re-index of all markdown files. Takes
|
|
226
|
+
**What to expect**: Full re-parse, re-embed, and re-index of all markdown files. Takes 30 seconds to ~20 minutes depending on vault size and hardware. See [Performance Tuning](./.documentation/performance-tuning.md) for details.
|
|
227
227
|
|
|
228
228
|
---
|
|
229
229
|
|
|
@@ -480,8 +480,8 @@ src/
|
|
|
480
480
|
| Markdown parsing | `unified` + `remark-parse` | AST-based, handles wikilinks |
|
|
481
481
|
| Frontmatter | `gray-matter` | YAML/TOML frontmatter extraction |
|
|
482
482
|
| Wikilinks | `remark-wiki-link` | `[[note-name]]` extraction from AST |
|
|
483
|
-
| Embeddings | `@huggingface/transformers` |
|
|
484
|
-
| Embedding model | `
|
|
483
|
+
| Embeddings | `@huggingface/transformers` + `onnxruntime-node` | Native ONNX runtime, no Python, no API key |
|
|
484
|
+
| Embedding model | `all-MiniLM-L6-v2` (default) | ~22MB, fast (~3 min / 3K chunks), excellent retrieval quality |
|
|
485
485
|
| Vector index | `hnswlib-node` | HNSW algorithm, same as production vector DBs |
|
|
486
486
|
| Knowledge graph | `graphology` | Directed graph, serializable, rich algorithms |
|
|
487
487
|
| Graph algorithms | `graphology-traversal` + `graphology-shortest-path` | BFS, shortest path |
|
|
@@ -519,7 +519,7 @@ Plain text → split at sentence boundaries → ~512 token chunks
|
|
|
519
519
|
|
|
520
520
|
#### Step 3: Embed
|
|
521
521
|
```
|
|
522
|
-
Each chunk →
|
|
522
|
+
Each chunk → all-MiniLM-L6-v2 (native ONNX) → normalized Float32Array
|
|
523
523
|
```
|
|
524
524
|
|
|
525
525
|
#### Step 4: Index
|
|
@@ -573,14 +573,16 @@ const path = graph.findPath("overview.md", "auth.md");
|
|
|
573
573
|
|
|
574
574
|
| Metric | Value |
|
|
575
575
|
|--------|-------|
|
|
576
|
-
| Index 100 notes | ~
|
|
577
|
-
| Index
|
|
576
|
+
| Index 100 notes (~600 chunks) | ~30 seconds |
|
|
577
|
+
| Index 500 notes (~3,000 chunks) | ~3–5 minutes |
|
|
578
|
+
| Index 2,000 notes (~12,000 chunks) | ~15–20 minutes |
|
|
578
579
|
| Semantic search latency | <100ms |
|
|
579
580
|
| Text search latency | <10ms |
|
|
580
581
|
| Graph traversal latency | <5ms |
|
|
581
|
-
|
|
|
582
|
-
|
|
|
583
|
-
|
|
|
582
|
+
| Subsequent server starts (warm cache) | <1 second |
|
|
583
|
+
| Model download (first run) | ~22MB, cached at `~/.semantic-pages/models/` |
|
|
584
|
+
| Index size (500 notes) | ~30–50MB |
|
|
585
|
+
| npm package size | ~112 kB |
|
|
584
586
|
|
|
585
587
|
---
|
|
586
588
|
|
|
@@ -592,6 +594,18 @@ const path = graph.findPath("overview.md", "auth.md");
|
|
|
592
594
|
|
|
593
595
|
---
|
|
594
596
|
|
|
597
|
+
## Documentation
|
|
598
|
+
|
|
599
|
+
Deep-dive guides are in [`.documentation/`](./.documentation/):
|
|
600
|
+
|
|
601
|
+
- [**How It Works**](./.documentation/how-it-works.md) — architecture, processing pipeline, index format, search mechanics
|
|
602
|
+
- [**Performance Tuning**](./.documentation/performance-tuning.md) — model selection, batch size, workers, benchmarks
|
|
603
|
+
- [**Embedder Guide**](./.documentation/embedder-guide.md) — when/how to tune the embedder, model switching, cache management
|
|
604
|
+
- [**Troubleshooting**](./.documentation/troubleshooting.md) — common problems and fixes
|
|
605
|
+
- [**Changelog**](./.documentation/changelog.md) — version history with rationale
|
|
606
|
+
|
|
607
|
+
---
|
|
608
|
+
|
|
595
609
|
## Troubleshooting
|
|
596
610
|
|
|
597
611
|
### Installation Issues
|
|
@@ -10,7 +10,7 @@ import remarkParse from "remark-parse";
|
|
|
10
10
|
import remarkWikiLink from "remark-wiki-link";
|
|
11
11
|
import matter from "gray-matter";
|
|
12
12
|
import { glob } from "glob";
|
|
13
|
-
import { readFile } from "fs/promises";
|
|
13
|
+
import { readFile, stat } from "fs/promises";
|
|
14
14
|
import { basename, join } from "path";
|
|
15
15
|
var CHUNK_TARGET_CHARS = 2e3;
|
|
16
16
|
var Indexer = class {
|
|
@@ -28,7 +28,10 @@ var Indexer = class {
|
|
|
28
28
|
return docs;
|
|
29
29
|
}
|
|
30
30
|
async indexFile(absolutePath, relativePath) {
|
|
31
|
-
const raw = await
|
|
31
|
+
const [raw, fileStat] = await Promise.all([
|
|
32
|
+
readFile(absolutePath, "utf-8"),
|
|
33
|
+
stat(absolutePath)
|
|
34
|
+
]);
|
|
32
35
|
const { data: frontmatter, content } = matter(raw);
|
|
33
36
|
const tree = this.processor.parse(content);
|
|
34
37
|
const wikilinks = this.extractWikilinks(tree);
|
|
@@ -37,6 +40,12 @@ var Indexer = class {
|
|
|
37
40
|
const plainText = this.stripMarkdown(content);
|
|
38
41
|
const chunks = this.chunkText(plainText);
|
|
39
42
|
const title = frontmatter.title || headers[0] || basename(relativePath, ".md");
|
|
43
|
+
const mtime = this.resolveMtime(frontmatter, fileStat.mtime);
|
|
44
|
+
const loadPriority = typeof frontmatter.load_priority === "number" ? Math.min(10, Math.max(1, frontmatter.load_priority)) : void 0;
|
|
45
|
+
const status = typeof frontmatter.status === "string" ? frontmatter.status : void 0;
|
|
46
|
+
const tier = typeof frontmatter.tier === "string" ? frontmatter.tier : void 0;
|
|
47
|
+
const domains = Array.isArray(frontmatter.domains) ? frontmatter.domains : void 0;
|
|
48
|
+
const purpose = typeof frontmatter.purpose === "string" ? frontmatter.purpose : void 0;
|
|
40
49
|
return {
|
|
41
50
|
path: relativePath,
|
|
42
51
|
title,
|
|
@@ -45,9 +54,35 @@ var Indexer = class {
|
|
|
45
54
|
wikilinks,
|
|
46
55
|
tags,
|
|
47
56
|
headers,
|
|
48
|
-
chunks
|
|
57
|
+
chunks,
|
|
58
|
+
mtime,
|
|
59
|
+
...loadPriority !== void 0 && { loadPriority },
|
|
60
|
+
...status !== void 0 && { status },
|
|
61
|
+
...tier !== void 0 && { tier },
|
|
62
|
+
...domains !== void 0 && { domains },
|
|
63
|
+
...purpose !== void 0 && { purpose }
|
|
49
64
|
};
|
|
50
65
|
}
|
|
66
|
+
/**
|
|
67
|
+
* Resolve the best available modification date for a document.
|
|
68
|
+
* Priority: last_updated → updated → date → lastmod → fs.stat mtime
|
|
69
|
+
* Accepts YYYY-MM-DD strings or full ISO timestamps.
|
|
70
|
+
*/
|
|
71
|
+
resolveMtime(frontmatter, statMtime) {
|
|
72
|
+
const candidates = [
|
|
73
|
+
frontmatter.last_updated,
|
|
74
|
+
frontmatter.updated,
|
|
75
|
+
frontmatter.date,
|
|
76
|
+
frontmatter.lastmod
|
|
77
|
+
];
|
|
78
|
+
for (const val of candidates) {
|
|
79
|
+
if (!val) continue;
|
|
80
|
+
const str = val instanceof Date ? val.toISOString() : String(val);
|
|
81
|
+
const parsed = new Date(str);
|
|
82
|
+
if (!isNaN(parsed.getTime())) return parsed.toISOString();
|
|
83
|
+
}
|
|
84
|
+
return statMtime.toISOString();
|
|
85
|
+
}
|
|
51
86
|
extractWikilinks(tree) {
|
|
52
87
|
const links = [];
|
|
53
88
|
const walk = (node) => {
|
|
@@ -111,4 +146,4 @@ export {
|
|
|
111
146
|
__export,
|
|
112
147
|
Indexer
|
|
113
148
|
};
|
|
114
|
-
//# sourceMappingURL=chunk-
|
|
149
|
+
//# sourceMappingURL=chunk-VAPQ4NA3.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/core/indexer.ts"],"sourcesContent":["import { unified } from \"unified\";\nimport remarkParse from \"remark-parse\";\nimport remarkWikiLink from \"remark-wiki-link\";\nimport matter from \"gray-matter\";\nimport { glob } from \"glob\";\nimport { readFile, stat } from \"node:fs/promises\";\nimport { basename, join, relative } from \"node:path\";\nimport type { IndexedDocument } from \"./types.js\";\n\nconst CHUNK_TARGET_CHARS = 2000; // ~512 tokens\n\nexport class Indexer {\n private notesPath: string;\n private processor: any;\n\n constructor(notesPath: string) {\n this.notesPath = notesPath;\n this.processor = unified().use(remarkParse).use(remarkWikiLink);\n }\n\n async indexAll(): Promise<IndexedDocument[]> {\n const files = await glob(\"**/*.md\", { cwd: this.notesPath });\n const docs = await Promise.all(\n files.map((file) => this.indexFile(join(this.notesPath, file), file))\n );\n return docs;\n }\n\n async indexFile(\n absolutePath: string,\n relativePath: string\n ): Promise<IndexedDocument> {\n const [raw, fileStat] = await Promise.all([\n readFile(absolutePath, \"utf-8\"),\n stat(absolutePath),\n ]);\n const { data: frontmatter, content } = matter(raw);\n const tree = this.processor.parse(content);\n\n const wikilinks = this.extractWikilinks(tree);\n const tags = this.extractTags(content, frontmatter);\n const headers = this.extractHeaders(tree);\n const plainText = this.stripMarkdown(content);\n const chunks = this.chunkText(plainText);\n\n const title =\n (frontmatter.title as string) ||\n headers[0] ||\n basename(relativePath, \".md\");\n\n // Resolve modification time: prefer frontmatter date fields over fs.stat\n // Supports hit-em-with-the-docs (last_updated) and common alternatives\n const mtime = this.resolveMtime(frontmatter, fileStat.mtime);\n\n // Optional hit-em-with-the-docs fields (only populated when present)\n const loadPriority =\n typeof frontmatter.load_priority === \"number\"\n ? 
Math.min(10, Math.max(1, frontmatter.load_priority))\n : undefined;\n const status =\n typeof frontmatter.status === \"string\" ? frontmatter.status : undefined;\n const tier =\n typeof frontmatter.tier === \"string\" ? frontmatter.tier : undefined;\n const domains = Array.isArray(frontmatter.domains)\n ? (frontmatter.domains as string[])\n : undefined;\n const purpose =\n typeof frontmatter.purpose === \"string\" ? frontmatter.purpose : undefined;\n\n return {\n path: relativePath,\n title,\n content: plainText,\n frontmatter,\n wikilinks,\n tags,\n headers,\n chunks,\n mtime,\n ...(loadPriority !== undefined && { loadPriority }),\n ...(status !== undefined && { status }),\n ...(tier !== undefined && { tier }),\n ...(domains !== undefined && { domains }),\n ...(purpose !== undefined && { purpose }),\n };\n }\n\n /**\n * Resolve the best available modification date for a document.\n * Priority: last_updated → updated → date → lastmod → fs.stat mtime\n * Accepts YYYY-MM-DD strings or full ISO timestamps.\n */\n private resolveMtime(\n frontmatter: Record<string, unknown>,\n statMtime: Date\n ): string {\n const candidates = [\n frontmatter.last_updated,\n frontmatter.updated,\n frontmatter.date,\n frontmatter.lastmod,\n ];\n for (const val of candidates) {\n if (!val) continue;\n const str = val instanceof Date ? 
val.toISOString() : String(val);\n const parsed = new Date(str);\n if (!isNaN(parsed.getTime())) return parsed.toISOString();\n }\n return statMtime.toISOString();\n }\n\n private extractWikilinks(tree: any): string[] {\n const links: string[] = [];\n const walk = (node: any) => {\n if (node.type === \"wikiLink\") {\n links.push(node.value || node.data?.alias || \"\");\n }\n if (node.children) {\n for (const child of node.children) walk(child);\n }\n };\n walk(tree);\n return [...new Set(links.filter(Boolean))];\n }\n\n private extractTags(content: string, frontmatter: Record<string, unknown>): string[] {\n const inlineTags = [...content.matchAll(/(?:^|\\s)#([a-zA-Z][\\w-/]*)/g)].map(\n (m) => m[1]\n );\n\n const fmTags = Array.isArray(frontmatter.tags)\n ? (frontmatter.tags as string[])\n : [];\n\n return [...new Set([...fmTags, ...inlineTags])];\n }\n\n private extractHeaders(tree: any): string[] {\n const headers: string[] = [];\n const walk = (node: any) => {\n if (node.type === \"heading\") {\n const text = this.nodeToText(node);\n if (text) headers.push(text);\n }\n if (node.children) {\n for (const child of node.children) walk(child);\n }\n };\n walk(tree);\n return headers;\n }\n\n private nodeToText(node: any): string {\n if (node.type === \"text\") return node.value;\n if (node.children) return node.children.map((c: any) => this.nodeToText(c)).join(\"\");\n return \"\";\n }\n\n private stripMarkdown(content: string): string {\n return content\n .replace(/```[\\s\\S]*?```/g, \"\")\n .replace(/`[^`]+`/g, \"\")\n .replace(/!\\[.*?\\]\\(.*?\\)/g, \"\")\n .replace(/\\[([^\\]]+)\\]\\(.*?\\)/g, \"$1\")\n .replace(/#{1,6}\\s+/g, \"\")\n .replace(/[*_~]{1,3}/g, \"\")\n .replace(/>\\s+/g, \"\")\n .replace(/\\|.*\\|/g, \"\")\n .replace(/-{3,}/g, \"\")\n .replace(/\\n{3,}/g, \"\\n\\n\")\n .trim();\n }\n\n chunkText(text: string): string[] {\n if (text.length <= CHUNK_TARGET_CHARS) return [text];\n\n const sentences = text.match(/[^.!?\\n]+[.!?\\n]+|[^.!?\\n]+$/g) || 
[text];\n const chunks: string[] = [];\n let current = \"\";\n\n for (const sentence of sentences) {\n if (current.length + sentence.length > CHUNK_TARGET_CHARS && current) {\n chunks.push(current.trim());\n current = \"\";\n }\n current += sentence;\n }\n if (current.trim()) chunks.push(current.trim());\n\n return chunks;\n }\n}\n"],"mappings":";;;;;;;AAAA,SAAS,eAAe;AACxB,OAAO,iBAAiB;AACxB,OAAO,oBAAoB;AAC3B,OAAO,YAAY;AACnB,SAAS,YAAY;AACrB,SAAS,UAAU,YAAY;AAC/B,SAAS,UAAU,YAAsB;AAGzC,IAAM,qBAAqB;AAEpB,IAAM,UAAN,MAAc;AAAA,EACX;AAAA,EACA;AAAA,EAER,YAAY,WAAmB;AAC7B,SAAK,YAAY;AACjB,SAAK,YAAY,QAAQ,EAAE,IAAI,WAAW,EAAE,IAAI,cAAc;AAAA,EAChE;AAAA,EAEA,MAAM,WAAuC;AAC3C,UAAM,QAAQ,MAAM,KAAK,WAAW,EAAE,KAAK,KAAK,UAAU,CAAC;AAC3D,UAAM,OAAO,MAAM,QAAQ;AAAA,MACzB,MAAM,IAAI,CAAC,SAAS,KAAK,UAAU,KAAK,KAAK,WAAW,IAAI,GAAG,IAAI,CAAC;AAAA,IACtE;AACA,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,UACJ,cACA,cAC0B;AAC1B,UAAM,CAAC,KAAK,QAAQ,IAAI,MAAM,QAAQ,IAAI;AAAA,MACxC,SAAS,cAAc,OAAO;AAAA,MAC9B,KAAK,YAAY;AAAA,IACnB,CAAC;AACD,UAAM,EAAE,MAAM,aAAa,QAAQ,IAAI,OAAO,GAAG;AACjD,UAAM,OAAO,KAAK,UAAU,MAAM,OAAO;AAEzC,UAAM,YAAY,KAAK,iBAAiB,IAAI;AAC5C,UAAM,OAAO,KAAK,YAAY,SAAS,WAAW;AAClD,UAAM,UAAU,KAAK,eAAe,IAAI;AACxC,UAAM,YAAY,KAAK,cAAc,OAAO;AAC5C,UAAM,SAAS,KAAK,UAAU,SAAS;AAEvC,UAAM,QACH,YAAY,SACb,QAAQ,CAAC,KACT,SAAS,cAAc,KAAK;AAI9B,UAAM,QAAQ,KAAK,aAAa,aAAa,SAAS,KAAK;AAG3D,UAAM,eACJ,OAAO,YAAY,kBAAkB,WACjC,KAAK,IAAI,IAAI,KAAK,IAAI,GAAG,YAAY,aAAa,CAAC,IACnD;AACN,UAAM,SACJ,OAAO,YAAY,WAAW,WAAW,YAAY,SAAS;AAChE,UAAM,OACJ,OAAO,YAAY,SAAS,WAAW,YAAY,OAAO;AAC5D,UAAM,UAAU,MAAM,QAAQ,YAAY,OAAO,IAC5C,YAAY,UACb;AACJ,UAAM,UACJ,OAAO,YAAY,YAAY,WAAW,YAAY,UAAU;AAElE,WAAO;AAAA,MACL,MAAM;AAAA,MACN;AAAA,MACA,SAAS;AAAA,MACT;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,GAAI,iBAAiB,UAAa,EAAE,aAAa;AAAA,MACjD,GAAI,WAAW,UAAa,EAAE,OAAO;AAAA,MACrC,GAAI,SAAS,UAAa,EAAE,KAAK;AAAA,MACjC,GAAI,YAAY,UAAa,EAAE,QAAQ;AAAA,MACvC,GAAI,YAAY,UAAa,EAAE,QAAQ;AAAA,IACzC;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOQ,aACN,aACA,WACQ;AACR,UAAM,aAAa;AAAA,MACjB,YAAY
;AAAA,MACZ,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ,YAAY;AAAA,IACd;AACA,eAAW,OAAO,YAAY;AAC5B,UAAI,CAAC,IAAK;AACV,YAAM,MAAM,eAAe,OAAO,IAAI,YAAY,IAAI,OAAO,GAAG;AAChE,YAAM,SAAS,IAAI,KAAK,GAAG;AAC3B,UAAI,CAAC,MAAM,OAAO,QAAQ,CAAC,EAAG,QAAO,OAAO,YAAY;AAAA,IAC1D;AACA,WAAO,UAAU,YAAY;AAAA,EAC/B;AAAA,EAEQ,iBAAiB,MAAqB;AAC5C,UAAM,QAAkB,CAAC;AACzB,UAAM,OAAO,CAAC,SAAc;AAC1B,UAAI,KAAK,SAAS,YAAY;AAC5B,cAAM,KAAK,KAAK,SAAS,KAAK,MAAM,SAAS,EAAE;AAAA,MACjD;AACA,UAAI,KAAK,UAAU;AACjB,mBAAW,SAAS,KAAK,SAAU,MAAK,KAAK;AAAA,MAC/C;AAAA,IACF;AACA,SAAK,IAAI;AACT,WAAO,CAAC,GAAG,IAAI,IAAI,MAAM,OAAO,OAAO,CAAC,CAAC;AAAA,EAC3C;AAAA,EAEQ,YAAY,SAAiB,aAAgD;AACnF,UAAM,aAAa,CAAC,GAAG,QAAQ,SAAS,6BAA6B,CAAC,EAAE;AAAA,MACtE,CAAC,MAAM,EAAE,CAAC;AAAA,IACZ;AAEA,UAAM,SAAS,MAAM,QAAQ,YAAY,IAAI,IACxC,YAAY,OACb,CAAC;AAEL,WAAO,CAAC,GAAG,oBAAI,IAAI,CAAC,GAAG,QAAQ,GAAG,UAAU,CAAC,CAAC;AAAA,EAChD;AAAA,EAEQ,eAAe,MAAqB;AAC1C,UAAM,UAAoB,CAAC;AAC3B,UAAM,OAAO,CAAC,SAAc;AAC1B,UAAI,KAAK,SAAS,WAAW;AAC3B,cAAM,OAAO,KAAK,WAAW,IAAI;AACjC,YAAI,KAAM,SAAQ,KAAK,IAAI;AAAA,MAC7B;AACA,UAAI,KAAK,UAAU;AACjB,mBAAW,SAAS,KAAK,SAAU,MAAK,KAAK;AAAA,MAC/C;AAAA,IACF;AACA,SAAK,IAAI;AACT,WAAO;AAAA,EACT;AAAA,EAEQ,WAAW,MAAmB;AACpC,QAAI,KAAK,SAAS,OAAQ,QAAO,KAAK;AACtC,QAAI,KAAK,SAAU,QAAO,KAAK,SAAS,IAAI,CAAC,MAAW,KAAK,WAAW,CAAC,CAAC,EAAE,KAAK,EAAE;AACnF,WAAO;AAAA,EACT;AAAA,EAEQ,cAAc,SAAyB;AAC7C,WAAO,QACJ,QAAQ,mBAAmB,EAAE,EAC7B,QAAQ,YAAY,EAAE,EACtB,QAAQ,oBAAoB,EAAE,EAC9B,QAAQ,wBAAwB,IAAI,EACpC,QAAQ,cAAc,EAAE,EACxB,QAAQ,eAAe,EAAE,EACzB,QAAQ,SAAS,EAAE,EACnB,QAAQ,WAAW,EAAE,EACrB,QAAQ,UAAU,EAAE,EACpB,QAAQ,WAAW,MAAM,EACzB,KAAK;AAAA,EACV;AAAA,EAEA,UAAU,MAAwB;AAChC,QAAI,KAAK,UAAU,mBAAoB,QAAO,CAAC,IAAI;AAEnD,UAAM,YAAY,KAAK,MAAM,+BAA+B,KAAK,CAAC,IAAI;AACtE,UAAM,SAAmB,CAAC;AAC1B,QAAI,UAAU;AAEd,eAAW,YAAY,WAAW;AAChC,UAAI,QAAQ,SAAS,SAAS,SAAS,sBAAsB,SAAS;AACpE,eAAO,KAAK,QAAQ,KAAK,CAAC;AAC1B,kBAAU;AAAA,MACZ;AACA,iBAAW;AAAA,IACb;AACA,QAAI,QAAQ,KAAK,EAAG,QAAO,KAAK,QAAQ,KAAK,CAAC;AAE9C,WAAO;AAAA,EACT;AACF;","names":[]}
|
package/dist/cli/index.js
CHANGED
|
@@ -215,7 +215,7 @@ program.command("serve", { isDefault: true }).description("Start the MCP server
|
|
|
215
215
|
process.exit(1);
|
|
216
216
|
}
|
|
217
217
|
if (opts.stats) {
|
|
218
|
-
const { Indexer } = await import("../indexer-
|
|
218
|
+
const { Indexer } = await import("../indexer-55PTBSTU.js");
|
|
219
219
|
const indexer = new Indexer(notesPath);
|
|
220
220
|
const docs = await indexer.indexAll();
|
|
221
221
|
console.log(`Notes: ${docs.length}`);
|
package/dist/core/index.d.ts
CHANGED
|
@@ -10,6 +10,13 @@ interface IndexedDocument {
|
|
|
10
10
|
tags: string[];
|
|
11
11
|
headers: string[];
|
|
12
12
|
chunks: string[];
|
|
13
|
+
/** ISO timestamp — prefers frontmatter last_updated/updated/date/lastmod, falls back to fs.stat mtime */
|
|
14
|
+
mtime: string;
|
|
15
|
+
loadPriority?: number;
|
|
16
|
+
status?: string;
|
|
17
|
+
tier?: string;
|
|
18
|
+
domains?: string[];
|
|
19
|
+
purpose?: string;
|
|
13
20
|
}
|
|
14
21
|
interface SearchResult {
|
|
15
22
|
path: string;
|
|
@@ -17,6 +24,11 @@ interface SearchResult {
|
|
|
17
24
|
score: number;
|
|
18
25
|
snippet: string;
|
|
19
26
|
matchedChunk?: string;
|
|
27
|
+
mtime?: string;
|
|
28
|
+
loadPriority?: number;
|
|
29
|
+
status?: string;
|
|
30
|
+
tier?: string;
|
|
31
|
+
domains?: string[];
|
|
20
32
|
}
|
|
21
33
|
interface GraphNode {
|
|
22
34
|
path: string;
|
|
@@ -78,6 +90,12 @@ declare class Indexer {
|
|
|
78
90
|
constructor(notesPath: string);
|
|
79
91
|
indexAll(): Promise<IndexedDocument[]>;
|
|
80
92
|
indexFile(absolutePath: string, relativePath: string): Promise<IndexedDocument>;
|
|
93
|
+
/**
|
|
94
|
+
* Resolve the best available modification date for a document.
|
|
95
|
+
* Priority: last_updated → updated → date → lastmod → fs.stat mtime
|
|
96
|
+
* Accepts YYYY-MM-DD strings or full ISO timestamps.
|
|
97
|
+
*/
|
|
98
|
+
private resolveMtime;
|
|
81
99
|
private extractWikilinks;
|
|
82
100
|
private extractTags;
|
|
83
101
|
private extractHeaders;
|
package/dist/core/index.js
CHANGED
package/dist/mcp/server.js
CHANGED
|
@@ -11,7 +11,7 @@ import {
|
|
|
11
11
|
import {
|
|
12
12
|
Indexer,
|
|
13
13
|
__export
|
|
14
|
-
} from "../chunk-
|
|
14
|
+
} from "../chunk-VAPQ4NA3.js";
|
|
15
15
|
|
|
16
16
|
// src/mcp/server.ts
|
|
17
17
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
@@ -4073,6 +4073,7 @@ async function createServer(notesPath, options = {}) {
|
|
|
4073
4073
|
const frontmatterManager = new FrontmatterManager(notesPath);
|
|
4074
4074
|
const tagManager = new TagManager(notesPath);
|
|
4075
4075
|
let documents = [];
|
|
4076
|
+
let docByPath = /* @__PURE__ */ new Map();
|
|
4076
4077
|
let vectorIndex = null;
|
|
4077
4078
|
let indexState = "empty";
|
|
4078
4079
|
let indexProgress = { embedded: 0, total: 0 };
|
|
@@ -4098,6 +4099,7 @@ async function createServer(notesPath, options = {}) {
|
|
|
4098
4099
|
const graphLoaded = await graph.load(indexPath);
|
|
4099
4100
|
if (!graphLoaded) return false;
|
|
4100
4101
|
documents = await indexer.indexAll();
|
|
4102
|
+
docByPath = new Map(documents.map((d) => [d.path, d]));
|
|
4101
4103
|
textSearch.setDocuments(documents);
|
|
4102
4104
|
vectorIndex = tempVector;
|
|
4103
4105
|
indexState = "stale";
|
|
@@ -4165,6 +4167,7 @@ async function createServer(notesPath, options = {}) {
|
|
|
4165
4167
|
allChunks.map((c) => ({ docPath: c.docPath, chunkIndex: c.chunkIndex, text: c.text }))
|
|
4166
4168
|
);
|
|
4167
4169
|
documents = newDocs;
|
|
4170
|
+
docByPath = new Map(documents.map((d) => [d.path, d]));
|
|
4168
4171
|
textSearch.setDocuments(newDocs);
|
|
4169
4172
|
graph.buildFromDocuments(newDocs);
|
|
4170
4173
|
vectorIndex = newVector;
|
|
@@ -4200,19 +4203,62 @@ async function createServer(notesPath, options = {}) {
|
|
|
4200
4203
|
}
|
|
4201
4204
|
return "Indexing in progress... Try again shortly.";
|
|
4202
4205
|
}
|
|
4206
|
+
/**
 * Attach document-level metadata to a search result.
 *
 * The result's `path` is looked up in `docByPath`. Unknown paths are returned
 * untouched; otherwise a shallow copy is returned that carries the document's
 * `mtime` plus whichever of `loadPriority`, `status`, `tier` and `domains`
 * the document defines.
 *
 * @param {{ path: string }} result - Search hit to enrich.
 * @returns {object} The original result, or an enriched copy of it.
 */
function enrichResult(result) {
  const source = docByPath.get(result.path);
  if (!source) return result;
  const enriched = { ...result, mtime: source.mtime };
  // Optional fields are copied only when defined, so absent metadata never
  // appears as an explicit `undefined` key on the result.
  for (const field of ["loadPriority", "status", "tier", "domains"]) {
    if (source[field] !== void 0) enriched[field] = source[field];
  }
  return enriched;
}
|
|
4218
|
+
/**
 * Scale a relevance score by the document's `loadPriority`.
 *
 * Priority 5 is neutral; each step above or below it shifts the score by 4%
 * (priority 10 → ×1.2, priority 1 → ×0.84). Documents that are not in
 * `docByPath`, or that have no usable priority, keep their score unchanged.
 *
 * @param {number} score - Raw relevance score.
 * @param {string} path - Document path used as the `docByPath` key.
 * @returns {number} The boosted (or untouched) score.
 */
function applyPriorityBoost(score, path) {
  const priority = docByPath.get(path)?.loadPriority;
  // NaN is still `typeof "number"`, so it can get past a plain type check.
  // Multiplying by it would turn the score into NaN and make any
  // `b.score - a.score` sort comparator meaningless, so treat every
  // non-finite priority as "no priority".
  if (!Number.isFinite(priority)) return score;
  // NOTE(review): for a negative score a higher priority pushes the value
  // further down, not up — confirm scores are always non-negative.
  return score * (1 + (priority - 5) * 0.04);
}
|
|
4223
|
+
/**
 * Keep only the results whose document was modified inside a time window.
 *
 * Both bounds are optional and inclusive. Results whose path is not present
 * in `docByPath` are kept, because there is no timestamp to judge them by.
 *
 * @param {Array<{ path: string }>} results - Search hits to filter.
 * @param {string} [modifiedAfter] - Lower bound, any string `new Date()` parses.
 * @param {string} [modifiedBefore] - Upper bound, any string `new Date()` parses.
 * @returns {Array<{ path: string }>} `results` itself when no bound is given,
 *   otherwise a filtered copy.
 * @throws {RangeError} When a supplied bound cannot be parsed as a date.
 */
function applyDateFilter(results, modifiedAfter, modifiedBefore) {
  if (!modifiedAfter && !modifiedBefore) return results;
  // An unparseable bound becomes NaN, and every comparison against NaN is
  // false — that would silently discard all indexed results. Fail loudly so
  // the caller learns the filter value was bad.
  const toBound = (value, name, fallback) => {
    if (!value) return fallback;
    const time = new Date(value).getTime();
    if (Number.isNaN(time)) {
      throw new RangeError(`Invalid ${name} date: "${String(value)}"`);
    }
    return time;
  };
  const after = toBound(modifiedAfter, "modifiedAfter", -Infinity);
  const before = toBound(modifiedBefore, "modifiedBefore", Infinity);
  return results.filter((r) => {
    const doc = docByPath.get(r.path);
    if (!doc) return true;
    const t = new Date(doc.mtime).getTime();
    return t >= after && t <= before;
  });
}
|
|
4203
4234
|
const server = new McpServer({
|
|
4204
4235
|
name: "semantic-pages",
|
|
4205
4236
|
version: "0.2.0"
|
|
4206
4237
|
});
|
|
4207
4238
|
server.tool(
|
|
4208
4239
|
"search_semantic",
|
|
4209
|
-
"Vector similarity search \u2014 find notes similar to a query by meaning",
|
|
4210
|
-
{
|
|
4211
|
-
|
|
4240
|
+
"Vector similarity search \u2014 find notes similar to a query by meaning. Scores are boosted by load_priority when present.",
|
|
4241
|
+
{
|
|
4242
|
+
query: external_exports.string(),
|
|
4243
|
+
limit: external_exports.number().optional().default(10),
|
|
4244
|
+
modifiedAfter: external_exports.string().optional().describe("ISO date \u2014 only return notes modified after this date (e.g. '2026-01-01')"),
|
|
4245
|
+
modifiedBefore: external_exports.string().optional().describe("ISO date \u2014 only return notes modified before this date"),
|
|
4246
|
+
status: external_exports.string().optional().describe("Filter by frontmatter status (e.g. 'active', 'draft')"),
|
|
4247
|
+
tier: external_exports.string().optional().describe("Filter by frontmatter tier (e.g. 'guide', 'reference')"),
|
|
4248
|
+
domain: external_exports.string().optional().describe("Filter by frontmatter domain (e.g. 'api', 'security')")
|
|
4249
|
+
},
|
|
4250
|
+
async ({ query, limit, modifiedAfter, modifiedBefore, status, tier, domain }) => {
|
|
4212
4251
|
if (!vectorIndex) return textResponse(indexState === "empty" ? indexingMessage() : "Index not built. Run reindex first.");
|
|
4213
4252
|
const queryEmbed = await embedder.embed(query);
|
|
4214
|
-
|
|
4215
|
-
|
|
4253
|
+
let results = vectorIndex.search(queryEmbed, limit * 3);
|
|
4254
|
+
results = results.map((r) => ({ ...r, score: applyPriorityBoost(r.score, r.path) }));
|
|
4255
|
+
results.sort((a, b) => b.score - a.score);
|
|
4256
|
+
results = applyDateFilter(results, modifiedAfter, modifiedBefore);
|
|
4257
|
+
if (status) results = results.filter((r) => docByPath.get(r.path)?.status === status);
|
|
4258
|
+
if (tier) results = results.filter((r) => docByPath.get(r.path)?.tier === tier);
|
|
4259
|
+
if (domain) results = results.filter((r) => docByPath.get(r.path)?.domains?.includes(domain));
|
|
4260
|
+
const enriched = results.slice(0, limit).map(enrichResult);
|
|
4261
|
+
return textResponse(JSON.stringify(enriched, null, 2));
|
|
4216
4262
|
}
|
|
4217
4263
|
);
|
|
4218
4264
|
server.tool(
|
|
@@ -4224,12 +4270,22 @@ async function createServer(notesPath, options = {}) {
|
|
|
4224
4270
|
caseSensitive: external_exports.boolean().optional().default(false),
|
|
4225
4271
|
pathGlob: external_exports.string().optional(),
|
|
4226
4272
|
tagFilter: external_exports.array(external_exports.string()).optional(),
|
|
4227
|
-
limit: external_exports.number().optional().default(20)
|
|
4273
|
+
limit: external_exports.number().optional().default(20),
|
|
4274
|
+
modifiedAfter: external_exports.string().optional().describe("ISO date \u2014 only return notes modified after this date"),
|
|
4275
|
+
modifiedBefore: external_exports.string().optional().describe("ISO date \u2014 only return notes modified before this date"),
|
|
4276
|
+
status: external_exports.string().optional().describe("Filter by frontmatter status"),
|
|
4277
|
+
tier: external_exports.string().optional().describe("Filter by frontmatter tier"),
|
|
4278
|
+
domain: external_exports.string().optional().describe("Filter by frontmatter domain")
|
|
4228
4279
|
},
|
|
4229
|
-
async (opts) => {
|
|
4280
|
+
async ({ modifiedAfter, modifiedBefore, status, tier, domain, ...opts }) => {
|
|
4230
4281
|
if (documents.length === 0 && indexState !== "ready") return textResponse(indexingMessage());
|
|
4231
|
-
|
|
4232
|
-
|
|
4282
|
+
let results = textSearch.search(opts);
|
|
4283
|
+
results = applyDateFilter(results, modifiedAfter, modifiedBefore);
|
|
4284
|
+
if (status) results = results.filter((r) => docByPath.get(r.path)?.status === status);
|
|
4285
|
+
if (tier) results = results.filter((r) => docByPath.get(r.path)?.tier === tier);
|
|
4286
|
+
if (domain) results = results.filter((r) => docByPath.get(r.path)?.domains?.includes(domain));
|
|
4287
|
+
const enriched = results.map(enrichResult);
|
|
4288
|
+
return textResponse(JSON.stringify(enriched, null, 2));
|
|
4233
4289
|
}
|
|
4234
4290
|
);
|
|
4235
4291
|
server.tool(
|
|
@@ -4244,20 +4300,36 @@ async function createServer(notesPath, options = {}) {
|
|
|
4244
4300
|
);
|
|
4245
4301
|
server.tool(
|
|
4246
4302
|
"search_hybrid",
|
|
4247
|
-
"Combined semantic + graph search \u2014 vector results re-ranked by graph proximity",
|
|
4248
|
-
{
|
|
4249
|
-
|
|
4303
|
+
"Combined semantic + graph search \u2014 vector results re-ranked by graph proximity and load_priority",
|
|
4304
|
+
{
|
|
4305
|
+
query: external_exports.string(),
|
|
4306
|
+
limit: external_exports.number().optional().default(10),
|
|
4307
|
+
modifiedAfter: external_exports.string().optional().describe("ISO date \u2014 only return notes modified after this date"),
|
|
4308
|
+
modifiedBefore: external_exports.string().optional().describe("ISO date \u2014 only return notes modified before this date"),
|
|
4309
|
+
status: external_exports.string().optional().describe("Filter by frontmatter status"),
|
|
4310
|
+
tier: external_exports.string().optional().describe("Filter by frontmatter tier"),
|
|
4311
|
+
domain: external_exports.string().optional().describe("Filter by frontmatter domain")
|
|
4312
|
+
},
|
|
4313
|
+
async ({ query, limit, modifiedAfter, modifiedBefore, status, tier, domain }) => {
|
|
4250
4314
|
if (!vectorIndex) return textResponse(indexState === "empty" ? indexingMessage() : "Index not built. Run reindex first.");
|
|
4251
4315
|
const queryEmbed = await embedder.embed(query);
|
|
4252
|
-
const semanticResults = vectorIndex.search(queryEmbed, limit *
|
|
4316
|
+
const semanticResults = vectorIndex.search(queryEmbed, limit * 3);
|
|
4253
4317
|
const graphResults = graph.searchGraph(query, 2);
|
|
4254
4318
|
const graphPaths = new Set(graphResults.map((r) => r.path));
|
|
4255
|
-
|
|
4319
|
+
let hybrid = semanticResults.map((r) => ({
|
|
4256
4320
|
...r,
|
|
4257
|
-
score:
|
|
4321
|
+
score: applyPriorityBoost(
|
|
4322
|
+
graphPaths.has(r.path) ? r.score * 1.3 : r.score,
|
|
4323
|
+
r.path
|
|
4324
|
+
)
|
|
4258
4325
|
}));
|
|
4259
4326
|
hybrid.sort((a, b) => b.score - a.score);
|
|
4260
|
-
|
|
4327
|
+
hybrid = applyDateFilter(hybrid, modifiedAfter, modifiedBefore);
|
|
4328
|
+
if (status) hybrid = hybrid.filter((r) => docByPath.get(r.path)?.status === status);
|
|
4329
|
+
if (tier) hybrid = hybrid.filter((r) => docByPath.get(r.path)?.tier === tier);
|
|
4330
|
+
if (domain) hybrid = hybrid.filter((r) => docByPath.get(r.path)?.domains?.includes(domain));
|
|
4331
|
+
const enriched = hybrid.slice(0, limit).map(enrichResult);
|
|
4332
|
+
return textResponse(JSON.stringify(enriched, null, 2));
|
|
4261
4333
|
}
|
|
4262
4334
|
);
|
|
4263
4335
|
server.tool(
|
|
@@ -4282,16 +4354,37 @@ async function createServer(notesPath, options = {}) {
|
|
|
4282
4354
|
);
|
|
4283
4355
|
server.tool(
|
|
4284
4356
|
"list_notes",
|
|
4285
|
-
"List all indexed notes with metadata (title, tags, link count)",
|
|
4286
|
-
{
|
|
4287
|
-
|
|
4357
|
+
"List all indexed notes with metadata (title, tags, timestamps, link count). Supports filtering by date, status, tier, and domain.",
|
|
4358
|
+
{
|
|
4359
|
+
modifiedAfter: external_exports.string().optional().describe("ISO date \u2014 only return notes modified after this date (e.g. '2026-01-01')"),
|
|
4360
|
+
modifiedBefore: external_exports.string().optional().describe("ISO date \u2014 only return notes modified before this date"),
|
|
4361
|
+
status: external_exports.string().optional().describe("Filter by frontmatter status (e.g. 'active', 'deprecated')"),
|
|
4362
|
+
tier: external_exports.string().optional().describe("Filter by frontmatter tier (e.g. 'guide', 'reference')"),
|
|
4363
|
+
domain: external_exports.string().optional().describe("Filter by frontmatter domain (e.g. 'api', 'security')")
|
|
4364
|
+
},
|
|
4365
|
+
async ({ modifiedAfter, modifiedBefore, status, tier, domain }) => {
|
|
4288
4366
|
if (documents.length === 0 && indexState !== "ready") return textResponse(indexingMessage());
|
|
4289
|
-
const
|
|
4367
|
+
const after = modifiedAfter ? new Date(modifiedAfter).getTime() : -Infinity;
|
|
4368
|
+
const before = modifiedBefore ? new Date(modifiedBefore).getTime() : Infinity;
|
|
4369
|
+
let list = documents.filter((d) => {
|
|
4370
|
+
const t = new Date(d.mtime).getTime();
|
|
4371
|
+
if (t < after || t > before) return false;
|
|
4372
|
+
if (status && d.status !== status) return false;
|
|
4373
|
+
if (tier && d.tier !== tier) return false;
|
|
4374
|
+
if (domain && !d.domains?.includes(domain)) return false;
|
|
4375
|
+
return true;
|
|
4376
|
+
}).map((d) => ({
|
|
4290
4377
|
path: d.path,
|
|
4291
4378
|
title: d.title,
|
|
4379
|
+
mtime: d.mtime,
|
|
4292
4380
|
tags: d.tags,
|
|
4293
4381
|
wikilinks: d.wikilinks.length,
|
|
4294
|
-
chunks: d.chunks.length
|
|
4382
|
+
chunks: d.chunks.length,
|
|
4383
|
+
...d.loadPriority !== void 0 && { loadPriority: d.loadPriority },
|
|
4384
|
+
...d.status !== void 0 && { status: d.status },
|
|
4385
|
+
...d.tier !== void 0 && { tier: d.tier },
|
|
4386
|
+
...d.domains !== void 0 && { domains: d.domains },
|
|
4387
|
+
...d.purpose !== void 0 && { purpose: d.purpose }
|
|
4295
4388
|
}));
|
|
4296
4389
|
return textResponse(JSON.stringify(list, null, 2));
|
|
4297
4390
|
}
|
|
@@ -4472,11 +4565,17 @@ async function createServer(notesPath, options = {}) {
|
|
|
4472
4565
|
);
|
|
4473
4566
|
}
|
|
4474
4567
|
);
|
|
4475
|
-
const cached = await tryLoadCachedIndex();
|
|
4476
4568
|
if (options.waitForReady) {
|
|
4569
|
+
await tryLoadCachedIndex();
|
|
4477
4570
|
await fullIndex();
|
|
4478
|
-
} else
|
|
4479
|
-
|
|
4571
|
+
} else {
|
|
4572
|
+
tryLoadCachedIndex().then((cached) => {
|
|
4573
|
+
if (!cached) backgroundIndex();
|
|
4574
|
+
}).catch((err) => {
|
|
4575
|
+
process.stderr.write(`Startup error: ${err?.message ?? err}
|
|
4576
|
+
`);
|
|
4577
|
+
backgroundIndex();
|
|
4578
|
+
});
|
|
4480
4579
|
}
|
|
4481
4580
|
if (options.watch !== false) {
|
|
4482
4581
|
const watcher = new Watcher(notesPath);
|