@abhishekmcp/notes 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -40
- package/dist/config.d.ts +41 -0
- package/dist/config.js +67 -0
- package/dist/config.js.map +1 -0
- package/dist/embed.d.ts +9 -0
- package/dist/embed.js +134 -0
- package/dist/embed.js.map +1 -0
- package/dist/fsutil.d.ts +23 -0
- package/dist/fsutil.js +113 -0
- package/dist/fsutil.js.map +1 -0
- package/dist/graph.d.ts +38 -0
- package/dist/graph.js +160 -0
- package/dist/graph.js.map +1 -0
- package/dist/index.js +240 -100
- package/dist/index.js.map +1 -1
- package/dist/parse.d.ts +55 -0
- package/dist/parse.js +202 -0
- package/dist/parse.js.map +1 -0
- package/dist/semantic.d.ts +14 -0
- package/dist/semantic.js +142 -0
- package/dist/semantic.js.map +1 -0
- package/dist/store.d.ts +102 -0
- package/dist/store.js +410 -0
- package/dist/store.js.map +1 -0
- package/dist/tokenizer.d.ts +30 -0
- package/dist/tokenizer.js +140 -0
- package/dist/tokenizer.js.map +1 -0
- package/package.json +4 -2
- package/dist/notes.d.ts +0 -36
- package/dist/notes.js +0 -138
- package/dist/notes.js.map +0 -1
package/README.md
CHANGED
|
@@ -1,40 +1,82 @@
|
|
|
1
1
|
# @abhishekmcp/notes
|
|
2
2
|
|
|
3
|
-
An [MCP](https://modelcontextprotocol.io) server for managing local markdown notes. Lets any MCP client (Claude Desktop, Claude Code, Cursor, …) search,
|
|
3
|
+
An [MCP](https://modelcontextprotocol.io) server for managing local markdown notes. Lets any MCP client (Claude Desktop, Claude Code, Cursor, …) search, link, and organize the notes in a folder on your machine — with ranked full-text search, tags, todos, and a wiki-link knowledge graph.
|
|
4
|
+
|
|
5
|
+
Pure JavaScript, no native dependencies, no API keys — everything runs locally.
|
|
4
6
|
|
|
5
7
|
## Features
|
|
6
8
|
|
|
7
|
-
|
|
8
|
-
- `list_notes` — list
|
|
9
|
-
- `read_note` — read a note's
|
|
10
|
-
- `
|
|
11
|
-
- `
|
|
9
|
+
### Notes (token-efficient I/O)
|
|
10
|
+
- `list_notes` — list notes (newest first) with pagination (`offset`/`limit`) and an optional `tag` filter
|
|
11
|
+
- `read_note` — read a note; optionally just one heading's `section`, or a character window (`offset`/`limit`) with a truncation flag
|
|
12
|
+
- `get_outline` — return only a note's heading tree (grasp a big note in a few tokens)
|
|
13
|
+
- `create_note` — create a new note (optional `overwrite`)
|
|
14
|
+
- `append_note` — append to a note, creating it if missing (great for journals/logs)
|
|
12
15
|
- `delete_note` — delete a note
|
|
13
|
-
- `
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
16
|
+
- `move_note` — rename/move a note **and rewrite every `[[wiki-link]]`** across the vault that points at it
|
|
17
|
+
|
|
18
|
+
### Search & discovery
|
|
19
|
+
- `search_notes` — ranked full-text search ([MiniSearch](https://github.com/lucaong/minisearch)); supports `fuzzy` and `prefix` matching, a `field` filter (`title`/`tag`/`body`/`path`), and returns ranked snippets with surrounding context
|
|
20
|
+
- `semantic_search` — **meaning-based** search using local embeddings; finds related notes even with no shared keywords (e.g. "puppy" matches a note about "canine companions"). Optional `hybrid` mode fuses semantic + keyword ranking
|
|
21
|
+
- `list_tags` — every tag across the vault with note counts
|
|
22
|
+
- `list_todos` — aggregate `- [ ]` / `- [x]` checkboxes across all notes
|
|
23
|
+
|
|
24
|
+
### Knowledge graph
|
|
25
|
+
- `get_backlinks` — notes linking to a note via `[[wiki-link]]` syntax
|
|
26
|
+
- `get_neighbors` — notes within N hops over the (undirected) link graph (depth/limit capped)
|
|
27
|
+
- `find_path` — shortest wiki-link chain between two notes
|
|
28
|
+
- `related_notes` — notes ranked by shared links + shared tags
|
|
29
|
+
- `graph_overview` — aggregate health: note/link/tag counts, top hubs, orphans, broken-link count
|
|
30
|
+
- `broken_links` — wiki-links that point at notes which don't exist
|
|
31
|
+
|
|
32
|
+
### Resources
|
|
17
33
|
- Every note is exposed as a `notes://<name>` resource.
|
|
18
34
|
|
|
19
|
-
|
|
35
|
+
### Frontmatter & tags
|
|
36
|
+
Notes may start with a YAML frontmatter block; `title` and `tags` (a list or comma-separated string) are recognized. Inline `#hashtags` in the body are also collected as tags.
|
|
20
37
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
38
|
+
```markdown
|
|
39
|
+
---
|
|
40
|
+
title: My Note
|
|
41
|
+
tags: [project, ideas]
|
|
42
|
+
---
|
|
43
|
+
# My Note
|
|
44
|
+
Links to [[another-note]]. Some inline #tag too.
|
|
26
45
|
```
|
|
27
46
|
|
|
28
47
|
## Configuration
|
|
29
48
|
|
|
30
|
-
|
|
49
|
+
All via environment variables:
|
|
50
|
+
|
|
51
|
+
| Variable | Default | Effect |
|
|
52
|
+
|----------|---------|--------|
|
|
53
|
+
| `NOTES_DIR` | `~/notes` | Directory where notes live (a leading `~` is expanded). |
|
|
54
|
+
| `NOTES_READONLY` | _unset_ | Set to `1` to disable all mutating tools (`create`/`append`/`delete`/`move` are not even registered) — safe for sharing a vault. |
|
|
55
|
+
| `NOTES_NO_CACHE` | _unset_ | Set to `1` to skip the on-disk index cache and rebuild in memory each start. |
|
|
56
|
+
| `NOTES_MODEL_DIR` | `~/.cache/mcp-notes/models` | Where the semantic-search embedding model is cached. |
|
|
57
|
+
|
|
58
|
+
### Semantic search & the embedding model
|
|
59
|
+
`semantic_search` runs the [all-MiniLM-L6-v2](https://huggingface.co/Xenova/all-MiniLM-L6-v2) model **locally** via WebAssembly ([onnxruntime-web](https://github.com/microsoft/onnxruntime)) — no API keys, no native dependencies, no data leaves your machine. The quantized model (~23 MB) is downloaded **once** on first use into `NOTES_MODEL_DIR` and cached; embeddings are stored in `<NOTES_DIR>/.notes-embeddings.json` and incrementally updated as notes change. The first `semantic_search` call needs network access for the download and embeds the whole vault; everything after that is offline and fast. Keyword search and all other tools work without ever triggering this.
|
|
60
|
+
|
|
61
|
+
### Index cache
|
|
62
|
+
For fast warm starts the server persists its search index to `<NOTES_DIR>/.notes-index.json` and, on startup, incrementally re-parses only the notes that changed (by mtime/size) since last run. The cache is rebuilt automatically if it's missing, unreadable, or from an older index version. Files on disk are always the source of truth.
|
|
63
|
+
|
|
64
|
+
## Security
|
|
65
|
+
|
|
66
|
+
All filesystem access is sandboxed to the notes directory:
|
|
67
|
+
- Path traversal (`../`) and absolute paths are rejected.
|
|
68
|
+
- Symlinks inside the vault that resolve outside it are rejected (realpath containment).
|
|
69
|
+
- Any single file larger than 5 MB is refused (DoS / context guard).
|
|
70
|
+
- Writes are atomic (temp file + rename), so a crash can't leave a torn note.
|
|
71
|
+
|
|
72
|
+
## Usage
|
|
73
|
+
|
|
74
|
+
### Claude Code
|
|
31
75
|
|
|
32
76
|
```bash
|
|
33
|
-
|
|
77
|
+
claude mcp add notes --env NOTES_DIR=$HOME/notes -- npx -y @abhishekmcp/notes
|
|
34
78
|
```
|
|
35
79
|
|
|
36
|
-
## Connecting to a client
|
|
37
|
-
|
|
38
80
|
### Claude Desktop
|
|
39
81
|
|
|
40
82
|
Add to `claude_desktop_config.json`:
|
|
@@ -43,38 +85,27 @@ Add to `claude_desktop_config.json`:
|
|
|
43
85
|
{
|
|
44
86
|
"mcpServers": {
|
|
45
87
|
"notes": {
|
|
46
|
-
"command": "
|
|
47
|
-
"args": ["
|
|
88
|
+
"command": "npx",
|
|
89
|
+
"args": ["-y", "@abhishekmcp/notes"],
|
|
48
90
|
"env": { "NOTES_DIR": "/absolute/path/to/your/notes" }
|
|
49
91
|
}
|
|
50
92
|
}
|
|
51
93
|
}
|
|
52
94
|
```
|
|
53
95
|
|
|
54
|
-
|
|
96
|
+
To share a vault read-only, add `"NOTES_READONLY": "1"` to `env`.
|
|
97
|
+
|
|
98
|
+
## Develop from source
|
|
55
99
|
|
|
56
100
|
```bash
|
|
57
|
-
|
|
101
|
+
npm install # from the repo root
|
|
102
|
+
npm run build -w servers/notes
|
|
103
|
+
node servers/notes/dist/index.js # NOTES_DIR=... to point at a vault
|
|
58
104
|
```
|
|
59
105
|
|
|
60
106
|
## Publishing to npm
|
|
61
107
|
|
|
62
|
-
|
|
63
|
-
release tagged `notes-v<version>` is created. See the repo root for the CD workflow.
|
|
64
|
-
|
|
65
|
-
Once published, users can run it without cloning:
|
|
66
|
-
|
|
67
|
-
```json
|
|
68
|
-
{
|
|
69
|
-
"mcpServers": {
|
|
70
|
-
"notes": {
|
|
71
|
-
"command": "npx",
|
|
72
|
-
"args": ["-y", "@abhishekmcp/notes"],
|
|
73
|
-
"env": { "NOTES_DIR": "/path/to/notes" }
|
|
74
|
-
}
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
|
-
```
|
|
108
|
+
Publishes automatically via GitHub Actions (Trusted Publishing / OIDC) when a release tagged `notes-v<version>` is created. See the repo root for the CD workflow.
|
|
78
109
|
|
|
79
110
|
## License
|
|
80
111
|
|
package/dist/config.d.ts
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/** Index cache format version — bump to force a full rebuild on upgrade. */
|
|
2
|
+
export declare const INDEX_VERSION = 1;
|
|
3
|
+
/** Cache file name kept inside the notes dir (excluded from notes). */
|
|
4
|
+
export declare const INDEX_FILENAME = ".notes-index.json";
|
|
5
|
+
/** Refuse to read/index any single note larger than this (DoS / context guard). */
|
|
6
|
+
export declare const MAX_FILE_BYTES: number;
|
|
7
|
+
/**
|
|
8
|
+
* Resolve the notes directory from NOTES_DIR, defaulting to ~/notes.
|
|
9
|
+
* A leading "~" is expanded to the home directory.
|
|
10
|
+
*/
|
|
11
|
+
export declare function getNotesDir(): string;
|
|
12
|
+
/** Absolute path to the persisted index cache. */
|
|
13
|
+
export declare function getIndexPath(): string;
|
|
14
|
+
/** When true, all mutating tools are disabled (safe for sharing a vault read-only). */
|
|
15
|
+
export declare function isReadOnly(): boolean;
|
|
16
|
+
/** When true, skip the on-disk index cache and rebuild in memory each start. */
|
|
17
|
+
export declare function cacheDisabled(): boolean;
|
|
18
|
+
/** Embedding model identity (recorded in the cache to invalidate on change). */
|
|
19
|
+
export declare const EMBED_MODEL_ID = "Xenova/all-MiniLM-L6-v2:quantized";
|
|
20
|
+
/** Embedding dimensionality of all-MiniLM-L6-v2. */
|
|
21
|
+
export declare const EMBED_DIM = 384;
|
|
22
|
+
/** Max WordPiece tokens fed to the model (longer notes are truncated). */
|
|
23
|
+
export declare const EMBED_MAX_TOKENS = 256;
|
|
24
|
+
/** Bump to force re-embedding of every note on upgrade. */
|
|
25
|
+
export declare const EMBED_CACHE_VERSION = 1;
|
|
26
|
+
/** Sidecar cache of per-note vectors, kept inside the notes dir. */
|
|
27
|
+
export declare const EMBEDDINGS_FILENAME = ".notes-embeddings.json";
|
|
28
|
+
/** Quantized ONNX model (~23 MB) — downloaded once at runtime. */
|
|
29
|
+
export declare const EMBED_MODEL_URL = "https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/onnx/model_quantized.onnx";
|
|
30
|
+
/** BERT-uncased vocabulary (~232 KB) for the hand-rolled tokenizer. */
|
|
31
|
+
export declare const EMBED_VOCAB_URL = "https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/vocab.txt";
|
|
32
|
+
/** Local filenames for the cached artifacts. */
|
|
33
|
+
export declare const EMBED_MODEL_FILE = "all-MiniLM-L6-v2.quantized.onnx";
|
|
34
|
+
export declare const EMBED_VOCAB_FILE = "all-MiniLM-L6-v2.vocab.txt";
|
|
35
|
+
/**
|
|
36
|
+
* Directory where the embedding model + vocab are cached (downloaded once per
|
|
37
|
+
* machine). Override with NOTES_MODEL_DIR; defaults to ~/.cache/mcp-notes/models.
|
|
38
|
+
*/
|
|
39
|
+
export declare function getModelDir(): string;
|
|
40
|
+
/** Absolute path to the persisted embeddings cache (sidecar to the text index). */
|
|
41
|
+
export declare function getEmbeddingsPath(): string;
|
package/dist/config.js
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { homedir } from "node:os";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
/**
 * Format version of the index cache. Bump it to force a full rebuild on
 * upgrade.
 */
export const INDEX_VERSION = 1;
/** File name of the index cache, kept inside the notes dir (excluded from notes). */
export const INDEX_FILENAME = ".notes-index.json";
/**
 * Size ceiling for a single note: 5 MB. Anything larger is refused when
 * reading/indexing (DoS / context guard).
 */
export const MAX_FILE_BYTES = 5 * 1024 ** 2;
|
|
9
|
+
/**
 * Resolve the notes directory from the NOTES_DIR environment variable,
 * defaulting to ~/notes.
 *
 * Only a bare "~" or a "~" followed by a path separator is expanded to the
 * home directory. A value that merely begins with "~" (for example
 * "~backup") is used as written instead of being rewritten to a path under
 * the home directory. An unset or empty NOTES_DIR selects the default rather
 * than silently resolving to the current working directory.
 *
 * @returns The absolute, normalized path of the notes directory.
 */
export function getNotesDir() {
    const home = homedir();
    // `||` (not `??`) so that NOTES_DIR="" also falls back to the default.
    const raw = process.env.NOTES_DIR || path.join(home, "notes");
    const hasHomePrefix = raw === "~" || raw.startsWith("~/") || raw.startsWith(`~${path.sep}`);
    const expanded = hasHomePrefix ? path.join(home, raw.slice(1)) : raw;
    return path.resolve(expanded);
}
|
|
20
|
+
/** Location of the persisted index cache: INDEX_FILENAME inside the notes dir. */
export function getIndexPath() {
    const notesDir = getNotesDir();
    return path.join(notesDir, INDEX_FILENAME);
}
|
|
24
|
+
/**
 * Report whether read-only mode is on, i.e. NOTES_READONLY is exactly "1".
 * When true, all mutating tools are disabled (safe for sharing a vault
 * read-only).
 */
export function isReadOnly() {
    const flag = process.env.NOTES_READONLY;
    return flag === "1";
}
|
|
28
|
+
/**
 * Report whether the on-disk index cache is switched off, i.e. NOTES_NO_CACHE
 * is exactly "1". When true, the index is rebuilt in memory on each start.
 */
export function cacheDisabled() {
    const flag = process.env.NOTES_NO_CACHE;
    return flag === "1";
}
|
|
32
|
+
// --- Semantic search (v0.3) ----------------------------------------------
/** Identity of the embedding model; recorded in the cache so a change invalidates it. */
export const EMBED_MODEL_ID = "Xenova/all-MiniLM-L6-v2:quantized";
/** Length of the vectors produced by all-MiniLM-L6-v2. */
export const EMBED_DIM = 384;
/** Upper bound on WordPiece tokens fed to the model; longer notes are truncated. */
export const EMBED_MAX_TOKENS = 256;
/** Embeddings cache version — bump to force every note to be re-embedded on upgrade. */
export const EMBED_CACHE_VERSION = 1;
/** Name of the sidecar file holding per-note vectors, kept inside the notes dir. */
export const EMBEDDINGS_FILENAME = ".notes-embeddings.json";
/** Common URL prefix of the model artifacts on Hugging Face. */
const HF_BASE = "https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main";
/** URL of the quantized ONNX model (~23 MB), downloaded once at runtime. */
export const EMBED_MODEL_URL = HF_BASE + "/onnx/model_quantized.onnx";
/** URL of the BERT-uncased vocabulary (~232 KB) used by the hand-rolled tokenizer. */
export const EMBED_VOCAB_URL = HF_BASE + "/vocab.txt";
/** Local file name under which the downloaded model is cached. */
export const EMBED_MODEL_FILE = "all-MiniLM-L6-v2.quantized.onnx";
/** Local file name under which the downloaded vocabulary is cached. */
export const EMBED_VOCAB_FILE = "all-MiniLM-L6-v2.vocab.txt";
|
|
51
|
+
/**
 * Directory where the embedding model + vocab are cached (downloaded once per
 * machine). Override with NOTES_MODEL_DIR; defaults to
 * ~/.cache/mcp-notes/models when the variable is unset or empty.
 *
 * Only a bare "~" or a "~" followed by a path separator is expanded to the
 * home directory. An override that merely begins with "~" (for example
 * "~models") is used as written instead of being rewritten to a path under
 * the home directory.
 *
 * @returns The absolute path of the model cache directory.
 */
export function getModelDir() {
    const home = homedir();
    const override = process.env.NOTES_MODEL_DIR;
    if (!override) {
        return path.join(home, ".cache", "mcp-notes", "models");
    }
    const hasHomePrefix = override === "~" || override.startsWith("~/") || override.startsWith(`~${path.sep}`);
    const expanded = hasHomePrefix ? path.join(home, override.slice(1)) : override;
    return path.resolve(expanded);
}
|
|
63
|
+
/**
 * Location of the persisted embeddings cache (sidecar to the text index):
 * EMBEDDINGS_FILENAME inside the notes dir.
 */
export function getEmbeddingsPath() {
    const notesDir = getNotesDir();
    return path.join(notesDir, EMBEDDINGS_FILENAME);
}
|
|
67
|
+
//# sourceMappingURL=config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAC;AAClC,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,4EAA4E;AAC5E,MAAM,CAAC,MAAM,aAAa,GAAG,CAAC,CAAC;AAE/B,uEAAuE;AACvE,MAAM,CAAC,MAAM,cAAc,GAAG,mBAAmB,CAAC;AAElD,mFAAmF;AACnF,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,OAAO;AAEtD;;;GAGG;AACH,MAAM,UAAU,WAAW;IACzB,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,SAAS,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,OAAO,CAAC,CAAC;IACnE,MAAM,QAAQ,GAAG,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC;QAClC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACpC,CAAC,CAAC,GAAG,CAAC;IACR,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;AAChC,CAAC;AAED,kDAAkD;AAClD,MAAM,UAAU,YAAY;IAC1B,OAAO,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,cAAc,CAAC,CAAC;AAClD,CAAC;AAED,uFAAuF;AACvF,MAAM,UAAU,UAAU;IACxB,OAAO,OAAO,CAAC,GAAG,CAAC,cAAc,KAAK,GAAG,CAAC;AAC5C,CAAC;AAED,gFAAgF;AAChF,MAAM,UAAU,aAAa;IAC3B,OAAO,OAAO,CAAC,GAAG,CAAC,cAAc,KAAK,GAAG,CAAC;AAC5C,CAAC;AAED,4EAA4E;AAE5E,gFAAgF;AAChF,MAAM,CAAC,MAAM,cAAc,GAAG,mCAAmC,CAAC;AAClE,oDAAoD;AACpD,MAAM,CAAC,MAAM,SAAS,GAAG,GAAG,CAAC;AAC7B,0EAA0E;AAC1E,MAAM,CAAC,MAAM,gBAAgB,GAAG,GAAG,CAAC;AACpC,2DAA2D;AAC3D,MAAM,CAAC,MAAM,mBAAmB,GAAG,CAAC,CAAC;AACrC,oEAAoE;AACpE,MAAM,CAAC,MAAM,mBAAmB,GAAG,wBAAwB,CAAC;AAE5D,MAAM,OAAO,GAAG,6DAA6D,CAAC;AAC9E,kEAAkE;AAClE,MAAM,CAAC,MAAM,eAAe,GAAG,GAAG,OAAO,4BAA4B,CAAC;AACtE,uEAAuE;AACvE,MAAM,CAAC,MAAM,eAAe,GAAG,GAAG,OAAO,YAAY,CAAC;AACtD,gDAAgD;AAChD,MAAM,CAAC,MAAM,gBAAgB,GAAG,iCAAiC,CAAC;AAClE,MAAM,CAAC,MAAM,gBAAgB,GAAG,4BAA4B,CAAC;AAE7D;;;GAGG;AACH,MAAM,UAAU,WAAW;IACzB,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,eAAe,CAAC;IAC7C,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,QAAQ,GAAG,QAAQ,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;QAC/F,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;IAChC,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,QAAQ,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC;AAC/D,CAAC;AAED,mFAA
mF;AACnF,MAAM,UAAU,iBAAiB;IAC/B,OAAO,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,mBAAmB,CAAC,CAAC;AACvD,CAAC"}
|
package/dist/embed.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/** Idempotent, concurrency-safe lazy initialization. */
|
|
2
|
+
export declare function ensureModel(): Promise<void>;
|
|
3
|
+
/** True once the model has been downloaded + loaded. */
|
|
4
|
+
export declare function isReady(): boolean;
|
|
5
|
+
/**
|
|
6
|
+
* Embed a single text into an L2-normalized 384-dim vector (cosine == dot
|
|
7
|
+
* product). Mean-pools the model's last_hidden_state over the attention mask.
|
|
8
|
+
*/
|
|
9
|
+
export declare function embed(text: string): Promise<Float32Array>;
|
package/dist/embed.js
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lazy embedding engine: runs all-MiniLM-L6-v2 (ONNX) on onnxruntime-web (WASM,
|
|
3
|
+
* no native deps). The model + vocab are downloaded once to a persistent cache
|
|
4
|
+
* on first use; nothing here runs at server startup. `onnxruntime-web` is
|
|
5
|
+
* dynamically imported so a server that never does semantic search never loads
|
|
6
|
+
* the WASM runtime.
|
|
7
|
+
*/
|
|
8
|
+
import { promises as fs } from "node:fs";
|
|
9
|
+
import { createRequire } from "node:module";
|
|
10
|
+
import path from "node:path";
|
|
11
|
+
import { EMBED_DIM, EMBED_MAX_TOKENS, EMBED_MODEL_FILE, EMBED_MODEL_URL, EMBED_VOCAB_FILE, EMBED_VOCAB_URL, getModelDir, } from "./config.js";
|
|
12
|
+
import { WordPieceTokenizer } from "./tokenizer.js";
|
|
13
|
+
/** ONNX inference session; stays null until init() has created it. */
let session = null;
/** WordPiece tokenizer built from the cached vocab; stays null until init() has built it. */
let tokenizer = null;
/** Promise of the current init() run, reset to null when that run fails. */
let initPromise = null;
|
|
16
|
+
/** Download a URL to `dest` atomically (temp + rename), retrying on 429/5xx. */
|
|
17
|
+
async function download(url, dest) {
|
|
18
|
+
let lastErr;
|
|
19
|
+
for (let attempt = 0; attempt < 4; attempt++) {
|
|
20
|
+
try {
|
|
21
|
+
const res = await fetch(url);
|
|
22
|
+
if (res.status === 429 || res.status >= 500) {
|
|
23
|
+
throw new Error(`HTTP ${res.status} fetching ${url}`);
|
|
24
|
+
}
|
|
25
|
+
if (!res.ok)
|
|
26
|
+
throw new Error(`HTTP ${res.status} fetching ${url}`);
|
|
27
|
+
const buf = Buffer.from(await res.arrayBuffer());
|
|
28
|
+
const tmp = `${dest}.${process.pid}.tmp`;
|
|
29
|
+
await fs.writeFile(tmp, buf);
|
|
30
|
+
await fs.rename(tmp, dest);
|
|
31
|
+
return;
|
|
32
|
+
}
|
|
33
|
+
catch (err) {
|
|
34
|
+
lastErr = err;
|
|
35
|
+
await new Promise((r) => setTimeout(r, 500 * 2 ** attempt)); // backoff
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
throw new Error(`Failed to download ${url}: ${lastErr?.message ?? lastErr}`);
|
|
39
|
+
}
|
|
40
|
+
/**
 * Make sure `dest` is present on disk. When it cannot be accessed, its parent
 * directory is created (recursively) and the file is downloaded from `url`.
 */
async function ensureFile(url, dest) {
    const present = await fs.access(dest).then(() => true, () => false);
    if (present) {
        return;
    }
    const parent = path.dirname(dest);
    await fs.mkdir(parent, { recursive: true });
    await download(url, dest);
}
|
|
50
|
+
/**
 * One-time setup: fetch the vocab and model into the model cache directory if
 * they are missing, build the tokenizer from the vocab, then create the WASM
 * inference session from the model bytes.
 */
async function init() {
    const cacheDir = getModelDir();
    const vocabFile = path.join(cacheDir, EMBED_VOCAB_FILE);
    const modelFile = path.join(cacheDir, EMBED_MODEL_FILE);
    await ensureFile(EMBED_VOCAB_URL, vocabFile);
    await ensureFile(EMBED_MODEL_URL, modelFile);
    const vocabText = await fs.readFile(vocabFile, "utf8");
    tokenizer = new WordPieceTokenizer(vocabText);
    const ort = await import("onnxruntime-web");
    // Single-threaded: no SharedArrayBuffer / worker isolation needed.
    ort.env.wasm.numThreads = 1;
    // Best-effort: aim the WASM loader at the .wasm files shipped in
    // node_modules. (onnxruntime-web self-resolves from its own module URL
    // when this isn't set.)
    try {
        const localRequire = createRequire(import.meta.url);
        const entryPoint = localRequire.resolve("onnxruntime-web");
        ort.env.wasm.wasmPaths = path.dirname(entryPoint) + path.sep;
    }
    catch {
        /* fall back to onnxruntime-web's own resolution */
    }
    const modelBuffer = await fs.readFile(modelFile);
    const modelBytes = new Uint8Array(modelBuffer);
    session = await ort.InferenceSession.create(modelBytes, { executionProviders: ["wasm"] });
}
|
|
72
|
+
/**
 * Lazily initialize the model. Returns at once when the session and tokenizer
 * already exist; otherwise awaits the shared init() promise, starting one if
 * none is pending. A failed init() clears the shared promise so that a later
 * call can try again.
 */
export async function ensureModel() {
    const alreadyLoaded = Boolean(session && tokenizer);
    if (alreadyLoaded) {
        return;
    }
    if (!initPromise) {
        const resetOnFailure = (err) => {
            initPromise = null; // allow retry on a later call
            throw err;
        };
        initPromise = init().catch(resetOnFailure);
    }
    await initPromise;
}
|
|
84
|
+
/** Whether both the inference session and the tokenizer have been set (non-null). */
export function isReady() {
    const missing = session === null || tokenizer === null;
    return !missing;
}
|
|
88
|
+
/**
 * Embed a single text into an L2-normalized vector of EMBED_DIM floats
 * (cosine == dot product). Mean-pools the model's token-level output
 * (`last_hidden_state`, or the first output when that name is absent) over
 * the attention mask.
 *
 * @param text Text to embed; tokenized with a cap of EMBED_MAX_TOKENS tokens.
 * @returns A Float32Array of length EMBED_DIM.
 * @throws Error when the model output does not hold exactly
 *         `tokens * EMBED_DIM` values, since pooling it would otherwise
 *         silently produce NaN or garbage vectors.
 */
export async function embed(text) {
    await ensureModel();
    const ort = await import("onnxruntime-web");
    const tok = tokenizer.encode(text, EMBED_MAX_TOKENS);
    const seq = tok.inputIds.length;
    const dims = [1, seq];
    const feeds = {
        input_ids: new ort.Tensor("int64", BigInt64Array.from(tok.inputIds, BigInt), dims),
        attention_mask: new ort.Tensor("int64", BigInt64Array.from(tok.attentionMask, BigInt), dims),
    };
    // Some exports require token_type_ids (all-zero for a single segment).
    if (session.inputNames.includes("token_type_ids")) {
        feeds.token_type_ids = new ort.Tensor("int64", new BigInt64Array(seq), dims);
    }
    const results = await session.run(feeds);
    const outName = session.outputNames.includes("last_hidden_state")
        ? "last_hidden_state"
        : session.outputNames[0];
    const data = results[outName].data; // expected layout: [1, seq, EMBED_DIM]
    // The pooling below indexes `data` as seq rows of EMBED_DIM values; refuse
    // any other size (e.g. an already-pooled fallback output) up front.
    const expectedLength = seq * EMBED_DIM;
    if (data.length !== expectedLength) {
        throw new Error(`Unexpected model output "${outName}": got ${data.length} values, expected ${expectedLength} (${seq} tokens x ${EMBED_DIM} dims).`);
    }
    // Mean-pool over tokens weighted by the attention mask, then L2-normalize.
    const out = new Float32Array(EMBED_DIM);
    let maskSum = 0;
    for (let t = 0; t < seq; t++) {
        const m = tok.attentionMask[t];
        if (!m)
            continue; // masked-out token contributes nothing
        maskSum += m;
        const base = t * EMBED_DIM;
        for (let d = 0; d < EMBED_DIM; d++)
            out[d] += data[base + d] * m;
    }
    const denom = maskSum || 1; // avoid dividing by zero when every token is masked
    let norm = 0;
    for (let d = 0; d < EMBED_DIM; d++) {
        out[d] /= denom;
        norm += out[d] * out[d];
    }
    norm = Math.sqrt(norm) || 1; // an all-zero vector stays all-zero
    for (let d = 0; d < EMBED_DIM; d++)
        out[d] /= norm;
    return out;
}
|
|
134
|
+
//# sourceMappingURL=embed.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embed.js","sourceRoot":"","sources":["../src/embed.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC5C,OAAO,IAAI,MAAM,WAAW,CAAC;AAE7B,OAAO,EACL,SAAS,EACT,gBAAgB,EAChB,gBAAgB,EAChB,eAAe,EACf,gBAAgB,EAChB,eAAe,EACf,WAAW,GACZ,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAC;AAEpD,IAAI,OAAO,GAAkC,IAAI,CAAC;AAClD,IAAI,SAAS,GAA8B,IAAI,CAAC;AAChD,IAAI,WAAW,GAAyB,IAAI,CAAC;AAE7C,gFAAgF;AAChF,KAAK,UAAU,QAAQ,CAAC,GAAW,EAAE,IAAY;IAC/C,IAAI,OAAgB,CAAC;IACrB,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC,EAAE,OAAO,EAAE,EAAE,CAAC;QAC7C,IAAI,CAAC;YACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;YAC7B,IAAI,GAAG,CAAC,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,MAAM,IAAI,GAAG,EAAE,CAAC;gBAC5C,MAAM,IAAI,KAAK,CAAC,QAAQ,GAAG,CAAC,MAAM,aAAa,GAAG,EAAE,CAAC,CAAC;YACxD,CAAC;YACD,IAAI,CAAC,GAAG,CAAC,EAAE;gBAAE,MAAM,IAAI,KAAK,CAAC,QAAQ,GAAG,CAAC,MAAM,aAAa,GAAG,EAAE,CAAC,CAAC;YACnE,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC;YACjD,MAAM,GAAG,GAAG,GAAG,IAAI,IAAI,OAAO,CAAC,GAAG,MAAM,CAAC;YACzC,MAAM,EAAE,CAAC,SAAS,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;YAC7B,MAAM,EAAE,CAAC,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;YAC3B,OAAO;QACT,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,GAAG,GAAG,CAAC;YACd,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,GAAG,GAAG,CAAC,IAAI,OAAO,CAAC,CAAC,CAAC,CAAC,UAAU;QACzE,CAAC;IACH,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,sBAAsB,GAAG,KAAM,OAAiB,EAAE,OAAO,IAAI,OAAO,EAAE,CAAC,CAAC;AAC1F,CAAC;AAED,8DAA8D;AAC9D,KAAK,UAAU,UAAU,CAAC,GAAW,EAAE,IAAY;IACjD,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QACxD,MAAM,QAAQ,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;IAC5B,CAAC;AACH,CAAC;AAED,kFAAkF;AAClF,KAAK,UAAU,IAAI;IACjB,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;IAC1B,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,gBAAgB,CAAC,CAAC;IACnD,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI
,CAAC,GAAG,EAAE,gBAAgB,CAAC,CAAC;IACnD,MAAM,UAAU,CAAC,eAAe,EAAE,SAAS,CAAC,CAAC;IAC7C,MAAM,UAAU,CAAC,eAAe,EAAE,SAAS,CAAC,CAAC;IAE7C,SAAS,GAAG,IAAI,kBAAkB,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC;IAEzE,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;IAC5C,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,gEAAgE;IAC7F,2EAA2E;IAC3E,+EAA+E;IAC/E,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC/C,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,iBAAiB,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC;IACvF,CAAC;IAAC,MAAM,CAAC;QACP,mDAAmD;IACrD,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;IAChE,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAAC,UAAU,EAAE,EAAE,kBAAkB,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;AAC5F,CAAC;AAED,wDAAwD;AACxD,MAAM,CAAC,KAAK,UAAU,WAAW;IAC/B,IAAI,OAAO,IAAI,SAAS;QAAE,OAAO;IACjC,IAAI,CAAC,WAAW,EAAE,CAAC;QACjB,WAAW,GAAG,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;YACjC,WAAW,GAAG,IAAI,CAAC,CAAC,8BAA8B;YAClD,MAAM,GAAG,CAAC;QACZ,CAAC,CAAC,CAAC;IACL,CAAC;IACD,MAAM,WAAW,CAAC;AACpB,CAAC;AAED,wDAAwD;AACxD,MAAM,UAAU,OAAO;IACrB,OAAO,OAAO,KAAK,IAAI,IAAI,SAAS,KAAK,IAAI,CAAC;AAChD,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,IAAY;IACtC,MAAM,WAAW,EAAE,CAAC;IACpB,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;IAC5C,MAAM,GAAG,GAAG,SAAU,CAAC,MAAM,CAAC,IAAI,EAAE,gBAAgB,CAAC,CAAC;IACtD,MAAM,GAAG,GAAG,GAAG,CAAC,QAAQ,CAAC,MAAM,CAAC;IAChC,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAEtB,MAAM,KAAK,GAAiC;QAC1C,SAAS,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE,IAAI,CAAC;QAClF,cAAc,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,EAAE,IAAI,CAAC;KAC7F,CAAC;IACF,uEAAuE;IACvE,IAAI,OAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;QACnD,KAAK,CAAC,cAAc,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,aAAa,CAAC,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC;IAC/E,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,OAAQ,CAAC,GA
AG,CAAC,KAAK,CAAC,CAAC;IAC1C,MAAM,OAAO,GAAG,OAAQ,CAAC,WAAW,CAAC,QAAQ,CAAC,mBAAmB,CAAC;QAChE,CAAC,CAAC,mBAAmB;QACrB,CAAC,CAAC,OAAQ,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;IAC5B,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,IAAoB,CAAC,CAAC,sBAAsB;IAE1E,2EAA2E;IAC3E,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC;IACxC,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7B,MAAM,CAAC,GAAG,GAAG,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,CAAC,CAAC;YAAE,SAAS;QACjB,OAAO,IAAI,CAAC,CAAC;QACb,MAAM,IAAI,GAAG,CAAC,GAAG,SAAS,CAAC;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;IACnE,CAAC;IACD,MAAM,KAAK,GAAG,OAAO,IAAI,CAAC,CAAC;IAC3B,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;QACnC,GAAG,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC;QAChB,IAAI,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;IAC1B,CAAC;IACD,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE;QAAE,GAAG,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IACnD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
package/dist/fsutil.d.ts
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Validate a user-supplied note name. Rejects control characters, absolute
|
|
3
|
+
* paths, and absurdly long names. Returns the name with a trailing ".md".
|
|
4
|
+
*/
|
|
5
|
+
export declare function validateName(name: string): string;
|
|
6
|
+
/**
|
|
7
|
+
* Resolve a note name to a safe absolute path inside the notes dir, checking
|
|
8
|
+
* both lexical traversal (`../`) and symlink escapes. Async because the symlink
|
|
9
|
+
* check touches the filesystem.
|
|
10
|
+
*/
|
|
11
|
+
export declare function resolveSafe(name: string): Promise<string>;
|
|
12
|
+
/** Read a note's raw text, guarding against oversized files. */
|
|
13
|
+
export declare function readRaw(absPath: string): Promise<string>;
|
|
14
|
+
/** Atomically write a file: write to a temp sibling, then rename into place. */
|
|
15
|
+
export declare function atomicWrite(absPath: string, content: string): Promise<void>;
|
|
16
|
+
export interface NoteFile {
|
|
17
|
+
name: string;
|
|
18
|
+
fullPath: string;
|
|
19
|
+
size: number;
|
|
20
|
+
mtimeMs: number;
|
|
21
|
+
}
|
|
22
|
+
/** Recursively list markdown note files, skipping dotfiles and the index cache. */
|
|
23
|
+
export declare function listNoteFiles(): Promise<NoteFile[]>;
|
package/dist/fsutil.js
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import { promises as fs } from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { getNotesDir, INDEX_FILENAME, MAX_FILE_BYTES } from "./config.js";
|
|
4
|
+
/** Matches ASCII control characters (0x00–0x1f and DEL 0x7f). */
|
|
5
|
+
const CONTROL_CHARS = /[\u0000-\u001f\u007f]/;
|
|
6
|
+
/**
 * Check a caller-provided note name and normalise its extension.
 *
 * The checks run in a fixed order and the first one that fails decides the
 * error message: non-empty string, length limit (512), no ASCII control
 * characters, not an absolute path.
 *
 * Returns the name unchanged when it already ends in ".md", otherwise the
 * name with ".md" appended. Throws an Error for any rejected name.
 */
export function validateName(name) {
    const isUsableString = typeof name === "string" && name.length > 0;
    if (!isUsableString) {
        throw new Error("Note name must be a non-empty string.");
    }
    // Remaining rules as [predicate, message] pairs, evaluated lazily in order.
    const rules = [
        [() => name.length > 512, "Note name is too long (max 512 chars)."],
        [() => CONTROL_CHARS.test(name), "Note name contains control characters."],
        [() => path.isAbsolute(name), "Note name must be relative, not an absolute path."],
    ];
    for (const [violated, message] of rules) {
        if (violated()) {
            throw new Error(message);
        }
    }
    if (name.endsWith(".md")) {
        return name;
    }
    return `${name}.md`;
}
|
|
25
|
+
/**
 * Lexical containment check: does `resolved` stay strictly inside `dir`?
 *
 * Purely string/path based: nothing is read from disk, so symlinks are NOT
 * considered here.
 *
 * Returns false when `resolved` is `dir` itself, when the relative path climbs
 * out through a leading ".." segment, or when no relative path exists (for
 * example a different drive on Windows, where path.relative yields an absolute
 * path). Returns true otherwise.
 */
function lexicallyInside(dir, resolved) {
    const rel = path.relative(dir, resolved);
    if (rel === "") {
        return false; // the notes directory itself is not a note
    }
    if (path.isAbsolute(rel)) {
        return false;
    }
    // Compare the first path *segment* with "..", not the first two characters:
    // a plain prefix test would also reject legitimate in-directory names such
    // as "..draft.md" or "..archive/a.md".
    return rel !== ".." && !rel.startsWith(`..${path.sep}`);
}
|
|
30
|
+
/**
 * Verify, following symlinks, that `absPath` resolves inside the notes dir.
 *
 * Walks up from `absPath` to the nearest ancestor that exists on disk (so it
 * also works for files that have not been created yet), realpaths that
 * ancestor, then re-appends the non-existing suffix lexically. This defeats
 * symlinks placed *inside* the notes dir that point outside it.
 *
 * Resolves to true when the real location is the notes dir itself or lies
 * below it, and to false otherwise (including when the walk reaches the
 * filesystem root without finding an existing path). Any realpath error other
 * than ENOENT is rethrown.
 */
async function realpathInside(absPath) {
    const root = await fs.realpath(getNotesDir());
    let cur = absPath;
    // Walk up until we hit a path that exists on disk.
    for (;;) {
        try {
            const realCur = await fs.realpath(cur);
            const suffix = path.relative(cur, absPath); // "" when cur === absPath
            const finalReal = suffix ? path.join(realCur, suffix) : realCur;
            const rel = path.relative(root, finalReal);
            if (rel === "") {
                return true;
            }
            if (path.isAbsolute(rel)) {
                return false;
            }
            // Segment-aware test: only a leading ".." *segment* means the path
            // escapes. A bare startsWith("..") would also reject real entries
            // whose names merely begin with two dots (e.g. "..draft.md").
            return rel !== ".." && !rel.startsWith(`..${path.sep}`);
        }
        catch (err) {
            if (err.code !== "ENOENT")
                throw err;
            const parent = path.dirname(cur);
            if (parent === cur)
                return false; // reached filesystem root
            cur = parent;
        }
    }
}
|
|
58
|
+
/**
 * Turn a note name into a path inside the notes dir, or throw.
 *
 * The name is first validated (which also appends ".md" when missing) and
 * resolved against the notes dir. Two containment checks follow, in order:
 * a lexical one that catches `../` traversal, and a filesystem one that
 * catches symlink escapes. The function is async because of the latter.
 *
 * Returns the resolved path; throws an Error naming the offending input when
 * either check fails.
 */
export async function resolveSafe(name) {
    const notesDir = getNotesDir();
    const fileName = validateName(name);
    const target = path.resolve(notesDir, fileName);
    const lexicallyOk = lexicallyInside(notesDir, target);
    if (!lexicallyOk) {
        throw new Error(`Refusing to access "${name}": path escapes the notes directory.`);
    }
    const physicallyOk = await realpathInside(target);
    if (!physicallyOk) {
        throw new Error(`Refusing to access "${name}": resolves (via symlink) outside the notes directory.`);
    }
    return target;
}
|
|
74
|
+
/**
 * Load a note as UTF-8 text.
 *
 * The file is stat'ed first; when its size exceeds MAX_FILE_BYTES an Error is
 * thrown instead of reading it. Errors from stat/readFile propagate unchanged.
 */
export async function readRaw(absPath) {
    const { size } = await fs.stat(absPath);
    const oversized = size > MAX_FILE_BYTES;
    if (oversized) {
        throw new Error(`Note is too large to read (${size} bytes > ${MAX_FILE_BYTES} limit).`);
    }
    return fs.readFile(absPath, "utf8");
}
|
|
82
|
+
/**
 * Atomically write a file: write to a temp sibling, then rename into place.
 *
 * The parent directory is created if needed. The temp file lives next to the
 * target so the final rename stays within one directory.
 *
 * The temp name is unique per call (pid + timestamp + random suffix). A name
 * built from the pid alone is shared by concurrent writes to the same target
 * within one process, which makes the later rename fail with ENOENT or lets
 * the writers interleave their content.
 *
 * If writing or renaming fails, the temp file is removed (best effort) and the
 * original error is rethrown, so failed writes do not leave "*.tmp" litter.
 */
export async function atomicWrite(absPath, content) {
    await fs.mkdir(path.dirname(absPath), { recursive: true });
    const unique = `${process.pid}.${Date.now().toString(36)}${Math.random().toString(36).slice(2)}`;
    const tmp = `${absPath}.${unique}.tmp`;
    try {
        await fs.writeFile(tmp, content, "utf8");
        await fs.rename(tmp, absPath);
    }
    catch (err) {
        // Best-effort cleanup; the temp file may not exist if writeFile failed early.
        await fs.unlink(tmp).catch(() => { });
        throw err;
    }
}
|
|
89
|
+
/**
 * Collect every markdown note below the notes dir.
 *
 * The notes dir is created first if it does not exist. Directories are walked
 * depth-first in readdir order. Entries whose name starts with "." and the
 * index cache file are skipped (for a directory that means its whole subtree).
 * Only regular files ending in ".md" are reported.
 *
 * Each result carries the note name (path relative to the notes dir, without
 * ".md", "/"-separated), the file path, and the size/mtimeMs from fs.stat.
 */
export async function listNoteFiles() {
    const root = getNotesDir();
    await fs.mkdir(root, { recursive: true });
    const found = [];
    // Relative path -> note name: drop the extension, normalise separators to "/".
    const toNoteName = (file) => path.relative(root, file).replace(/\.md$/, "").split(path.sep).join("/");
    const visit = async (folder) => {
        const items = await fs.readdir(folder, { withFileTypes: true });
        for (const item of items) {
            const hidden = item.name.startsWith(".") || item.name === INDEX_FILENAME;
            if (hidden) {
                continue; // skip dotfiles/cache
            }
            const itemPath = path.join(folder, item.name);
            if (item.isDirectory()) {
                await visit(itemPath);
                continue;
            }
            const isNote = item.isFile() && item.name.endsWith(".md");
            if (!isNote) {
                continue;
            }
            const { size, mtimeMs } = await fs.stat(itemPath);
            found.push({ name: toNoteName(itemPath), fullPath: itemPath, size, mtimeMs });
        }
    };
    await visit(root);
    return found;
}
|
|
113
|
+
//# sourceMappingURL=fsutil.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fsutil.js","sourceRoot":"","sources":["../src/fsutil.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,aAAa,CAAC;AAE1E,iEAAiE;AACjE,MAAM,aAAa,GAAG,uBAAuB,CAAC;AAE9C;;;GAGG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAClD,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;IAC3D,CAAC;IACD,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;IAC5D,CAAC;IACD,IAAI,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;IAC5D,CAAC;IACD,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;IACvE,CAAC;IACD,OAAO,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,KAAK,CAAC;AACpD,CAAC;AAED,mFAAmF;AACnF,SAAS,eAAe,CAAC,GAAW,EAAE,QAAgB;IACpD,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IACzC,OAAO,GAAG,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;AAC7E,CAAC;AAED;;;;;GAKG;AACH,KAAK,UAAU,cAAc,CAAC,OAAe;IAC3C,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC,CAAC;IAC9C,IAAI,GAAG,GAAG,OAAO,CAAC;IAClB,mDAAmD;IACnD,SAAS,CAAC;QACR,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;YACvC,MAAM,MAAM,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC,CAAC,0BAA0B;YACtE,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YAChE,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAC3C,OAAO,GAAG,KAAK,EAAE,IAAI,CAAC,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;QACxE,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,IAAK,GAA6B,CAAC,IAAI,KAAK,QAAQ;gBAAE,MAAM,GAAG,CAAC;YAChE,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACjC,IAAI,MAAM,KAAK,GAAG;gBAAE,OAAO,KAAK,CAAC,CAAC,0BAA0B;YAC5D,GAAG,GAAG,MAAM,CAAC;QACf,CAAC;IA
CH,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,IAAY;IAC5C,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;IAC1B,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC;IACvD,IAAI,CAAC,eAAe,CAAC,GAAG,EAAE,QAAQ,CAAC,EAAE,CAAC;QACpC,MAAM,IAAI,KAAK,CAAC,uBAAuB,IAAI,sCAAsC,CAAC,CAAC;IACrF,CAAC;IACD,IAAI,CAAC,CAAC,MAAM,cAAc,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,KAAK,CAAC,uBAAuB,IAAI,wDAAwD,CAAC,CAAC;IACvG,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,gEAAgE;AAChE,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,OAAe;IAC3C,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACpC,IAAI,IAAI,CAAC,IAAI,GAAG,cAAc,EAAE,CAAC;QAC/B,MAAM,IAAI,KAAK,CAAC,8BAA8B,IAAI,CAAC,IAAI,YAAY,cAAc,UAAU,CAAC,CAAC;IAC/F,CAAC;IACD,OAAO,EAAE,CAAC,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;AACtC,CAAC;AAED,gFAAgF;AAChF,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,OAAe,EAAE,OAAe;IAChE,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3D,MAAM,GAAG,GAAG,GAAG,OAAO,IAAI,OAAO,CAAC,GAAG,MAAM,CAAC;IAC5C,MAAM,EAAE,CAAC,SAAS,CAAC,GAAG,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC;IACzC,MAAM,EAAE,CAAC,MAAM,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;AAChC,CAAC;AASD,mFAAmF;AACnF,MAAM,CAAC,KAAK,UAAU,aAAa;IACjC,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC;IAC1B,MAAM,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACzC,MAAM,GAAG,GAAe,EAAE,CAAC;IAE3B,KAAK,UAAU,IAAI,CAAC,OAAe;QACjC,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QACnE,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,IAAI,KAAK,cAAc;gBAAE,SAAS,CAAC,sBAAsB;YACjG,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;YAC5C,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;gBACxB,MAAM,IAAI,CAAC,IAAI,CAAC,CAAC;YACnB,CAAC;iBAAM,IAAI,KAAK,CAAC,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACxD,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBACjC,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CA
AC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACpF,GAAG,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,QAAQ,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;YAClF,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,IAAI,CAAC,GAAG,CAAC,CAAC;IAChB,OAAO,GAAG,CAAC;AACb,CAAC"}
|
package/dist/graph.d.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
export interface NodeRef {
|
|
2
|
+
name: string;
|
|
3
|
+
title: string;
|
|
4
|
+
}
|
|
5
|
+
/** Notes that link to `name` via [[wiki-link]]. */
|
|
6
|
+
export declare function getBacklinks(name: string): NodeRef[];
|
|
7
|
+
export interface Neighbor extends NodeRef {
|
|
8
|
+
distance: number;
|
|
9
|
+
}
|
|
10
|
+
/** BFS over the undirected link graph up to `depth`, capped at `limit` nodes. */
|
|
11
|
+
export declare function getNeighbors(name: string, depth?: number, limit?: number): Neighbor[];
|
|
12
|
+
/** Shortest wiki-link chain between two notes (BFS, undirected). Null if none. */
|
|
13
|
+
export declare function findPath(a: string, b: string): NodeRef[] | null;
|
|
14
|
+
export interface RelatedNote extends NodeRef {
|
|
15
|
+
score: number;
|
|
16
|
+
sharedLinks: number;
|
|
17
|
+
sharedTags: number;
|
|
18
|
+
}
|
|
19
|
+
/** Rank other notes by shared out-links + shared tags with `name`. */
|
|
20
|
+
export declare function relatedNotes(name: string, limit?: number): RelatedNote[];
|
|
21
|
+
export interface GraphOverview {
|
|
22
|
+
notes: number;
|
|
23
|
+
links: number;
|
|
24
|
+
tags: number;
|
|
25
|
+
brokenLinks: number;
|
|
26
|
+
orphans: NodeRef[];
|
|
27
|
+
hubs: Array<NodeRef & {
|
|
28
|
+
degree: number;
|
|
29
|
+
}>;
|
|
30
|
+
}
|
|
31
|
+
/** Aggregate graph health: counts, top hubs, orphans. */
|
|
32
|
+
export declare function graphOverview(hubLimit?: number, orphanLimit?: number): GraphOverview;
|
|
33
|
+
export interface BrokenLink {
|
|
34
|
+
from: string;
|
|
35
|
+
target: string;
|
|
36
|
+
}
|
|
37
|
+
/** Wiki-links that point at notes which don't exist. */
|
|
38
|
+
export declare function brokenLinks(): BrokenLink[];
|