daftari 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/CHANGELOG.md +96 -0
  2. package/README.md +74 -25
  3. package/dist/index.d.ts.map +1 -1
  4. package/dist/index.js +85 -2
  5. package/dist/index.js.map +1 -1
  6. package/dist/search/bm25.d.ts +1 -17
  7. package/dist/search/bm25.d.ts.map +1 -1
  8. package/dist/search/bm25.js +43 -65
  9. package/dist/search/bm25.js.map +1 -1
  10. package/dist/search/embedding-provider.d.ts +8 -0
  11. package/dist/search/embedding-provider.d.ts.map +1 -0
  12. package/dist/search/embedding-provider.js +26 -0
  13. package/dist/search/embedding-provider.js.map +1 -0
  14. package/dist/search/hybrid.d.ts.map +1 -1
  15. package/dist/search/hybrid.js +106 -34
  16. package/dist/search/hybrid.js.map +1 -1
  17. package/dist/search/index-state.d.ts +10 -0
  18. package/dist/search/index-state.d.ts.map +1 -1
  19. package/dist/search/index-state.js +58 -3
  20. package/dist/search/index-state.js.map +1 -1
  21. package/dist/search/providers/local-minilm.d.ts +7 -0
  22. package/dist/search/providers/local-minilm.d.ts.map +1 -0
  23. package/dist/search/providers/local-minilm.js +114 -0
  24. package/dist/search/providers/local-minilm.js.map +1 -0
  25. package/dist/search/providers/openai-3-small.d.ts +5 -0
  26. package/dist/search/providers/openai-3-small.d.ts.map +1 -0
  27. package/dist/search/providers/openai-3-small.js +174 -0
  28. package/dist/search/providers/openai-3-small.js.map +1 -0
  29. package/dist/search/reindex.d.ts.map +1 -1
  30. package/dist/search/reindex.js +63 -13
  31. package/dist/search/reindex.js.map +1 -1
  32. package/dist/search/self-write.d.ts +4 -0
  33. package/dist/search/self-write.d.ts.map +1 -0
  34. package/dist/search/self-write.js +62 -0
  35. package/dist/search/self-write.js.map +1 -0
  36. package/dist/search/vector.d.ts +10 -1
  37. package/dist/search/vector.d.ts.map +1 -1
  38. package/dist/search/vector.js +102 -59
  39. package/dist/search/vector.js.map +1 -1
  40. package/dist/search/watcher.d.ts +18 -0
  41. package/dist/search/watcher.d.ts.map +1 -0
  42. package/dist/search/watcher.js +300 -0
  43. package/dist/search/watcher.js.map +1 -0
  44. package/dist/storage/index-db.d.ts +6 -4
  45. package/dist/storage/index-db.d.ts.map +1 -1
  46. package/dist/storage/index-db.js +262 -39
  47. package/dist/storage/index-db.js.map +1 -1
  48. package/dist/tools/search.d.ts.map +1 -1
  49. package/dist/tools/search.js +11 -3
  50. package/dist/tools/search.js.map +1 -1
  51. package/dist/tools/write.d.ts.map +1 -1
  52. package/dist/tools/write.js +9 -0
  53. package/dist/tools/write.js.map +1 -1
  54. package/dist/utils/config.d.ts +5 -0
  55. package/dist/utils/config.d.ts.map +1 -1
  56. package/dist/utils/config.js +53 -0
  57. package/dist/utils/config.js.map +1 -1
  58. package/package.json +4 -2
package/CHANGELOG.md CHANGED
@@ -7,6 +7,102 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [1.9.0] - 2026-05-21
11
+
12
+ ### Added
13
+
14
+ - **fs.watch reactive indexing** (#38, PR 3 of 5). The server now keeps the
15
+ search index in sync with the markdown files at write time, not just at
16
+ startup. A chokidar watcher runs over the vault root after the MCP
17
+ transport is up and the cold-start reindex (if any) has finished;
18
+ `add` / `change` events trigger an `indexDocument()` pass for the
19
+ affected file, and `unlink` evicts the document and patches the
20
+ freshness manifest so the next startup does not see a missing file as
21
+ drift. Events are debounced per-path with a 500ms window — an
22
+ editor's atomic-rename save burst coalesces into one indexer call —
23
+ and `unlink` events re-stat before deleting, so FSEvents / iCloud /
24
+ Dropbox phantom unlink+add pairs during atomic-rename saves are
25
+ treated as a change instead of a delete. Daftari's own writes are
26
+ suppressed from the watcher path: the write-path tools register the
27
+ absolute path after their in-process `indexDocument()` returns, and
28
+ the watcher silently drops the chokidar event that follows. The new
29
+ `watch` config flag (default `true`) lets read-only or scripted
30
+ environments disable the watcher entirely. The startup freshness
31
+ check (manifest mtimes vs disk, see #36) remains as the reconciliation
32
+ backstop for events the watcher drops.
33
+
34
+ - **Pluggable embedding backend** (#38, PR 4 of 5). The embedding model is
35
+ no longer hard-coded; a new `EmbeddingProvider` interface lets the vault
36
+ owner choose between two backends in `.daftari/config.yaml`:
37
+
38
+ ```yaml
39
+ embeddings:
40
+ provider: local-minilm # default. Other values: openai-3-small.
41
+ ```
42
+
43
+ - **`local-minilm`** (default, 384-dim) is the existing
44
+ `all-MiniLM-L6-v2` path run via `@huggingface/transformers` — free,
45
+ fully local, slow on cold-start.
46
+ - **`openai-3-small`** (1536-dim) calls OpenAI's `text-embedding-3-small`
47
+ endpoint. ~10x faster on large vaults but paid. Requires
48
+ `OPENAI_API_KEY` in the server's environment; a missing key is a hard
49
+ config error at startup, not a silent fallback. Batches at 96 inputs
50
+ per request with exponential backoff on 429 / 5xx (up to 3 retries).
51
+
52
+ The `embeddings` table gains a `dim` column (schema bump 3 → 4) as
53
+ defense-in-depth against a corrupt or cross-provider mix. The schema
54
+ bump rebuilds the index cleanly — derived from the markdown files, no
55
+ manual migration needed. Switching providers between server runs is
56
+ safe: the `(content_hash, model)` composite PK lets both providers'
57
+ rows coexist, and the new provider's first reindex naturally populates
58
+ its own row set without re-embedding under the old id.
59
+
60
+ ### Changed
61
+
62
+ - **SQL-native search via FTS5 and sqlite-vec** (#38, PR 5 of 5 — closes
63
+ the #38 unbundle). The hand-rolled BM25 ranker (a JavaScript scan over
64
+ a JSON tokens column) and the brute-force JavaScript cosine loop are
65
+ both gone; lexical search now runs through an FTS5 virtual table
66
+ (`documents_fts`) and vector search through a sqlite-vec `vec0`
67
+ virtual table (`embeddings_vec`). Both halves are one prepared
68
+ statement; SQLite's built-in BM25 ranks FTS5 matches, sqlite-vec's
69
+ cosine KNN ranks vector matches. AFTER INSERT / UPDATE / DELETE
70
+ triggers on the `documents` table keep the FTS5 mirror in sync — the
71
+ indexer never writes to the virtual table directly. Schema bumped
72
+ 4 → 5; the index is a derived cache so the bump triggers a clean
73
+ rebuild from the markdown files. The vec table is sized at the active
74
+ embedding provider's dim and rebuilt on provider switch (the durable
75
+ `embeddings` cache is per-`(content_hash, model)` and survives the
76
+ vec-table rebuild, so a switch back to the previous provider is all
77
+ cache hits). New dependency: `sqlite-vec`. New prerequisite:
78
+ `better-sqlite3` with extension loading enabled — the npm prebuilt
79
+ has it on by default, so `npm install` is the only setup step in the
80
+ common case; a custom build with it disabled is a hard startup error
81
+ with actionable text (`npm rebuild better-sqlite3 --build-from-source`).
82
+ This is the final follow-up in the #38 unbundle; v1.9.0 ships as a
83
+ grouped release covering all five.
84
+
85
+ - **Lazy embedding model load with background warm-up** (#38, PR 2 of 5).
86
+ The MiniLM embedding model no longer loads at server startup. With the
87
+ v1.8.0 content-addressed cache, a startup whose freshness manifest matches
88
+ disk skips the reindex pass, and a reindex whose chunk hashes are all
89
+ cached skips `embed()` entirely — so the model load (~100MB, ~500ms cold)
90
+ is now deferred until something actually needs to embed. A read-only role
91
+ that only calls `vault_read` / `vault_search` against a fully-cached
92
+ index never loads the model at all. After the MCP transport opens and the
93
+ freshness check / background reindex begins, the server kicks off a
94
+ `warmModel()` in a `void` background promise so the first user search
95
+ does not pay the cold-start cost. A warm-up failure (no network on the
96
+ first run, model download blocked) is logged to stderr but never crashes
97
+ the server — the next `embed()` call retries. The warm-up is gated by a
98
+ new optional `warm_embeddings` flag in `.daftari/config.yaml` (default
99
+ `true`); set it to `false` for read-only deployments or memory-constrained
100
+ environments. The transport-open-before-indexing ordering from v1.7.1
101
+ is preserved — no startup hang regression. A new `modelStatus` field on
102
+ the in-process `IndexState` (`cold` / `warming` / `ready` / `error`) lets
103
+ tools surface "embeddings warming" context when a client retries against
104
+ a warming model rather than misreporting an indexing pass.
105
+
10
106
  ## [1.8.0] - 2026-05-20
11
107
 
12
108
  ### Changed
package/README.md CHANGED
@@ -36,7 +36,7 @@ part.
36
36
 
37
37
  | Layer | Concern | What Daftari provides |
38
38
  |------:|---------|-----------------------|
39
- | 1 | **Storage** | Markdown + YAML frontmatter on disk, a git history, a rebuildable SQLite index for hybrid BM25 + vector search. |
39
+ | 1 | **Storage** | Markdown + YAML frontmatter on disk, a git history, a rebuildable SQLite index — FTS5 for lexical ranking, sqlite-vec for vector search. |
40
40
  | 2 | **Multi-tenant ACL** | Config-driven RBAC. Roles and per-collection read/write/promote permissions declared in `.daftari/config.yaml`. |
41
41
  | 3 | **Write safety** ⭐ | File-level write locks (SQLite-backed, 60s TTL) give single-writer-per-document safety — a competing writer fails cleanly instead of corrupting the file. This is a safety mechanism, not a coordination protocol. The ⭐ is for what is genuinely differentiated: every write auto-committed to git with a provenance log of who changed what and when. |
42
42
  | 4 | **Curation decay** ⭐ | The draft → canonical → deprecated lifecycle, TTL-based staleness, tension logging for contradictions, and an advisory linter. Knowledge that stops being true is surfaced, not silently trusted. |
@@ -152,34 +152,83 @@ by name with JSON arguments; the server replies with a JSON text block. Here is
152
152
  ## Search internals
153
153
 
154
154
  `vault_search` is **hybrid**: a BM25 lexical score and a vector (semantic)
155
- score, blended with tunable weights. The vector half is worth being explicit
156
- about, because a local-first tool should never leave you guessing whether a
157
- query leaves your machine.
155
+ score, blended with tunable weights. Both halves are SQL-native — they
156
+ run inside SQLite, not in JavaScript.
157
+
158
+ - **Lexical half.** An FTS5 virtual table (`documents_fts`) over title,
159
+ tags, and body. SQLite's built-in BM25 ranks every MATCH'd row.
160
+ Triggers on the regular `documents` table keep the FTS index in sync
161
+ on every write, so the indexer never touches the virtual table
162
+ directly. Free-text queries are tokenised, stopword-filtered, and
163
+ prefix-OR'd (`cirrus pricing` becomes `cirrus* OR pricing*`) so a
164
+ partial-keystroke or stem variation still matches.
165
+
166
+ - **Vector half.** A sqlite-vec `vec0` virtual table
167
+ (`embeddings_vec`), sized at the active provider's dim and indexed for
168
+ KNN cosine queries. The durable `embeddings` cache (one row per
169
+ `(content_hash, model)`) is the source of truth; `embeddings_vec`
170
+ mirrors it for query-time access. Switching embedding providers
171
+ triggers a drop-and-rebuild of the vec table at the new dim — the
172
+ durable cache survives, so switching back is all cache hits.
173
+
174
+ **Prerequisite.** sqlite-vec is a loadable SQLite extension. The
175
+ `sqlite-vec` npm package ships pre-built binaries for darwin / linux /
176
+ windows on x64 and arm64; `better-sqlite3`'s npm prebuilt enables
177
+ extension loading by default. In the common case `npm install` is the
178
+ only setup step. If a custom `better-sqlite3` build with extension
179
+ loading disabled is in use, Daftari refuses to start with an actionable
180
+ error: `npm rebuild better-sqlite3 --build-from-source`.
181
+
182
+ The vector half is worth being explicit about, because a local-first
183
+ tool should never leave you guessing whether a query leaves your
184
+ machine.
185
+
186
+ ### Embedding providers
187
+
188
+ Daftari ships with two embedding backends. Pick one in
189
+ `.daftari/config.yaml`:
158
190
 
159
- - **Embedding model.** `all-MiniLM-L6-v2` (the `Xenova/all-MiniLM-L6-v2`
160
- build), a 384-dimension sentence-transformer.
161
- - **Where it runs.** Entirely **local**. Embeddings are computed in-process by
191
+ ```yaml
192
+ embeddings:
193
+ provider: local-minilm # default. Other values: openai-3-small.
194
+ ```
195
+
196
+ - **`local-minilm`** (default). `all-MiniLM-L6-v2` (the
197
+ `Xenova/all-MiniLM-L6-v2` build), a 384-dimension sentence-transformer.
198
+ Runs entirely **local**: embeddings are computed in-process by
162
199
  [`@huggingface/transformers`](https://www.npmjs.com/package/@huggingface/transformers)
163
- (Transformers.js). There is **no external embedding API** — nothing is sent
164
- to Hugging Face, OpenAI, or anyone else at index or query time.
165
- - **Dependencies.** Just `npm install`. No Python, no separate ONNX runtime, no
166
- GPU, no API key — the ONNX runtime ships as a dependency of
167
- `@huggingface/transformers`. The **first** reindex downloads the model
168
- weights (~25 MB) from the Hugging Face hub and caches them on disk; every run
169
- after that is fully offline.
170
- - **Graceful degradation.** If the model cannot load — e.g. no network on the
171
- very first run, before the weights are cached — `vault_reindex` still builds
172
- the BM25 index. The vector column is left empty, `vectorUsed` reports
173
- `false`, and search transparently falls back to lexical-only ranking.
200
+ (Transformers.js). No external embedding API — nothing is sent to
201
+ Hugging Face, OpenAI, or anyone else at index or query time. Just
202
+ `npm install` — no Python, no API key. The **first** reindex downloads
203
+ the model weights (~25 MB) from the Hugging Face hub and caches them on
204
+ disk; every run after that is fully offline. Slow on cold start
205
+ (~25 min CPU on a 44k-chunk vault), but free.
206
+
207
+ - **`openai-3-small`**. OpenAI's `text-embedding-3-small`, a 1536-dimension
208
+ hosted embedding. **Sends chunk text to OpenAI** at reindex time —
209
+ enable this only if you're comfortable with that. Requires
210
+ `OPENAI_API_KEY` in the server's environment (it is never read from
211
+ config files). ~10x faster than `local-minilm` on large vaults; on the
212
+ 44k-chunk benchmark above, ~2 minutes and ~$0.10. Because Daftari's
213
+ embedding cache is content-addressed by `(content_hash, model)`, the
214
+ paid cost is a **one-time event per chunk text** — re-running
215
+ `vault_reindex` on an unchanged vault embeds zero new chunks. Switching
216
+ providers between server runs is safe: the cache keeps both providers'
217
+ rows, so switching back to the other later re-uses what was previously
218
+ embedded.
219
+
220
+ - **Graceful degradation.** Whichever provider is active, if it cannot
221
+ reach the model (no network on the very first `local-minilm` run, before
222
+ the weights are cached; or OpenAI unreachable), `vault_reindex` still
223
+ builds the FTS5 lexical index. The vector column is left empty,
224
+ `vectorUsed` reports `false`, and search transparently falls back to
225
+ lexical-only ranking.
226
+
174
227
  - **Quality tradeoff.** MiniLM is small and fast, which keeps Daftari
175
- dependency-light and snappy, but its recall/precision is below larger hosted
176
- embedding models. Pairing it with BM25 covers the common case where a small
228
+ dependency-light and snappy, but its recall/precision is below larger
229
+ hosted embedding models. `openai-3-small` is the obvious next step.
230
+ Pairing either with FTS5 BM25 covers the common case where a small
177
231
  model misses an exact-term match.
178
- - **Swappability.** v1 pins the model as a constant (`EMBEDDING_MODEL` in
179
- [`src/search/vector.ts`](src/search/vector.ts)). Any model the Transformers.js
180
- feature-extraction pipeline supports can be substituted by editing that
181
- constant (and `EMBEDDING_DIM` to match) and running `vault_reindex`. A
182
- config-driven bring-your-own-embedding hook is not in v1.
183
232
 
184
233
  ---
185
234
 
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAgCA,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CASrE;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,GAAG,IAAI,CAE3D;AAED,wBAAsB,IAAI,CAAC,IAAI,GAAE,MAAM,EAA0B,GAAG,OAAO,CAAC,IAAI,CAAC,CAuFhF"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAkCA,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CASrE;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,MAAM,GAAG,IAAI,CAE3D;AAED,wBAAsB,IAAI,CAAC,IAAI,GAAE,MAAM,EAA0B,GAAG,OAAO,CAAC,IAAI,CAAC,CA+GhF"}
package/dist/index.js CHANGED
@@ -19,6 +19,8 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
19
19
  import { GUEST_ROLE, resolveAccess } from "./access/rbac.js";
20
20
  import { markIndexError, markIndexing, markIndexReady, setIndexProgress, } from "./search/index-state.js";
21
21
  import { isIndexFresh, reindexVault } from "./search/reindex.js";
22
+ import { setProvider, warmModel } from "./search/vector.js";
23
+ import { startWatcher } from "./search/watcher.js";
22
24
  import { createServer } from "./server.js";
23
25
  import { directoryExists } from "./storage/local.js";
24
26
  import { loadConfig } from "./utils/config.js";
@@ -58,6 +60,20 @@ export async function main(argv = process.argv.slice(2)) {
58
60
  process.exitCode = 1;
59
61
  return;
60
62
  }
63
+ // Install the configured embedding provider. loadConfig has already
64
+ // validated the id and (for openai-3-small) the OPENAI_API_KEY env var,
65
+ // so setProvider should never throw here — but if it does (race-y env
66
+ // var stripping by a wrapper, say), fail loud rather than serving with
67
+ // a broken provider.
68
+ try {
69
+ setProvider(config.value.embeddingProvider);
70
+ }
71
+ catch (e) {
72
+ const reason = e instanceof Error ? e.message : String(e);
73
+ process.stderr.write(`daftari: ${reason}\n`);
74
+ process.exitCode = 1;
75
+ return;
76
+ }
61
77
  // Resolve the access identity. With no --role the server runs as the
62
78
  // deny-all guest; an unknown role name resolves the same way.
63
79
  const user = parseFlag(argv, "user") ?? "guest";
@@ -103,6 +119,14 @@ export async function main(argv = process.argv.slice(2)) {
103
119
  if (fresh) {
104
120
  process.stderr.write(`daftari: index is up to date — skipping reindex\n`);
105
121
  markIndexReady();
122
+ // Fresh index means a fully-cached state: no embedding work was done, so
123
+ // the model is still cold. Warm it in the background (if config allows)
124
+ // so the first user search does not pay the ~500ms cold start. Then
125
+ // start the watcher to catch out-of-band edits going forward.
126
+ if (config.value.warmEmbeddings) {
127
+ void runBackgroundWarm();
128
+ }
129
+ maybeStartWatcher(vaultRoot, config.value.watch);
106
130
  return;
107
131
  }
108
132
  // Background reindex. The promise is intentionally not awaited — main()
@@ -110,9 +134,53 @@ export async function main(argv = process.argv.slice(2)) {
110
134
  // completion alongside the live server.
111
135
  markIndexing();
112
136
  process.stderr.write(`daftari: starting background reindex…\n`);
113
- void runBackgroundReindex(vaultRoot);
137
+ void runBackgroundReindex(vaultRoot, config.value.warmEmbeddings, () => {
138
+ maybeStartWatcher(vaultRoot, config.value.watch);
139
+ });
140
+ }
141
+ // Reference held so a SIGTERM / SIGINT can close the watcher cleanly. One
142
+ // per process — the server runs against one vault for its lifetime.
143
+ let activeWatcher = null;
144
+ // Spawns the chokidar watcher when config.watch !== false. Wired here, not
145
+ // at module load, so the test entry points (which import main) can run with
146
+ // a config that disables it. Idempotent: a second call is a no-op while the
147
+ // first watcher is still alive.
148
+ function maybeStartWatcher(vaultRoot, watchEnabled) {
149
+ if (!watchEnabled) {
150
+ process.stderr.write(`daftari: vault watcher disabled (watch: false in config)\n`);
151
+ return;
152
+ }
153
+ if (activeWatcher)
154
+ return;
155
+ activeWatcher = startWatcher(vaultRoot);
156
+ process.stderr.write(`daftari: watching vault for out-of-band edits\n`);
157
+ // Clean shutdown on the signals stdio MCP servers receive when their
158
+ // parent process closes the pipe. Without this the chokidar handles can
159
+ // keep the event loop alive past the transport close.
160
+ const onShutdown = () => {
161
+ if (!activeWatcher)
162
+ return;
163
+ const w = activeWatcher;
164
+ activeWatcher = null;
165
+ void w.close();
166
+ };
167
+ process.once("SIGTERM", onShutdown);
168
+ process.once("SIGINT", onShutdown);
114
169
  }
115
- async function runBackgroundReindex(vaultRoot) {
170
+ // Loads the embedding model in the background so the first user search does
171
+ // not pay the cold-start latency. Failures (no network on first run, model
172
+ // download blocked) are logged but never crash the server — the next embed()
173
+ // call will retry. Intended to be invoked as a `void` from main().
174
+ async function runBackgroundWarm() {
175
+ const result = await warmModel();
176
+ if (result.ok) {
177
+ process.stderr.write(`daftari: embedding model warm — ready for search\n`);
178
+ }
179
+ else {
180
+ process.stderr.write(`daftari: warning: embedding warm-up failed: ${result.error.message}\n`);
181
+ }
182
+ }
183
+ async function runBackgroundReindex(vaultRoot, warmEmbeddings, onDone) {
116
184
  try {
117
185
  const reindexed = await reindexVault(vaultRoot, makeProgressReporter());
118
186
  if (reindexed.ok) {
@@ -120,6 +188,13 @@ async function runBackgroundReindex(vaultRoot) {
120
188
  markIndexReady();
121
189
  process.stderr.write(`daftari: indexed ${r.documentCount} docs, ${r.chunkCount} chunks ` +
122
190
  `(vectors ${r.vectorEnabled ? "on" : "off"})\n`);
191
+ // If the reindex was fully cache-hit (no chunks needed embedding) the
192
+ // model was never loaded — warm it now so the first user search isn't
193
+ // a cold start. A reindex that did embed already loaded the model; no
194
+ // extra warm is necessary in that path.
195
+ if (warmEmbeddings && r.embeddedCount === 0) {
196
+ void runBackgroundWarm();
197
+ }
123
198
  }
124
199
  else {
125
200
  markIndexError(reindexed.error.message);
@@ -131,6 +206,14 @@ async function runBackgroundReindex(vaultRoot) {
131
206
  markIndexError(reason);
132
207
  process.stderr.write(`daftari: warning: background indexer crashed: ${reason}\n`);
133
208
  }
209
+ finally {
210
+ // Start the watcher only after the full reindex pass finishes — the
211
+ // dispatch() guard inside watcher.ts would queue events while the
212
+ // global status is "indexing", but starting after avoids the
213
+ // bookkeeping and keeps the startup ordering obvious: transport,
214
+ // freshness/reindex, watcher.
215
+ onDone?.();
216
+ }
134
217
  }
135
218
  // Builds a ReindexOptions whose onProgress streams to both stderr (for
136
219
  // operator visibility) and the in-process IndexState (so tools can return
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,kCAAkC;AAClC,EAAE;AACF,+EAA+E;AAC/E,2EAA2E;AAC3E,+EAA+E;AAC/E,qDAAqD;AACrD,EAAE;AACF,4EAA4E;AAC5E,8EAA8E;AAC9E,iEAAiE;AACjE,uEAAuE;AACvE,4BAA4B;AAC5B,EAAE;AACF,2EAA2E;AAC3E,oBAAoB;AAEpB,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAC7D,OAAO,EACL,cAAc,EACd,YAAY,EACZ,cAAc,EACd,gBAAgB,GACjB,MAAM,yBAAyB,CAAC;AACjC,OAAO,EAAE,YAAY,EAAuB,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACtF,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,oEAAoE;AACpE,MAAM,UAAU,SAAS,CAAC,IAAc,EAAE,IAAY;IACpD,MAAM,IAAI,GAAG,KAAK,IAAI,EAAE,CAAC;IACzB,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IACrC,IAAI,SAAS,KAAK,CAAC,CAAC,IAAI,SAAS,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACpD,OAAO,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC;IACrC,CAAC;IACD,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC;IAC1D,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;IACnD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAc;IAC1C,OAAO,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AAClC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,IAAI,CAAC,OAAiB,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;IAC/D,MAAM,QAAQ,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;IACrC,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,qDAAqD,CAAC,CAAC;QAC5E,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACrB,OAAO;IACT,CAAC;IAED,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IACpC,IAAI,CAAC,CAAC,MAAM,eAAe,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC;QACxC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,uCAAuC,SAAS,IAAI,CAAC,CAAC;QAC3E,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACrB,OAAO;IACT,CAAC;IAED,2EAA2E;IAC3E,2DAA2D;IAC3D,MAAM,MAAM,GAAG,UAAU,CAAC,SAAS,CAAC,CAAC;IACrC,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;QACf,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,MAAM,CAAC,KAAK,CAA
C,OAAO,IAAI,CAAC,CAAC;QAC3D,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACrB,OAAO;IACT,CAAC;IAED,qEAAqE;IACrE,8DAA8D;IAC9D,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,OAAO,CAAC;IAChD,MAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,UAAU,CAAC;IACvD,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC;IAC3D,IAAI,MAAM,CAAC,IAAI,KAAK,IAAI,IAAI,QAAQ,KAAK,UAAU,EAAE,CAAC;QACpD,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,2BAA2B,QAAQ,qDAAqD,CACzF,CAAC;IACJ,CAAC;IAED,4EAA4E;IAC5E,2EAA2E;IAC3E,sEAAsE;IACtE,0EAA0E;IAC1E,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAEhD,yEAAyE;IACzE,wEAAwE;IACxE,qCAAqC;IACrC,IAAI,YAAY,EAAE,CAAC;QACjB,YAAY,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,YAAY,CAAC,SAAS,EAAE,oBAAoB,EAAE,CAAC,CAAC;QACxE,IAAI,SAAS,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC;YAC1B,cAAc,EAAE,CAAC;YACjB,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,oBAAoB,CAAC,CAAC,aAAa,UAAU,CAAC,CAAC,UAAU,UAAU;gBACjE,YAAY,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,KAAK,CAClD,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,cAAc,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACxC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,yCAAyC,SAAS,CAAC,KAAK,CAAC,OAAO,IAAI,CAAC,CAAC;YAC3F,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACvB,CAAC;QACD,OAAO;IACT,CAAC;IAED,qEAAqE;IACrE,wEAAwE;IACxE,2EAA2E;IAC3E,gBAAgB;IAChB,MAAM,MAAM,GAAG,YAAY,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAC/C,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,6BAA6B,SAAS,aAAa;QACjD,QAAQ,MAAM,CAAC,IAAI,SAAS,MAAM,CAAC,QAAQ,IAAI,CAClD,CAAC;IAEF,MAAM,KAAK,GAAG,MAAM,YAAY,CAAC,SAAS,CAAC,CAAC;IAC5C,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,mDAAmD,CAAC,CAAC;QAC1E,cAAc,EAAE,CAAC;QACjB,OAAO;IACT,CAAC;IAED,wEAAwE;IACxE,kEAAkE;IAClE,wCAAwC;IACxC,YAAY,EAAE,CAAC;IACf,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,yCAAyC,CAAC,CAAC;IAChE,KAAK,oBAAoB,CAAC,SAAS,CAAC,CAAC;AACvC,CAAC;AAED,KAAK,UAAU,oBAAoB,CAAC,SAAiB;IACnD,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,YAAY,CAAC,SAAS,EAAE,oBAAoB,EAAE,CAAC,CAAC;QACxE,IAAI,SAAS,CAAC,EAAE,EAAE,CAAC;YACjB,
MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC;YAC1B,cAAc,EAAE,CAAC;YACjB,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,oBAAoB,CAAC,CAAC,aAAa,UAAU,CAAC,CAAC,UAAU,UAAU;gBACjE,YAAY,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,KAAK,CAClD,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,cAAc,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACxC,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,oDAAoD,SAAS,CAAC,KAAK,CAAC,OAAO,IAAI,CAChF,CAAC;QACJ,CAAC;IACH,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,MAAM,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QACvE,cAAc,CAAC,MAAM,CAAC,CAAC;QACvB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,iDAAiD,MAAM,IAAI,CAAC,CAAC;IACpF,CAAC;AACH,CAAC;AAED,uEAAuE;AACvE,0EAA0E;AAC1E,4EAA4E;AAC5E,6EAA6E;AAC7E,uCAAuC;AACvC,SAAS,oBAAoB;IAC3B,MAAM,SAAS,GAAG,IAAI,CAAC;IACvB,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,OAAO;QACL,UAAU,EAAE,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;YAC1B,gBAAgB,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;YAC9B,IAAI,KAAK,KAAK,CAAC;gBAAE,OAAO;YACxB,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;gBACzB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,wBAAwB,IAAI,IAAI,KAAK,SAAS,CAAC,CAAC;gBACrE,IAAI,IAAI,KAAK,KAAK;oBAAE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC/C,OAAO;YACT,CAAC;YACD,MAAM,KAAK,GAAG,IAAI,GAAG,KAAK,CAAC;YAC3B,IAAI,KAAK,IAAI,YAAY,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;gBAC5C,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,sBAAsB,IAAI,IAAI,KAAK,WAAW,CAAC,CAAC;gBACrE,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,SAAS,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;YAC/D,CAAC;QACH,CAAC;KACF,CAAC;AACJ,CAAC;AAED,uEAAuE;AACvE,qEAAqE;AACrE,MAAM,QAAQ,GAAG,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC;AAC3D,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,QAAQ,EAAE,CAAC;IACjC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;QACjB,MAAM,MAAM,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QACvE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,mBAAmB,MAAM,IAAI,CAAC,CAAC;QACpD,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;IACvB,CAAC,CAAC,CAAC;AACL,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,kCAAkC;AAClC,EAAE;AACF,+EAA+E;AAC/E,2EAA2E;AAC3E,+EAA+E;AAC/E,qDAAqD;AACrD,EAAE;AACF,4EAA4E;AAC5E,8EAA8E;AAC9E,iEAAiE;AACjE,uEAAuE;AACvE,4BAA4B;AAC5B,EAAE;AACF,2EAA2E;AAC3E,oBAAoB;AAEpB,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,UAAU,EAAE,aAAa,EAAE,MAAM,kBAAkB,CAAC;AAC7D,OAAO,EACL,cAAc,EACd,YAAY,EACZ,cAAc,EACd,gBAAgB,GACjB,MAAM,yBAAyB,CAAC;AACjC,OAAO,EAAE,YAAY,EAAuB,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACtF,OAAO,EAAE,WAAW,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAC5D,OAAO,EAAE,YAAY,EAAqB,MAAM,qBAAqB,CAAC;AACtE,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,oEAAoE;AACpE,MAAM,UAAU,SAAS,CAAC,IAAc,EAAE,IAAY;IACpD,MAAM,IAAI,GAAG,KAAK,IAAI,EAAE,CAAC;IACzB,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IACrC,IAAI,SAAS,KAAK,CAAC,CAAC,IAAI,SAAS,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACpD,OAAO,IAAI,CAAC,SAAS,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC;IACrC,CAAC;IACD,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC;IAC1D,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC;IACnD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAc;IAC1C,OAAO,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AAClC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,IAAI,CAAC,OAAiB,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;IAC/D,MAAM,QAAQ,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;IACrC,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,qDAAqD,CAAC,CAAC;QAC5E,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACrB,OAAO;IACT,CAAC;IAED,MAAM,SAAS,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;IACpC,IAAI,CAAC,CAAC,MAAM,eAAe,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC;QACxC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,uCAAuC,SAAS,IAAI,CAAC,CAAC;QAC3E,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACrB,OAAO;IACT,CAAC;IAED,2EAA2E;IAC3E,2DAA2D;IAC3D,MAAM,MAAM,GAAG,UAAU,CAAC,SAAS,CAAC,CAAC;IA
CrC,IAAI,CAAC,MAAM,CAAC,EAAE,EAAE,CAAC;QACf,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,MAAM,CAAC,KAAK,CAAC,OAAO,IAAI,CAAC,CAAC;QAC3D,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACrB,OAAO;IACT,CAAC;IAED,oEAAoE;IACpE,wEAAwE;IACxE,sEAAsE;IACtE,uEAAuE;IACvE,qBAAqB;IACrB,IAAI,CAAC;QACH,WAAW,CAAC,MAAM,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;IAC9C,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,MAAM,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC1D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,MAAM,IAAI,CAAC,CAAC;QAC7C,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACrB,OAAO;IACT,CAAC;IAED,qEAAqE;IACrE,8DAA8D;IAC9D,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,OAAO,CAAC;IAChD,MAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,UAAU,CAAC;IACvD,MAAM,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC;IAC3D,IAAI,MAAM,CAAC,IAAI,KAAK,IAAI,IAAI,QAAQ,KAAK,UAAU,EAAE,CAAC;QACpD,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,2BAA2B,QAAQ,qDAAqD,CACzF,CAAC;IACJ,CAAC;IAED,4EAA4E;IAC5E,2EAA2E;IAC3E,sEAAsE;IACtE,0EAA0E;IAC1E,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAEhD,yEAAyE;IACzE,wEAAwE;IACxE,qCAAqC;IACrC,IAAI,YAAY,EAAE,CAAC;QACjB,YAAY,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,YAAY,CAAC,SAAS,EAAE,oBAAoB,EAAE,CAAC,CAAC;QACxE,IAAI,SAAS,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC;YAC1B,cAAc,EAAE,CAAC;YACjB,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,oBAAoB,CAAC,CAAC,aAAa,UAAU,CAAC,CAAC,UAAU,UAAU;gBACjE,YAAY,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,KAAK,CAClD,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,cAAc,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACxC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,yCAAyC,SAAS,CAAC,KAAK,CAAC,OAAO,IAAI,CAAC,CAAC;YAC3F,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACvB,CAAC;QACD,OAAO;IACT,CAAC;IAED,qEAAqE;IACrE,wEAAwE;IACxE,2EAA2E;IAC3E,gBAAgB;IAChB,MAAM,MAAM,GAAG,YAAY,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAC/C,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,6BAA6B,SAAS,aAAa;QACjD,QAAQ,MAAM,CAAC,IAAI,SAAS,MAAM,CAAC,QAAQ,IAAI,CAClD,CAAC;
IAEF,MAAM,KAAK,GAAG,MAAM,YAAY,CAAC,SAAS,CAAC,CAAC;IAC5C,IAAI,KAAK,EAAE,CAAC;QACV,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,mDAAmD,CAAC,CAAC;QAC1E,cAAc,EAAE,CAAC;QACjB,yEAAyE;QACzE,wEAAwE;QACxE,oEAAoE;QACpE,8DAA8D;QAC9D,IAAI,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,CAAC;YAChC,KAAK,iBAAiB,EAAE,CAAC;QAC3B,CAAC;QACD,iBAAiB,CAAC,SAAS,EAAE,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACjD,OAAO;IACT,CAAC;IAED,wEAAwE;IACxE,kEAAkE;IAClE,wCAAwC;IACxC,YAAY,EAAE,CAAC;IACf,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,yCAAyC,CAAC,CAAC;IAChE,KAAK,oBAAoB,CAAC,SAAS,EAAE,MAAM,CAAC,KAAK,CAAC,cAAc,EAAE,GAAG,EAAE;QACrE,iBAAiB,CAAC,SAAS,EAAE,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;AACL,CAAC;AAED,0EAA0E;AAC1E,oEAAoE;AACpE,IAAI,aAAa,GAAwB,IAAI,CAAC;AAE9C,2EAA2E;AAC3E,4EAA4E;AAC5E,4EAA4E;AAC5E,gCAAgC;AAChC,SAAS,iBAAiB,CAAC,SAAiB,EAAE,YAAqB;IACjE,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,4DAA4D,CAAC,CAAC;QACnF,OAAO;IACT,CAAC;IACD,IAAI,aAAa;QAAE,OAAO;IAC1B,aAAa,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC;IACxC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,iDAAiD,CAAC,CAAC;IAExE,qEAAqE;IACrE,wEAAwE;IACxE,sDAAsD;IACtD,MAAM,UAAU,GAAG,GAAG,EAAE;QACtB,IAAI,CAAC,aAAa;YAAE,OAAO;QAC3B,MAAM,CAAC,GAAG,aAAa,CAAC;QACxB,aAAa,GAAG,IAAI,CAAC;QACrB,KAAK,CAAC,CAAC,KAAK,EAAE,CAAC;IACjB,CAAC,CAAC;IACF,OAAO,CAAC,IAAI,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;IACpC,OAAO,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;AACrC,CAAC;AAED,4EAA4E;AAC5E,2EAA2E;AAC3E,6EAA6E;AAC7E,mEAAmE;AACnE,KAAK,UAAU,iBAAiB;IAC9B,MAAM,MAAM,GAAG,MAAM,SAAS,EAAE,CAAC;IACjC,IAAI,MAAM,CAAC,EAAE,EAAE,CAAC;QACd,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,oDAAoD,CAAC,CAAC;IAC7E,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,+CAA+C,MAAM,CAAC,KAAK,CAAC,OAAO,IAAI,CAAC,CAAC;IAChG,CAAC;AACH,CAAC;AAED,KAAK,UAAU,oBAAoB,CACjC,SAAiB,EACjB,cAAuB,EACvB,MAAmB;IAEnB,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,YAAY,CAAC,SAAS,EAAE,oBAAoB,EAAE,CAAC,CAAC;QACxE,IAAI,SAAS,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC;YAC1B,cAAc,EAAE,CAAC;YACjB,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,oBAAoB,CAAC,CAAC,aAAa,UAAU,CAAC,CAAC,UAAU,UAAU;gBACjE,Y
AAY,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,KAAK,CAClD,CAAC;YACF,sEAAsE;YACtE,sEAAsE;YACtE,sEAAsE;YACtE,wCAAwC;YACxC,IAAI,cAAc,IAAI,CAAC,CAAC,aAAa,KAAK,CAAC,EAAE,CAAC;gBAC5C,KAAK,iBAAiB,EAAE,CAAC;YAC3B,CAAC;QACH,CAAC;aAAM,CAAC;YACN,cAAc,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACxC,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,oDAAoD,SAAS,CAAC,KAAK,CAAC,OAAO,IAAI,CAChF,CAAC;QACJ,CAAC;IACH,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,MAAM,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QACvE,cAAc,CAAC,MAAM,CAAC,CAAC;QACvB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,iDAAiD,MAAM,IAAI,CAAC,CAAC;IACpF,CAAC;YAAS,CAAC;QACT,oEAAoE;QACpE,kEAAkE;QAClE,6DAA6D;QAC7D,iEAAiE;QACjE,8BAA8B;QAC9B,MAAM,EAAE,EAAE,CAAC;IACb,CAAC;AACH,CAAC;AAED,uEAAuE;AACvE,0EAA0E;AAC1E,4EAA4E;AAC5E,6EAA6E;AAC7E,uCAAuC;AACvC,SAAS,oBAAoB;IAC3B,MAAM,SAAS,GAAG,IAAI,CAAC;IACvB,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,OAAO;QACL,UAAU,EAAE,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;YAC1B,gBAAgB,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;YAC9B,IAAI,KAAK,KAAK,CAAC;gBAAE,OAAO;YACxB,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;gBACzB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,wBAAwB,IAAI,IAAI,KAAK,SAAS,CAAC,CAAC;gBACrE,IAAI,IAAI,KAAK,KAAK;oBAAE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC/C,OAAO;YACT,CAAC;YACD,MAAM,KAAK,GAAG,IAAI,GAAG,KAAK,CAAC;YAC3B,IAAI,KAAK,IAAI,YAAY,IAAI,IAAI,KAAK,KAAK,EAAE,CAAC;gBAC5C,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,sBAAsB,IAAI,IAAI,KAAK,WAAW,CAAC,CAAC;gBACrE,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,SAAS,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;YAC/D,CAAC;QACH,CAAC;KACF,CAAC;AACJ,CAAC;AAED,uEAAuE;AACvE,qEAAqE;AACrE,MAAM,QAAQ,GAAG,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC;AAC3D,IAAI,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,QAAQ,EAAE,CAAC;IACjC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;QACjB,MAAM,MAAM,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QACvE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,mBAAmB,MAAM,IAAI,CAAC,CAAC;QAC
pD,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;IACvB,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -1,19 +1,3 @@
1
1
  export declare function tokenize(text: string): string[];
2
- export interface Bm25Document {
3
- path: string;
4
- tokens: string[];
5
- }
6
- export interface Bm25Model {
7
- termFreqs: Map<string, Map<string, number>>;
8
- docFreqs: Map<string, number>;
9
- docLengths: Map<string, number>;
10
- docCount: number;
11
- avgDocLength: number;
12
- }
13
- export declare function buildBm25(docs: Bm25Document[]): Bm25Model;
14
- export interface Bm25Hit {
15
- path: string;
16
- score: number;
17
- }
18
- export declare function searchBm25(model: Bm25Model, queryTokens: string[]): Bm25Hit[];
2
+ export declare function buildMatchQuery(query: string): string | null;
19
3
  //# sourceMappingURL=bm25.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../../src/search/bm25.ts"],"names":[],"mappings":"AA4DA,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAK/C;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,SAAS;IAExB,SAAS,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;IAE5C,QAAQ,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAE9B,UAAU,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAChC,QAAQ,EAAE,MAAM,CAAC;IACjB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,wBAAgB,SAAS,CAAC,IAAI,EAAE,YAAY,EAAE,GAAG,SAAS,CA0BzD;AAED,MAAM,WAAW,OAAO;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;CACf;AAKD,wBAAgB,UAAU,CAAC,KAAK,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,EAAE,GAAG,OAAO,EAAE,CAsB7E"}
1
+ {"version":3,"file":"bm25.d.ts","sourceRoot":"","sources":["../../src/search/bm25.ts"],"names":[],"mappings":"AA4DA,wBAAgB,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAK/C;AAqBD,wBAAgB,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAO5D"}
@@ -1,18 +1,19 @@
1
- // BM25 lexical ranking, hand-rolled and dependency-free.
1
+ // Lexical ranking now a thin shim over SQLite FTS5.
2
2
  //
3
- // BM25 scores a document for a query by summing, over each query term, an IDF
4
- // weight times a saturating term-frequency factor. It is the lexical half of
5
- // hybrid search: it rewards exact word overlap, which embeddings tend to blur.
3
+ // Until v1.9 this file held a hand-rolled BM25 implementation that scanned
4
+ // every document's JSON-tokens column in JavaScript. SQLite's built-in FTS5
5
+ // virtual table is faster, scales further, and ships with its own (Okapi)
6
+ // BM25 ranker — so this file is now reduced to (a) a query-side tokenizer
7
+ // used by snippet building and `relatedSearch`, and (b) a helper that turns
8
+ // a free-text query into the prefix-OR'd MATCH string FTS5 expects.
6
9
  //
7
- // The vault is a curated knowledge base (tens to low-hundreds of documents),
8
- // so the whole corpus is ranked in memory on every query. No inverted index.
9
- // Okapi BM25 free parameters. k1 controls term-frequency saturation; b controls
10
- // how strongly document length normalises the score. These are the standard
11
- // defaults and need no tuning at this corpus size.
12
- const K1 = 1.5;
13
- const B = 0.75;
14
- // Common English words carry no discriminating signal; dropping them keeps IDF
15
- // meaningful and snippets pointed at content words.
10
+ // The FTS5 virtual table (`documents_fts`) is declared in
11
+ // `src/storage/index-db.ts`; AFTER INSERT/UPDATE/DELETE triggers on the
12
+ // `documents` table keep it in sync.
13
+ // Common English words carry no discriminating signal; dropping them keeps
14
+ // the query side aligned with FTS5's porter/unicode61 tokenizer (which also
15
+ // drops stopwords from BM25 scoring via low IDF) and gives snippet building
16
+ // a cleaner highlight list.
16
17
  const STOPWORDS = new Set([
17
18
  "a",
18
19
  "an",
@@ -53,63 +54,40 @@ const STOPWORDS = new Set([
53
54
  "your",
54
55
  ]);
55
56
  // Lowercases, splits on any non-alphanumeric run, and drops stopwords and
56
- // 1-character fragments. Used identically for documents and queries so the
57
- // term spaces line up.
57
+ // 1-character fragments. Used for snippet highlighting and as the BM25
58
+ // query-side tokens fed into FTS5's MATCH parser.
58
59
  export function tokenize(text) {
59
60
  return text
60
61
  .toLowerCase()
61
62
  .split(/[^a-z0-9]+/)
62
63
  .filter((t) => t.length > 1 && !STOPWORDS.has(t));
63
64
  }
64
- export function buildBm25(docs) {
65
- const termFreqs = new Map();
66
- const docFreqs = new Map();
67
- const docLengths = new Map();
68
- let totalLength = 0;
69
- for (const doc of docs) {
70
- const tf = new Map();
71
- for (const term of doc.tokens) {
72
- tf.set(term, (tf.get(term) ?? 0) + 1);
73
- }
74
- termFreqs.set(doc.path, tf);
75
- docLengths.set(doc.path, doc.tokens.length);
76
- totalLength += doc.tokens.length;
77
- for (const term of tf.keys()) {
78
- docFreqs.set(term, (docFreqs.get(term) ?? 0) + 1);
79
- }
80
- }
81
- return {
82
- termFreqs,
83
- docFreqs,
84
- docLengths,
85
- docCount: docs.length,
86
- avgDocLength: docs.length > 0 ? totalLength / docs.length : 0,
87
- };
88
- }
89
- // Scores every document against the query terms and returns the matches
90
- // (score > 0) sorted high to low. A document with zero query-term overlap is
91
- // omitted entirely rather than returned with a zero score.
92
- export function searchBm25(model, queryTokens) {
93
- const hits = [];
94
- const uniqueQueryTerms = [...new Set(queryTokens)];
95
- for (const [path, tf] of model.termFreqs) {
96
- const docLength = model.docLengths.get(path) ?? 0;
97
- let score = 0;
98
- for (const term of uniqueQueryTerms) {
99
- const freq = tf.get(term) ?? 0;
100
- if (freq === 0)
101
- continue;
102
- const df = model.docFreqs.get(term) ?? 0;
103
- // IDF with the +1 inside the log keeps it non-negative even for terms
104
- // that appear in more than half the corpus.
105
- const idf = Math.log(1 + (model.docCount - df + 0.5) / (df + 0.5));
106
- const denom = freq + K1 * (1 - B + (B * docLength) / (model.avgDocLength || 1));
107
- score += idf * ((freq * (K1 + 1)) / denom);
108
- }
109
- if (score > 0)
110
- hits.push({ path, score });
111
- }
112
- hits.sort((a, b) => b.score - a.score);
113
- return hits;
65
+ // Builds an FTS5 MATCH query from a free-text user query.
66
+ //
67
+ // We tokenize the same way as `tokenize()`, then OR every term together as
68
+ // a prefix match (`cirrus*`). Prefix matching is friendly to partial
69
+ // keystrokes ("pric" → "pricing", "prices") and to morphologically related
70
+ // words; FTS5's porter tokenizer already collapses many of these on the
71
+ // document side, so the prefix is mostly a query-side recall booster.
72
+ //
73
+ // FTS5 query syntax is fragile in the face of user input: quotes, hyphens,
74
+ // the bare words AND / OR / NOT, and the trailing `*` operator all have
75
+ // meaning to the parser. We strip every character outside [a-zA-Z0-9]
76
+ // during tokenization (already done), so the only remaining hazard is the
77
+ // reserved words. We bypass that by lower-casing every token — FTS5's
78
+ // reserved words are matched case-sensitively in upper case, so `or` is
79
+ // just a search term.
80
+ //
81
+ // Returns null when the query yields no usable tokens (all-whitespace or
82
+ // all-stopwords). Callers must treat null as "no lexical match possible"
83
+ // rather than passing an empty string to MATCH, which is a syntax error.
84
+ export function buildMatchQuery(query) {
85
+ const tokens = tokenize(query);
86
+ if (tokens.length === 0)
87
+ return null;
88
+ // Deduplicate to keep the MATCH string short. Suffix every token with `*`
89
+ // for partial matches.
90
+ const unique = [...new Set(tokens)];
91
+ return unique.map((t) => `${t}*`).join(" OR ");
114
92
  }
115
93
  //# sourceMappingURL=bm25.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"bm25.js","sourceRoot":"","sources":["../../src/search/bm25.ts"],"names":[],"mappings":"AAAA,yDAAyD;AACzD,EAAE;AACF,8EAA8E;AAC9E,6EAA6E;AAC7E,+EAA+E;AAC/E,EAAE;AACF,6EAA6E;AAC7E,6EAA6E;AAE7E,gFAAgF;AAChF,4EAA4E;AAC5E,mDAAmD;AACnD,MAAM,EAAE,GAAG,GAAG,CAAC;AACf,MAAM,CAAC,GAAG,IAAI,CAAC;AAEf,+EAA+E;AAC/E,oDAAoD;AACpD,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACxB,GAAG;IACH,IAAI;IACJ,KAAK;IACL,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,KAAK;IACL,MAAM;IACN,KAAK;IACL,MAAM;IACN,IAAI;IACJ,KAAK;IACL,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,MAAM;IACN,KAAK;IACL,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,IAAI;IACJ,KAAK;IACL,MAAM;IACN,MAAM;IACN,MAAM;IACN,KAAK;IACL,MAAM;CACP,CAAC,CAAC;AAEH,0EAA0E;AAC1E,2EAA2E;AAC3E,uBAAuB;AACvB,MAAM,UAAU,QAAQ,CAAC,IAAY;IACnC,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,YAAY,CAAC;SACnB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;AACtD,CAAC;AAkBD,MAAM,UAAU,SAAS,CAAC,IAAoB;IAC5C,MAAM,SAAS,GAAG,IAAI,GAAG,EAA+B,CAAC;IACzD,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC3C,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC7C,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,EAAE,GAAG,IAAI,GAAG,EAAkB,CAAC;QACrC,KAAK,MAAM,IAAI,IAAI,GAAG,CAAC,MAAM,EAAE,CAAC;YAC9B,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACxC,CAAC;QACD,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC5B,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAC5C,WAAW,IAAI,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC;QACjC,KAAK,MAAM,IAAI,IAAI,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC;YAC7B,QAAQ,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QACpD,CAAC;IACH,CAAC;IAED,OAAO;QACL,SAAS;QACT,QAAQ;QACR,UAAU;QACV,QAAQ,EAAE,IAAI,CAAC,MAAM;QACrB,YAAY,EAAE,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;KAC9D,CAAC;AA
CJ,CAAC;AAOD,wEAAwE;AACxE,6EAA6E;AAC7E,2DAA2D;AAC3D,MAAM,UAAU,UAAU,CAAC,KAAgB,EAAE,WAAqB;IAChE,MAAM,IAAI,GAAc,EAAE,CAAC;IAC3B,MAAM,gBAAgB,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC;IAEnD,KAAK,MAAM,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;QACzC,MAAM,SAAS,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClD,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,MAAM,IAAI,IAAI,gBAAgB,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC/B,IAAI,IAAI,KAAK,CAAC;gBAAE,SAAS;YACzB,MAAM,EAAE,GAAG,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACzC,sEAAsE;YACtE,4CAA4C;YAC5C,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,GAAG,EAAE,GAAG,GAAG,CAAC,GAAG,CAAC,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC;YACnE,MAAM,KAAK,GAAG,IAAI,GAAG,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC,GAAG,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC,CAAC;YAChF,KAAK,IAAI,GAAG,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC;QAC7C,CAAC;QACD,IAAI,KAAK,GAAG,CAAC;YAAE,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;IAC5C,CAAC;IAED,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACvC,OAAO,IAAI,CAAC;AACd,CAAC"}
1
+ {"version":3,"file":"bm25.js","sourceRoot":"","sources":["../../src/search/bm25.ts"],"names":[],"mappings":"AAAA,sDAAsD;AACtD,EAAE;AACF,2EAA2E;AAC3E,4EAA4E;AAC5E,0EAA0E;AAC1E,0EAA0E;AAC1E,4EAA4E;AAC5E,oEAAoE;AACpE,EAAE;AACF,0DAA0D;AAC1D,wEAAwE;AACxE,qCAAqC;AAErC,2EAA2E;AAC3E,4EAA4E;AAC5E,4EAA4E;AAC5E,4BAA4B;AAC5B,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;IACxB,GAAG;IACH,IAAI;IACJ,KAAK;IACL,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,KAAK;IACL,MAAM;IACN,KAAK;IACL,MAAM;IACN,IAAI;IACJ,KAAK;IACL,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,KAAK;IACL,MAAM;IACN,KAAK;IACL,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,IAAI;IACJ,KAAK;IACL,MAAM;IACN,MAAM;IACN,MAAM;IACN,KAAK;IACL,MAAM;CACP,CAAC,CAAC;AAEH,0EAA0E;AAC1E,uEAAuE;AACvE,kDAAkD;AAClD,MAAM,UAAU,QAAQ,CAAC,IAAY;IACnC,OAAO,IAAI;SACR,WAAW,EAAE;SACb,KAAK,CAAC,YAAY,CAAC;SACnB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;AACtD,CAAC;AAED,0DAA0D;AAC1D,EAAE;AACF,2EAA2E;AAC3E,qEAAqE;AACrE,2EAA2E;AAC3E,wEAAwE;AACxE,sEAAsE;AACtE,EAAE;AACF,2EAA2E;AAC3E,wEAAwE;AACxE,uEAAuE;AACvE,0EAA0E;AAC1E,sEAAsE;AACtE,wEAAwE;AACxE,sBAAsB;AACtB,EAAE;AACF,yEAAyE;AACzE,yEAAyE;AACzE,yEAAyE;AACzE,MAAM,UAAU,eAAe,CAAC,KAAa;IAC3C,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;IAC/B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACrC,0EAA0E;IAC1E,uBAAuB;IACvB,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC;IACpC,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AACjD,CAAC"}
@@ -0,0 +1,8 @@
1
+ import type { Result } from "../frontmatter/types.js";
2
+ export interface EmbeddingProvider {
3
+ readonly id: string;
4
+ readonly dim: number;
5
+ warm(): Promise<Result<void, Error>>;
6
+ embed(texts: string[], onProgress?: (done: number, total: number) => void): Promise<Result<Float32Array[], Error>>;
7
+ }
8
+ //# sourceMappingURL=embedding-provider.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedding-provider.d.ts","sourceRoot":"","sources":["../../src/search/embedding-provider.ts"],"names":[],"mappings":"AAyBA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAEtD,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,IAAI,IAAI,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;IACrC,KAAK,CACH,KAAK,EAAE,MAAM,EAAE,EACf,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,GACjD,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,EAAE,KAAK,CAAC,CAAC,CAAC;CAC3C"}
@@ -0,0 +1,26 @@
1
+ // EmbeddingProvider — pluggable backend for chunk-to-vector conversion.
2
+ //
3
+ // The vault search index always stored its embeddings under a `model` column,
4
+ // but until this PR there was exactly one model: a locally-loaded MiniLM. The
5
+ // provider interface lets the vault owner pick between free-and-slow (local
6
+ // MiniLM, ~25 min cold reindex on a 44k-chunk vault) and fast-and-paid (an
7
+ // API-backed provider, ~2 min and ~$0.10 for the same vault). Because the
8
+ // embeddings cache is keyed by (content_hash, model), the paid cost is a
9
+ // one-time event per chunk text — a switch back to local doesn't re-embed
10
+ // anything; the old model's rows stay put and the new model populates its
11
+ // own row set.
12
+ //
13
+ // Contract:
14
+ // - `id` is what gets written to embeddings.model. Two providers with the
15
+ // same id would corrupt the cache; treat it as a stable namespace.
16
+ // - `dim` is the vector dimension. The cache stores it per row as defense-
17
+ // in-depth, but the model id alone scopes the join, so mixed-dim vectors
18
+ // for the same model id are a bug — not an expected runtime state.
19
+ // - `warm()` is the eager-load entry point. For providers with no warm-up
20
+ // cost (e.g. a stateless HTTP client), it can be a no-op that returns ok.
21
+ // - `embed()` returns one Float32Array per input text, in input order, all
22
+ // of length `dim`. `onProgress` (if given) fires after each sub-batch.
23
+ // Errors are returned as Result.err — the caller (reindex / search) is
24
+ // responsible for degrading gracefully to lexical-only ranking.
25
+ export {};
26
+ //# sourceMappingURL=embedding-provider.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedding-provider.js","sourceRoot":"","sources":["../../src/search/embedding-provider.ts"],"names":[],"mappings":"AAAA,wEAAwE;AACxE,EAAE;AACF,8EAA8E;AAC9E,8EAA8E;AAC9E,4EAA4E;AAC5E,2EAA2E;AAC3E,0EAA0E;AAC1E,yEAAyE;AACzE,0EAA0E;AAC1E,0EAA0E;AAC1E,eAAe;AACf,EAAE;AACF,YAAY;AACZ,4EAA4E;AAC5E,uEAAuE;AACvE,6EAA6E;AAC7E,6EAA6E;AAC7E,uEAAuE;AACvE,4EAA4E;AAC5E,8EAA8E;AAC9E,6EAA6E;AAC7E,2EAA2E;AAC3E,2EAA2E;AAC3E,oEAAoE"}
@@ -1 +1 @@
1
- {"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../../src/search/hybrid.ts"],"names":[],"mappings":"AAUA,OAAO,EAAgB,KAAK,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAO,EAAM,KAAK,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAKL,KAAK,OAAO,EACb,MAAM,wBAAwB,CAAC;AAIhC,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,eAAO,MAAM,eAAe,EAAE,aAA0C,CAAC;AAEzE,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,UAAU,GAAG,IAAI,CAAC;CAC1B;AAED,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,OAAO,CAAC;IACpB,OAAO,EAAE,aAAa,CAAC;IACvB,IAAI,EAAE,SAAS,EAAE,CAAC;CACnB;AAsHD,MAAM,WAAW,mBAAmB;IAClC,OAAO,CAAC,EAAE,aAAa,CAAC;IACxB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAGD,wBAAsB,YAAY,CAChC,EAAE,EAAE,OAAO,EACX,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,mBAAwB,GAChC,OAAO,CAAC,MAAM,CAAC,kBAAkB,EAAE,KAAK,CAAC,CAAC,CAqB5C;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,OAAO,CAAC;IACpB,OAAO,EAAE,aAAa,CAAC;IACvB,IAAI,EAAE,SAAS,EAAE,CAAC;CACnB;AAOD,wBAAgB,aAAa,CAC3B,EAAE,EAAE,OAAO,EACX,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE,mBAAwB,GAChC,MAAM,CAAC,mBAAmB,EAAE,KAAK,CAAC,CA8BpC"}
1
+ {"version":3,"file":"hybrid.d.ts","sourceRoot":"","sources":["../../src/search/hybrid.ts"],"names":[],"mappings":"AAkBA,OAAO,EAAgB,KAAK,UAAU,EAAE,MAAM,sBAAsB,CAAC;AACrE,OAAO,EAAM,KAAK,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,EAKL,KAAK,OAAO,EACb,MAAM,wBAAwB,CAAC;AAIhC,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,eAAO,MAAM,eAAe,EAAE,aAA0C,CAAC;AAEzE,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,KAAK,EAAE,UAAU,GAAG,IAAI,CAAC;CAC1B;AAED,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,OAAO,CAAC;IACpB,OAAO,EAAE,aAAa,CAAC;IACvB,IAAI,EAAE,SAAS,EAAE,CAAC;CACnB;AAoLD,MAAM,WAAW,mBAAmB;IAClC,OAAO,CAAC,EAAE,aAAa,CAAC;IACxB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAGD,wBAAsB,YAAY,CAChC,EAAE,EAAE,OAAO,EACX,KAAK,EAAE,MAAM,EACb,OAAO,GAAE,mBAAwB,GAChC,OAAO,CAAC,MAAM,CAAC,kBAAkB,EAAE,KAAK,CAAC,CAAC,CAsB5C;AAED,MAAM,WAAW,mBAAmB;IAClC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,OAAO,CAAC;IACpB,OAAO,EAAE,aAAa,CAAC;IACvB,IAAI,EAAE,SAAS,EAAE,CAAC;CACnB;AAOD,wBAAgB,aAAa,CAC3B,EAAE,EAAE,OAAO,EACX,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE,mBAAwB,GAChC,MAAM,CAAC,mBAAmB,EAAE,KAAK,CAAC,CA2CpC"}