daftari 1.7.1 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/CHANGELOG.md +122 -0
  2. package/README.md +74 -25
  3. package/dist/index.d.ts.map +1 -1
  4. package/dist/index.js +85 -2
  5. package/dist/index.js.map +1 -1
  6. package/dist/search/bm25.d.ts +1 -17
  7. package/dist/search/bm25.d.ts.map +1 -1
  8. package/dist/search/bm25.js +43 -65
  9. package/dist/search/bm25.js.map +1 -1
  10. package/dist/search/embedding-provider.d.ts +8 -0
  11. package/dist/search/embedding-provider.d.ts.map +1 -0
  12. package/dist/search/embedding-provider.js +26 -0
  13. package/dist/search/embedding-provider.js.map +1 -0
  14. package/dist/search/hybrid.d.ts.map +1 -1
  15. package/dist/search/hybrid.js +106 -34
  16. package/dist/search/hybrid.js.map +1 -1
  17. package/dist/search/index-state.d.ts +10 -0
  18. package/dist/search/index-state.d.ts.map +1 -1
  19. package/dist/search/index-state.js +58 -3
  20. package/dist/search/index-state.js.map +1 -1
  21. package/dist/search/providers/local-minilm.d.ts +7 -0
  22. package/dist/search/providers/local-minilm.d.ts.map +1 -0
  23. package/dist/search/providers/local-minilm.js +114 -0
  24. package/dist/search/providers/local-minilm.js.map +1 -0
  25. package/dist/search/providers/openai-3-small.d.ts +5 -0
  26. package/dist/search/providers/openai-3-small.d.ts.map +1 -0
  27. package/dist/search/providers/openai-3-small.js +174 -0
  28. package/dist/search/providers/openai-3-small.js.map +1 -0
  29. package/dist/search/reindex.d.ts +3 -0
  30. package/dist/search/reindex.d.ts.map +1 -1
  31. package/dist/search/reindex.js +189 -39
  32. package/dist/search/reindex.js.map +1 -1
  33. package/dist/search/self-write.d.ts +4 -0
  34. package/dist/search/self-write.d.ts.map +1 -0
  35. package/dist/search/self-write.js +62 -0
  36. package/dist/search/self-write.js.map +1 -0
  37. package/dist/search/vector.d.ts +10 -1
  38. package/dist/search/vector.d.ts.map +1 -1
  39. package/dist/search/vector.js +102 -59
  40. package/dist/search/vector.js.map +1 -1
  41. package/dist/search/watcher.d.ts +18 -0
  42. package/dist/search/watcher.d.ts.map +1 -0
  43. package/dist/search/watcher.js +300 -0
  44. package/dist/search/watcher.js.map +1 -0
  45. package/dist/storage/index-db.d.ts +17 -4
  46. package/dist/storage/index-db.d.ts.map +1 -1
  47. package/dist/storage/index-db.js +329 -28
  48. package/dist/storage/index-db.js.map +1 -1
  49. package/dist/tools/search.d.ts.map +1 -1
  50. package/dist/tools/search.js +11 -3
  51. package/dist/tools/search.js.map +1 -1
  52. package/dist/tools/write.d.ts.map +1 -1
  53. package/dist/tools/write.js +9 -0
  54. package/dist/tools/write.js.map +1 -1
  55. package/dist/utils/config.d.ts +5 -0
  56. package/dist/utils/config.d.ts.map +1 -1
  57. package/dist/utils/config.js +53 -0
  58. package/dist/utils/config.js.map +1 -1
  59. package/package.json +4 -2
@@ -0,0 +1,62 @@
1
+ // Self-write suppression for the fs.watch reactive indexer.
2
+ //
3
+ // The write-path tools (vault_write, vault_append, vault_promote,
4
+ // vault_deprecate) already call indexDocument() in-process after writing the
5
+ // file to disk. The chokidar watcher will *also* see that write as an `add` /
6
+ // `change` event and would queue a redundant re-index. To avoid the duplicate
7
+ // work we mark each path the writer just touched and the watcher silently
8
+ // drops events whose path is still in the set.
9
+ //
10
+ // Implementation: a Map<absPath, expiresAt>. The writer calls
11
+ // noteSelfWrite(absPath) after the file is on disk and indexDocument() has
12
+ // returned, so by the time chokidar fires (no earlier than its 500ms
13
+ // per-path debounce window elapses) the path is already registered. The
14
+ // watcher calls consumeSelfWrite(absPath) when an event fires; if the path is
15
+ // present and the TTL has not lapsed, the event is dropped and the entry is
16
+ // removed. Expired entries are purged lazily on every check.
17
+ //
18
+ // Keys are normalized via path.resolve() so the writer and watcher agree on
19
+ // "the same path" regardless of how each side formed it (chokidar may emit
20
+ // paths with no symlink resolution; node:path.resolve does the same on macOS).
21
+ import { resolve } from "node:path";
22
+ // 1 second is long enough to cover the watcher's 500ms debounce window plus
23
+ // some slack for FSEvents latency, and short enough that a *real* external
24
+ // edit that lands within the window is at worst delayed-not-dropped: the next
25
+ // edit will still fire after the set has expired.
26
+ const SELF_WRITE_TTL_MS = 1_000;
27
+ const pending = new Map();
28
+ function purgeExpired(now) {
29
+ for (const [key, expiresAt] of pending) {
30
+ if (expiresAt <= now)
31
+ pending.delete(key);
32
+ }
33
+ }
34
+ // Register a path that Daftari itself just wrote so the watcher ignores its
35
+ // next event for that path. Path is resolved to its absolute form so the
36
+ // watcher's normalised path matches regardless of how it was supplied.
37
+ export function noteSelfWrite(absPath, now = Date.now()) {
38
+ purgeExpired(now);
39
+ pending.set(resolve(absPath), now + SELF_WRITE_TTL_MS);
40
+ }
41
+ // True if `absPath` is currently in the self-write set. When true the entry is
42
+ // consumed (a single self-write covers a single watcher event) so a second,
43
+ // genuinely-external edit shortly after is not silently dropped.
44
+ export function consumeSelfWrite(absPath, now = Date.now()) {
45
+ purgeExpired(now);
46
+ const key = resolve(absPath);
47
+ const expiresAt = pending.get(key);
48
+ if (expiresAt === undefined)
49
+ return false;
50
+ if (expiresAt <= now) {
51
+ pending.delete(key);
52
+ return false;
53
+ }
54
+ pending.delete(key);
55
+ return true;
56
+ }
57
+ // Tests load multiple suites against the singleton; clearing between them
58
+ // keeps cross-test pollution out of the set.
59
+ export function resetSelfWriteState() {
60
+ pending.clear();
61
+ }
62
+ //# sourceMappingURL=self-write.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"self-write.js","sourceRoot":"","sources":["../../src/search/self-write.ts"],"names":[],"mappings":"AAAA,4DAA4D;AAC5D,EAAE;AACF,kEAAkE;AAClE,6EAA6E;AAC7E,8EAA8E;AAC9E,8EAA8E;AAC9E,0EAA0E;AAC1E,+CAA+C;AAC/C,EAAE;AACF,8DAA8D;AAC9D,2EAA2E;AAC3E,qEAAqE;AACrE,wEAAwE;AACxE,8EAA8E;AAC9E,4EAA4E;AAC5E,6DAA6D;AAC7D,EAAE;AACF,4EAA4E;AAC5E,2EAA2E;AAC3E,+EAA+E;AAE/E,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,4EAA4E;AAC5E,2EAA2E;AAC3E,8EAA8E;AAC9E,kDAAkD;AAClD,MAAM,iBAAiB,GAAG,KAAK,CAAC;AAEhC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkB,CAAC;AAE1C,SAAS,YAAY,CAAC,GAAW;IAC/B,KAAK,MAAM,CAAC,GAAG,EAAE,SAAS,CAAC,IAAI,OAAO,EAAE,CAAC;QACvC,IAAI,SAAS,IAAI,GAAG;YAAE,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IAC5C,CAAC;AACH,CAAC;AAED,4EAA4E;AAC5E,yEAAyE;AACzE,uEAAuE;AACvE,MAAM,UAAU,aAAa,CAAC,OAAe,EAAE,MAAc,IAAI,CAAC,GAAG,EAAE;IACrE,YAAY,CAAC,GAAG,CAAC,CAAC;IAClB,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,GAAG,GAAG,iBAAiB,CAAC,CAAC;AACzD,CAAC;AAED,+EAA+E;AAC/E,4EAA4E;AAC5E,iEAAiE;AACjE,MAAM,UAAU,gBAAgB,CAAC,OAAe,EAAE,MAAc,IAAI,CAAC,GAAG,EAAE;IACxE,YAAY,CAAC,GAAG,CAAC,CAAC;IAClB,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAC7B,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACnC,IAAI,SAAS,KAAK,SAAS;QAAE,OAAO,KAAK,CAAC;IAC1C,IAAI,SAAS,IAAI,GAAG,EAAE,CAAC;QACrB,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACpB,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IACpB,OAAO,IAAI,CAAC;AACd,CAAC;AAED,0EAA0E;AAC1E,6CAA6C;AAC7C,MAAM,UAAU,mBAAmB;IACjC,OAAO,CAAC,KAAK,EAAE,CAAC;AAClB,CAAC"}
@@ -1,10 +1,19 @@
1
1
  import { type Result } from "../frontmatter/types.js";
2
- export declare const EMBEDDING_MODEL = "Xenova/all-MiniLM-L6-v2";
2
+ import type { EmbeddingProviderId } from "../utils/config.js";
3
+ import type { EmbeddingProvider } from "./embedding-provider.js";
4
+ export declare const EMBEDDING_MODEL = "local-minilm";
3
5
  export declare const EMBEDDING_DIM = 384;
4
6
  export declare const EMBED_BATCH_SIZE = 8;
5
7
  export declare function chunkText(text: string): string[];
6
8
  export declare function cosineSimilarity(a: Float32Array, b: Float32Array): number;
7
9
  export declare function meanEmbedding(vectors: Float32Array[]): Float32Array | null;
10
+ export declare function setProvider(id: EmbeddingProviderId): void;
11
+ export declare function getProvider(): EmbeddingProvider;
12
+ export declare function setProviderForTests(provider: EmbeddingProvider): void;
13
+ export declare function resetProviderForTests(): void;
14
+ export declare function isModelLoaded(): boolean;
15
+ export declare function warmModel(): Promise<Result<void, Error>>;
16
+ export declare function resetExtractorForTests(): void;
8
17
  export declare function embed(texts: string[], onProgress?: (done: number, total: number) => void): Promise<Result<Float32Array[], Error>>;
9
18
  export declare function embedQuery(text: string): Promise<Result<Float32Array, Error>>;
10
19
  //# sourceMappingURL=vector.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"vector.d.ts","sourceRoot":"","sources":["../../src/search/vector.ts"],"names":[],"mappings":"AAYA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAE/D,eAAO,MAAM,eAAe,4BAA4B,CAAC;AACzD,eAAO,MAAM,aAAa,MAAM,CAAC;AAYjC,eAAO,MAAM,gBAAgB,IAAI,CAAC;AAQlC,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CA4BhD;AAGD,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CAczE;AAID,wBAAgB,aAAa,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,YAAY,GAAG,IAAI,CAW1E;AAuBD,wBAAsB,KAAK,CACzB,KAAK,EAAE,MAAM,EAAE,EACf,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,GACjD,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,EAAE,KAAK,CAAC,CAAC,CA8BxC;AAGD,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC,CAMnF"}
1
+ {"version":3,"file":"vector.d.ts","sourceRoot":"","sources":["../../src/search/vector.ts"],"names":[],"mappings":"AAcA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAC/D,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAiBjE,eAAO,MAAM,eAAe,iBAAiB,CAAC;AAG9C,eAAO,MAAM,aAAa,MAAmB,CAAC;AAM9C,eAAO,MAAM,gBAAgB,IAAI,CAAC;AAQlC,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CA4BhD;AAGD,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CAczE;AAID,wBAAgB,aAAa,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,YAAY,GAAG,IAAI,CAW1E;AAgCD,wBAAgB,WAAW,CAAC,EAAE,EAAE,mBAAmB,GAAG,IAAI,CAGzD;AAID,wBAAgB,WAAW,IAAI,iBAAiB,CAE/C;AAMD,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,iBAAiB,GAAG,IAAI,CAErE;AAID,wBAAgB,qBAAqB,IAAI,IAAI,CAG5C;AAQD,wBAAgB,aAAa,IAAI,OAAO,CAIvC;AAMD,wBAAsB,SAAS,IAAI,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAE9D;AAKD,wBAAgB,sBAAsB,IAAI,IAAI,CAE7C;AAKD,wBAAsB,KAAK,CACzB,KAAK,EAAE,MAAM,EAAE,EACf,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,GACjD,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,EAAE,KAAK,CAAC,CAAC,CAGxC;AAGD,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC,CAMnF"}
@@ -1,27 +1,34 @@
1
1
  // Vector (semantic) search half of hybrid search.
2
2
  //
3
- // Documents are split into chunks; each chunk is embedded into a 384-dim
4
- // vector with the all-MiniLM-L6-v2 sentence-transformer (run locally via
5
- // @huggingface/transformers — no network at query time once the model is cached).
6
- // Similarity is cosine distance. Embeddings come back L2-normalised, so cosine
7
- // reduces to a dot product, but cosineSimilarity stays general for safety.
3
+ // Documents are split into chunks; each chunk is embedded via the active
4
+ // EmbeddingProvider (defaults to local-minilm: 384-dim sentence-transformers
5
+ // all-MiniLM-L6-v2 run locally via @huggingface/transformers). Similarity is
6
+ // cosine distance. Embeddings come back L2-normalised, so cosine reduces to a
7
+ // dot product, but cosineSimilarity stays general for safety.
8
8
  //
9
- // The model loads lazily and is memoised for the process. Loading can fail
10
- // (e.g. no network on first run, before the model is cached); embed() surfaces
11
- // that as Result.err so the caller can fall back to lexical-only ranking.
9
+ // The provider is selected by .daftari/config.yaml's `embeddings.provider`
10
+ // key and instantiated once per process (memoised by `setProvider` /
11
+ // `getProvider`). embed/embedQuery/warmModel/isModelLoaded delegate to the
12
+ // active provider, so the rest of the search stack (reindex.ts, hybrid.ts)
13
+ // is provider-agnostic.
12
14
  import { err, ok } from "../frontmatter/types.js";
13
- export const EMBEDDING_MODEL = "Xenova/all-MiniLM-L6-v2";
14
- export const EMBEDDING_DIM = 384;
15
- // Texts are embedded in fixed-size sub-batches rather than one call. The model
16
- // pads every batch to its longest sequence and allocates activation tensors
17
- // proportional to the batch size, so an unbounded batch makes peak memory
18
- // scale with the whole vault — a few hundred documents is enough to exhaust
19
- // RAM and stall in a GC death spiral. A small fixed batch keeps peak memory
20
- // flat regardless of vault size.
15
+ import { isLocalMinilmLoaded, LOCAL_MINILM_DIM, localMinilmProvider, resetLocalMinilmForTests, } from "./providers/local-minilm.js";
16
+ import { makeOpenAi3SmallProvider } from "./providers/openai-3-small.js";
17
+ // EMBEDDING_MODEL and EMBEDDING_DIM are retained as deprecated plain
18
+ // constants pointing at the local-minilm provider's values. They were the
19
+ // single embedding identity before this PR; reindex.ts, hybrid.ts and the
20
+ // tests imported them as literals (SQL binds, length comparisons). New code
21
+ // must read `getProvider().id` and `getProvider().dim` instead — these
22
+ // exports are scheduled for removal next release.
21
23
  //
22
- // 8 was measured as the sweet spot: on CPU inference larger batches were both
23
- // heavier (more activation memory) and slower (more compute wasted padding
24
- // short chunks up to the batch's longest sequence), not faster.
24
+ // @deprecated Use `getProvider().id` instead.
25
+ export const EMBEDDING_MODEL = "local-minilm";
26
+ // @deprecated Use `getProvider().dim` instead.
27
+ export const EMBEDDING_DIM = LOCAL_MINILM_DIM;
28
+ // Texts are embedded in fixed-size sub-batches; see provider implementations.
29
+ // The constant lives here for tests that probe the local-minilm batching
30
+ // behaviour. (No provider exposes this directly through the interface
31
+ // because batching is an implementation detail.)
25
32
  export const EMBED_BATCH_SIZE = 8;
26
33
  const CHUNK_MAX_CHARS = 800;
27
34
  // Splits a document body into embeddable chunks. Paragraphs (blank-line
@@ -95,51 +102,87 @@ export function meanEmbedding(vectors) {
95
102
  sum[i] = sum[i] / vectors.length;
96
103
  return sum;
97
104
  }
98
- let extractorPromise = null;
99
- async function getExtractor() {
100
- if (!extractorPromise) {
101
- extractorPromise = import("@huggingface/transformers").then(({ pipeline }) => pipeline("feature-extraction", EMBEDDING_MODEL));
105
+ // --- Provider selection ----------------------------------------------------
106
+ // The active provider for this process. Memoised so a server run uses one
107
+ // provider for its whole lifetime; switching providers means restarting the
108
+ // server (and the next reindex populates a fresh row set under the new
109
+ // provider's id — the old rows stay in the cache as cheap insurance for
110
+ // switching back).
111
+ let activeProvider = localMinilmProvider;
112
+ // Resolves the active provider from a config id. The OPENAI_API_KEY presence
113
+ // has already been validated by loadConfig; if it's somehow missing here we
114
+ // fail loud rather than constructing a broken provider.
115
+ function instantiateProvider(id) {
116
+ switch (id) {
117
+ case "local-minilm":
118
+ return localMinilmProvider;
119
+ case "openai-3-small": {
120
+ const key = process.env.OPENAI_API_KEY;
121
+ if (!key) {
122
+ throw new Error("OPENAI_API_KEY is not set — cannot construct openai-3-small provider");
123
+ }
124
+ return makeOpenAi3SmallProvider(key);
125
+ }
102
126
  }
103
- return extractorPromise;
104
127
  }
105
- // Embeds texts in EMBED_BATCH_SIZE sub-batches so peak memory stays flat
106
- // regardless of how many texts are passed. Returns one Float32Array per input
107
- // text, in input order. An empty input yields an empty array without loading
108
- // the model. `onProgress` (if given) fires after each sub-batch with the count
109
- // embedded so far and the total — used to drive reindex progress output.
128
+ // Called once at server startup (after loadConfig). Idempotent for the same
129
+ // id — subsequent calls with the same id are no-ops, so test code can call
130
+ // it freely without thrashing. A different id replaces the provider; tests
131
+ // rely on this.
132
+ export function setProvider(id) {
133
+ if (activeProvider.id === id)
134
+ return;
135
+ activeProvider = instantiateProvider(id);
136
+ }
137
+ // Returns the active provider. Default is local-minilm; setProvider() (which
138
+ // the server's main() invokes after loadConfig) swaps in another.
139
+ export function getProvider() {
140
+ return activeProvider;
141
+ }
142
+ // Test-only: install an arbitrary provider object. Used by reindex tests
143
+ // that need to simulate a provider switch without paying the network or
144
+ // model-load cost. Only the active provider is swapped here; the
145
+ // local-minilm memoised extractor is cleared by resetProviderForTests().
146
+ export function setProviderForTests(provider) {
147
+ activeProvider = provider;
148
+ }
149
+ // Test-only: revert to the default local-minilm provider and clear its
150
+ // memoised extractor. Production code must not call this.
151
+ export function resetProviderForTests() {
152
+ activeProvider = localMinilmProvider;
153
+ resetLocalMinilmForTests();
154
+ }
155
+ // --- Provider-delegating surface (kept for back-compat) -------------------
156
+ // Returns true once the active provider's underlying model is loaded. For
157
+ // providers with no warm-up cost (e.g. the stateless OpenAI HTTP client)
158
+ // this is always true; for local-minilm it tracks the transformers.js
159
+ // extractor promise.
160
+ export function isModelLoaded() {
161
+ if (activeProvider.id === "local-minilm")
162
+ return isLocalMinilmLoaded();
163
+ // Stateless / always-ready providers are "loaded" by definition.
164
+ return true;
165
+ }
166
+ // Eagerly loads the active provider so the first user search does not pay
167
+ // the cold start. Intended to be invoked as a background `void warmModel()`
168
+ // after startup completes. Returns Result rather than throwing — a warm
169
+ // failure must never crash the server.
170
+ export async function warmModel() {
171
+ return activeProvider.warm();
172
+ }
173
+ // Test-only: clear the local-minilm memoised extractor so a fresh import is
174
+ // forced on the next call. Production code must not invoke this. Kept under
175
+ // the historic name for the existing lazy-model-load tests.
176
+ export function resetExtractorForTests() {
177
+ resetLocalMinilmForTests();
178
+ }
179
+ // Embeds texts via the active provider. Returns one Float32Array per input,
180
+ // in input order. An empty input yields an empty array. `onProgress` (if
181
+ // given) fires after each sub-batch.
110
182
  export async function embed(texts, onProgress) {
111
183
  if (texts.length === 0)
112
184
  return ok([]);
113
- try {
114
- const extractor = await getExtractor();
115
- const vectors = [];
116
- for (let start = 0; start < texts.length; start += EMBED_BATCH_SIZE) {
117
- const batch = texts.slice(start, start + EMBED_BATCH_SIZE);
118
- const output = await extractor(batch, {
119
- pooling: "mean",
120
- normalize: true,
121
- });
122
- const dim = output.dims[output.dims.length - 1] ?? EMBEDDING_DIM;
123
- for (let i = 0; i < batch.length; i++) {
124
- vectors.push(output.data.slice(i * dim, (i + 1) * dim));
125
- }
126
- // Progress is a best-effort side channel: a failing reporter (e.g. a
127
- // closed stderr pipe) must never abort embedding the vault.
128
- if (onProgress) {
129
- try {
130
- onProgress(vectors.length, texts.length);
131
- }
132
- catch {
133
- // ignore — progress reporting is not load-bearing
134
- }
135
- }
136
- }
137
- return ok(vectors);
138
- }
139
- catch (e) {
140
- const reason = e instanceof Error ? e.message : String(e);
141
- return err(new Error(`embedding failed: ${reason}`));
142
- }
185
+ return activeProvider.embed(texts, onProgress);
143
186
  }
144
187
  // Convenience wrapper for embedding a single query string.
145
188
  export async function embedQuery(text) {
@@ -1 +1 @@
1
- {"version":3,"file":"vector.js","sourceRoot":"","sources":["../../src/search/vector.ts"],"names":[],"mappings":"AAAA,kDAAkD;AAClD,EAAE;AACF,yEAAyE;AACzE,yEAAyE;AACzE,kFAAkF;AAClF,+EAA+E;AAC/E,2EAA2E;AAC3E,EAAE;AACF,2EAA2E;AAC3E,+EAA+E;AAC/E,0EAA0E;AAE1E,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,yBAAyB,CAAC;AAE/D,MAAM,CAAC,MAAM,eAAe,GAAG,yBAAyB,CAAC;AACzD,MAAM,CAAC,MAAM,aAAa,GAAG,GAAG,CAAC;AAEjC,+EAA+E;AAC/E,4EAA4E;AAC5E,0EAA0E;AAC1E,4EAA4E;AAC5E,4EAA4E;AAC5E,iCAAiC;AACjC,EAAE;AACF,8EAA8E;AAC9E,2EAA2E;AAC3E,gEAAgE;AAChE,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,CAAC;AAElC,MAAM,eAAe,GAAG,GAAG,CAAC;AAE5B,wEAAwE;AACxE,2EAA2E;AAC3E,6EAA6E;AAC7E,4DAA4D;AAC5D,MAAM,UAAU,SAAS,CAAC,IAAY;IACpC,MAAM,UAAU,GAAG,IAAI;SACpB,KAAK,CAAC,SAAS,CAAC;SAChB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE/B,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,IAAI,IAAI,CAAC,MAAM,GAAG,eAAe,EAAE,CAAC;YAClC,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACrB,OAAO,GAAG,EAAE,CAAC;YACf,CAAC;YACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,IAAI,eAAe,EAAE,CAAC;gBACtD,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,eAAe,CAAC,CAAC,CAAC;YAClD,CAAC;YACD,SAAS;QACX,CAAC;QACD,IAAI,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,GAAG,eAAe,IAAI,OAAO,EAAE,CAAC;YAClE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,OAAO,GAAG,IAAI,CAAC;QACjB,CAAC;aAAM,CAAC;YACN,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QACrD,CAAC;IACH,CAAC;IACD,IAAI,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAClC,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;AACpD,CAAC;AAED,6EAA6E;AAC7E,MAAM,UAAU,gBAAgB,CAAC,CAAe,EAAE,CAAe;IAC/D,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACtD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,K
AAK,GAAG,CAAC,CAAC;IACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAW,CAAC;QACzB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAW,CAAC;QACzB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;QACf,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IACjB,CAAC;IACD,IAAI,KAAK,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACzC,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;AACrD,CAAC;AAED,6EAA6E;AAC7E,uEAAuE;AACvE,MAAM,UAAU,aAAa,CAAC,OAAuB;IACnD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,MAAM,GAAG,GAAG,OAAO,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC;IACpC,IAAI,GAAG,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAC3B,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC;IAClC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,MAAM,KAAK,GAAG;YAAE,SAAS;QAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAC,GAAI,GAAG,CAAC,CAAC,CAAY,GAAI,CAAC,CAAC,CAAC,CAAY,CAAC;IAC/E,CAAC;IACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE;QAAE,GAAG,CAAC,CAAC,CAAC,GAAI,GAAG,CAAC,CAAC,CAAY,GAAG,OAAO,CAAC,MAAM,CAAC;IAC3E,OAAO,GAAG,CAAC;AACb,CAAC;AAOD,IAAI,gBAAgB,GAA8B,IAAI,CAAC;AAEvD,KAAK,UAAU,YAAY;IACzB,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACtB,gBAAgB,GAAG,MAAM,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,CAC3E,QAAQ,CAAC,oBAAoB,EAAE,eAAe,CAAC,CAC1B,CAAC;IAC1B,CAAC;IACD,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED,yEAAyE;AACzE,8EAA8E;AAC9E,6EAA6E;AAC7E,+EAA+E;AAC/E,yEAAyE;AACzE,MAAM,CAAC,KAAK,UAAU,KAAK,CACzB,KAAe,EACf,UAAkD;IAElD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;IACtC,IAAI,CAAC;QACH,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,MAAM,OAAO,GAAmB,EAAE,CAAC;QACnC,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,KAAK,IAAI,gBAAgB,EAAE,CAAC;YACpE,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,gBAAgB,CAAC,CAAC;YAC3D,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,KAAK,EAAE;gBACpC,OAAO,EAAE,MAAM;gBACf,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YA
CH,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,IAAI,aAAa,CAAC;YACjE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC;YAC1D,CAAC;YACD,qEAAqE;YACrE,4DAA4D;YAC5D,IAAI,UAAU,EAAE,CAAC;gBACf,IAAI,CAAC;oBACH,UAAU,CAAC,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;gBAC3C,CAAC;gBAAC,MAAM,CAAC;oBACP,kDAAkD;gBACpD,CAAC;YACH,CAAC;QACH,CAAC;QACD,OAAO,EAAE,CAAC,OAAO,CAAC,CAAC;IACrB,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,MAAM,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC1D,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,qBAAqB,MAAM,EAAE,CAAC,CAAC,CAAC;IACvD,CAAC;AACH,CAAC;AAED,2DAA2D;AAC3D,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,IAAY;IAC3C,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACnC,IAAI,CAAC,MAAM,CAAC,EAAE;QAAE,OAAO,MAAM,CAAC;IAC9B,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC9B,IAAI,CAAC,KAAK;QAAE,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC,CAAC;IAClE,OAAO,EAAE,CAAC,KAAK,CAAC,CAAC;AACnB,CAAC"}
1
+ {"version":3,"file":"vector.js","sourceRoot":"","sources":["../../src/search/vector.ts"],"names":[],"mappings":"AAAA,kDAAkD;AAClD,EAAE;AACF,yEAAyE;AACzE,6EAA6E;AAC7E,6EAA6E;AAC7E,8EAA8E;AAC9E,8DAA8D;AAC9D,EAAE;AACF,2EAA2E;AAC3E,qEAAqE;AACrE,2EAA2E;AAC3E,2EAA2E;AAC3E,wBAAwB;AAExB,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,yBAAyB,CAAC;AAG/D,OAAO,EACL,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,wBAAwB,GACzB,MAAM,6BAA6B,CAAC;AACrC,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAEzE,qEAAqE;AACrE,0EAA0E;AAC1E,0EAA0E;AAC1E,4EAA4E;AAC5E,uEAAuE;AACvE,kDAAkD;AAClD,EAAE;AACF,8CAA8C;AAC9C,MAAM,CAAC,MAAM,eAAe,GAAG,cAAc,CAAC;AAE9C,+CAA+C;AAC/C,MAAM,CAAC,MAAM,aAAa,GAAG,gBAAgB,CAAC;AAE9C,8EAA8E;AAC9E,yEAAyE;AACzE,sEAAsE;AACtE,iDAAiD;AACjD,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,CAAC;AAElC,MAAM,eAAe,GAAG,GAAG,CAAC;AAE5B,wEAAwE;AACxE,2EAA2E;AAC3E,6EAA6E;AAC7E,4DAA4D;AAC5D,MAAM,UAAU,SAAS,CAAC,IAAY;IACpC,MAAM,UAAU,GAAG,IAAI;SACpB,KAAK,CAAC,SAAS,CAAC;SAChB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE/B,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,IAAI,IAAI,CAAC,MAAM,GAAG,eAAe,EAAE,CAAC;YAClC,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACrB,OAAO,GAAG,EAAE,CAAC;YACf,CAAC;YACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,IAAI,eAAe,EAAE,CAAC;gBACtD,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,eAAe,CAAC,CAAC,CAAC;YAClD,CAAC;YACD,SAAS;QACX,CAAC;QACD,IAAI,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,GAAG,eAAe,IAAI,OAAO,EAAE,CAAC;YAClE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,OAAO,GAAG,IAAI,CAAC;QACjB,CAAC;aAAM,CAAC;YACN,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QACrD,CAAC;IACH,CAAC;IACD,IAAI,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAClC,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;AACpD,CAAC;AAED,6EAA6E;AAC7E,MAAM,UAAU,
gBAAgB,CAAC,CAAe,EAAE,CAAe;IAC/D,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACtD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAW,CAAC;QACzB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAW,CAAC;QACzB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;QACf,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IACjB,CAAC;IACD,IAAI,KAAK,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACzC,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;AACrD,CAAC;AAED,6EAA6E;AAC7E,uEAAuE;AACvE,MAAM,UAAU,aAAa,CAAC,OAAuB;IACnD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,MAAM,GAAG,GAAG,OAAO,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC;IACpC,IAAI,GAAG,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAC3B,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC;IAClC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,MAAM,KAAK,GAAG;YAAE,SAAS;QAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAC,GAAI,GAAG,CAAC,CAAC,CAAY,GAAI,CAAC,CAAC,CAAC,CAAY,CAAC;IAC/E,CAAC;IACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE;QAAE,GAAG,CAAC,CAAC,CAAC,GAAI,GAAG,CAAC,CAAC,CAAY,GAAG,OAAO,CAAC,MAAM,CAAC;IAC3E,OAAO,GAAG,CAAC;AACb,CAAC;AAED,8EAA8E;AAE9E,0EAA0E;AAC1E,4EAA4E;AAC5E,uEAAuE;AACvE,wEAAwE;AACxE,mBAAmB;AACnB,IAAI,cAAc,GAAsB,mBAAmB,CAAC;AAE5D,6EAA6E;AAC7E,4EAA4E;AAC5E,wDAAwD;AACxD,SAAS,mBAAmB,CAAC,EAAuB;IAClD,QAAQ,EAAE,EAAE,CAAC;QACX,KAAK,cAAc;YACjB,OAAO,mBAAmB,CAAC;QAC7B,KAAK,gBAAgB,CAAC,CAAC,CAAC;YACtB,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;YACvC,IAAI,CAAC,GAAG,EAAE,CAAC;gBACT,MAAM,IAAI,KAAK,CAAC,sEAAsE,CAAC,CAAC;YAC1F,CAAC;YACD,OAAO,wBAAwB,CAAC,GAAG,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;AACH,CAAC;AAED,4EAA4E;AAC5E,2EAA2E;AAC3E,2EAA2E;AAC3E,gBAAgB;AAChB,MAAM,UAAU,WAAW,CAAC,EAAuB;IACjD,IAAI,cAAc,CAAC,EAAE,KAAK,EAAE;QAAE,OAAO;IACrC,cAAc,GAAG
,mBAAmB,CAAC,EAAE,CAAC,CAAC;AAC3C,CAAC;AAED,6EAA6E;AAC7E,kEAAkE;AAClE,MAAM,UAAU,WAAW;IACzB,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,yEAAyE;AACzE,wEAAwE;AACxE,uEAAuE;AACvE,+CAA+C;AAC/C,MAAM,UAAU,mBAAmB,CAAC,QAA2B;IAC7D,cAAc,GAAG,QAAQ,CAAC;AAC5B,CAAC;AAED,uEAAuE;AACvE,0DAA0D;AAC1D,MAAM,UAAU,qBAAqB;IACnC,cAAc,GAAG,mBAAmB,CAAC;IACrC,wBAAwB,EAAE,CAAC;AAC7B,CAAC;AAED,6EAA6E;AAE7E,0EAA0E;AAC1E,yEAAyE;AACzE,sEAAsE;AACtE,qBAAqB;AACrB,MAAM,UAAU,aAAa;IAC3B,IAAI,cAAc,CAAC,EAAE,KAAK,cAAc;QAAE,OAAO,mBAAmB,EAAE,CAAC;IACvE,iEAAiE;IACjE,OAAO,IAAI,CAAC;AACd,CAAC;AAED,0EAA0E;AAC1E,4EAA4E;AAC5E,wEAAwE;AACxE,uCAAuC;AACvC,MAAM,CAAC,KAAK,UAAU,SAAS;IAC7B,OAAO,cAAc,CAAC,IAAI,EAAE,CAAC;AAC/B,CAAC;AAED,4EAA4E;AAC5E,4EAA4E;AAC5E,4DAA4D;AAC5D,MAAM,UAAU,sBAAsB;IACpC,wBAAwB,EAAE,CAAC;AAC7B,CAAC;AAED,4EAA4E;AAC5E,yEAAyE;AACzE,qCAAqC;AACrC,MAAM,CAAC,KAAK,UAAU,KAAK,CACzB,KAAe,EACf,UAAkD;IAElD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;IACtC,OAAO,cAAc,CAAC,KAAK,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;AACjD,CAAC;AAED,2DAA2D;AAC3D,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,IAAY;IAC3C,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACnC,IAAI,CAAC,MAAM,CAAC,EAAE;QAAE,OAAO,MAAM,CAAC;IAC9B,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC9B,IAAI,CAAC,KAAK;QAAE,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC,CAAC;IAClE,OAAO,EAAE,CAAC,KAAK,CAAC,CAAC;AACnB,CAAC"}
@@ -0,0 +1,18 @@
1
+ import { type FSWatcher } from "chokidar";
2
+ import { type Result } from "../frontmatter/types.js";
3
+ export declare const WATCH_DEBOUNCE_MS = 500;
4
+ export interface VaultWatcher {
5
+ close: () => Promise<void>;
6
+ }
7
+ export interface WatcherOptions {
8
+ log?: (msg: string) => void;
9
+ debounceMs?: number;
10
+ indexFn?: (vaultRoot: string, relPath: string) => Promise<Result<unknown, Error>>;
11
+ deleteFn?: (vaultRoot: string, relPath: string) => Promise<Result<unknown, Error>>;
12
+ statFn?: (absPath: string) => Promise<{
13
+ exists: boolean;
14
+ }>;
15
+ watcherFactory?: (vaultRoot: string) => FSWatcher;
16
+ }
17
+ export declare function startWatcher(vaultRoot: string, opts?: WatcherOptions): VaultWatcher;
18
+ //# sourceMappingURL=watcher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"watcher.d.ts","sourceRoot":"","sources":["../../src/search/watcher.ts"],"names":[],"mappings":"AAgCA,OAAO,EAAuB,KAAK,SAAS,EAAE,MAAM,UAAU,CAAC;AAC/D,OAAO,EAAM,KAAK,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAY1D,eAAO,MAAM,iBAAiB,MAAM,CAAC;AAErC,MAAM,WAAW,YAAY;IAI3B,KAAK,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;CAC5B;AAED,MAAM,WAAW,cAAc;IAG7B,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAC;IAG5B,UAAU,CAAC,EAAE,MAAM,CAAC;IAIpB,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;IAClF,QAAQ,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;IACnF,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC;QAAE,MAAM,EAAE,OAAO,CAAA;KAAE,CAAC,CAAC;IAG3D,cAAc,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,SAAS,CAAC;CACnD;AA2GD,wBAAgB,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,GAAE,cAAmB,GAAG,YAAY,CAmKvF"}
@@ -0,0 +1,300 @@
1
+ // fs.watch reactive indexing.
2
+ //
3
+ // Daftari's startup freshness check (search/reindex.ts isIndexFresh) keeps
4
+ // the index honest across *restarts*: a manifest of path→mtime is compared
5
+ // to disk and a full reindex runs when anything has drifted. But while the
6
+ // server is up, an editor save, a sync engine pull, or a scripted writer
7
+ // will rewrite a vault file *out of band* — Daftari's write-path tools are
8
+ // not the only writer. Without a watcher the index drifts until the next
9
+ // startup. The watcher closes that gap: chokidar listens on the vault root,
10
+ // and `add` / `change` events trigger an indexDocument() pass for that one
11
+ // file; `unlink` events evict the doc from the index after a re-stat
12
+ // confirms the file really is gone (FSEvents on macOS, iCloud, and Dropbox
13
+ // emit phantom unlink+add pairs during atomic-rename saves).
14
+ //
15
+ // Per-path debounce: editors save in bursts (atomic rename: write tmp,
16
+ // rename, delete tmp), so a single user save can produce 3-5 chokidar
17
+ // events for the same path inside a few ms. We collect events into a Map
18
+ // keyed by relative path; each touch resets a 500ms timer; when the timer
19
+ // fires we run the indexer once. Different paths debounce independently.
20
+ //
21
+ // Self-write suppression: when the write-path tools (vault_write etc.)
22
+ // finish, they note the absolute path in search/self-write.ts. The watcher
23
+ // consults that set when its debounce fires and silently drops the event if
24
+ // the path is registered, so the in-process indexDocument() the writer
25
+ // already ran is not duplicated.
26
+ //
27
+ // Errors from chokidar are logged to stderr but never crash the server.
28
+ // stderr is used throughout so the MCP stdio JSON-RPC stream on stdout
29
+ // stays clean.
30
+ import { stat } from "node:fs/promises";
31
+ import { relative, resolve, sep } from "node:path";
32
+ import { default as chokidar } from "chokidar";
33
+ import { ok } from "../frontmatter/types.js";
34
+ import { deleteDocument, openIndexDb } from "../storage/index-db.js";
35
+ import { resolveVaultPath } from "../storage/local.js";
36
+ import { getIndexStatus, markPathIndexing, markPathReady } from "./index-state.js";
37
+ import { indexDocument } from "./reindex.js";
38
+ import { consumeSelfWrite } from "./self-write.js";
39
+ import { getProvider } from "./vector.js";
40
+ // 500ms is the floor the design locks in: short enough for the index to feel
41
+ // live to a human typing in their editor, long enough to coalesce an
42
+ // atomic-rename burst (write tmp, rename onto target, delete tmp) into a
43
+ // single indexer call.
44
+ export const WATCH_DEBOUNCE_MS = 500;
45
// Fills in every option the caller left unset (null or undefined) so the
// rest of the module can work with a fully populated object. Only
// `watcherFactory` has no fallback here; it is passed through as given.
function resolveOptions(opts) {
    const writeToStderr = (msg) => process.stderr.write(msg);
    const { log, debounceMs, indexFn, deleteFn, statFn, watcherFactory } = opts;
    return {
        log: log ?? writeToStderr,
        debounceMs: debounceMs ?? WATCH_DEBOUNCE_MS,
        indexFn: indexFn ?? indexDocument,
        deleteFn: deleteFn ?? defaultDeleteFn,
        statFn: statFn ?? defaultStatFn,
        watcherFactory,
    };
}
57
// Default unlink handler. Opens the index db, removes the document (via
// deleteDocument) and then drops the path from the stored vault manifest so
// the next startup freshness check does not treat the missing file as drift.
// The embeddings cache is not touched here.
//
// Returns the failed Result from openIndexDb unchanged when the db cannot be
// opened, otherwise ok(undefined). The db handle is always closed.
async function defaultDeleteFn(vaultRoot, relPath) {
    const opened = openIndexDb(vaultRoot, getProvider().dim);
    if (!opened.ok) {
        return opened;
    }
    const db = opened.value;
    try {
        deleteDocument(db, relPath);
        // The manifest lives in the meta table as a single JSON blob, so it is
        // patched with a short read-modify-write rather than through
        // reindex.ts internals.
        const manifestKey = "vault_manifest";
        const stored = db.prepare("SELECT value FROM meta WHERE key = ?").get(manifestKey);
        if (!stored) {
            return ok(undefined);
        }
        try {
            const manifest = JSON.parse(stored.value);
            const tracked = relPath in manifest;
            if (tracked) {
                delete manifest[relPath];
                const upsert = db.prepare("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value");
                upsert.run(manifestKey, JSON.stringify(manifest));
            }
        }
        catch {
            // Best effort: a manifest that cannot be parsed or rewritten is
            // non-fatal here — the next full reindex rewrites it from scratch.
        }
        return ok(undefined);
    }
    finally {
        db.close();
    }
}
91
// Default existence probe: reports whether `absPath` is currently on disk.
//
// Resolves to { exists: true } when stat succeeds and { exists: false } only
// for "no such entry" failures (ENOENT, or ENOTDIR when a parent segment is
// not a directory). Any other stat failure (EACCES, EMFILE, EIO, an invalid
// path argument, ...) says nothing about whether the file is there, so it is
// rethrown instead of being mistaken for a deletion. The unlink path in
// startWatcher calls this inside its try/catch, which logs the failure and
// leaves the index entry in place.
async function defaultStatFn(absPath) {
    try {
        await stat(absPath);
        return { exists: true };
    }
    catch (err) {
        const code = err?.code;
        if (code === "ENOENT" || code === "ENOTDIR") {
            return { exists: false };
        }
        throw err;
    }
}
100
// Maps a watcher-emitted absolute path back to the vault-relative,
// "/"-separated path that the index functions expect.
//
// Returns null when the path is the vault root itself or lies outside the
// vault. "Outside" is decided by the leading `..` path *segment*, not by a
// raw string prefix, so an in-vault entry whose name merely begins with two
// dots (e.g. "..draft.md") is still mapped correctly.
function toVaultRelative(vaultRoot, absPath) {
    const rel = relative(resolve(vaultRoot), resolve(absPath));
    if (rel.length === 0) {
        return null;
    }
    if (rel === ".." || rel.startsWith(`..${sep}`)) {
        return null;
    }
    // The watcher emits OS-native separators; index storage uses "/".
    return sep === "/" ? rel : rel.split(sep).join("/");
}
114
// Decides, from a vault-relative "/"-separated path, whether an event must be
// dropped because it lives under a top-level entry we never index. The
// watcher's own `ignored` option already filters these, but the check is
// repeated at dispatch time because chokidar sometimes ignores its own
// `ignored` pattern for `unlinkDir` events on macOS.
function isIgnoredPath(relPath) {
    const [topSegment = ""] = relPath.split("/");
    // .daftari holds the index itself and lock files (watching it would feed
    // our own writes back as events); .git holds git internals.
    if (topSegment === ".daftari" || topSegment === ".git") {
        return true;
    }
    // Any other hidden top-level entry (editor swap files, etc). A lone "."
    // segment is not treated as hidden.
    return topSegment.length > 1 && topSegment.startsWith(".");
}
133
// Markdown filter. The watcher sees every file under the root, but only
// paths ending in ".md" (matched case-insensitively) are indexed. Rejecting
// everything else here (LICENSE, images, .DS_Store, ...) keeps unrelated
// files from starting debounce timers and indexer calls.
function isMarkdown(relPath) {
    const lowered = relPath.toLowerCase();
    return lowered.endsWith(".md");
}
140
// Starts watching `vaultRoot` and keeps the index in step with out-of-band
// edits: `add` / `change` events re-index the one affected file, `unlink`
// events evict it once a re-stat confirms the file is really gone.
//
// Parameters:
//   vaultRoot - vault directory; resolved to an absolute path before use.
//   opts      - optional overrides (log, debounceMs, indexFn, deleteFn,
//               statFn, watcherFactory). Defaults to {}.
//
// Returns a handle whose async close() cancels every pending debounce timer
// and closes the underlying watcher; calling close() again is a no-op.
//
// The caller is responsible for honouring the `watch` config flag —
// startWatcher itself does not consult config, so tests can drive it without
// a config file.
export function startWatcher(vaultRoot, opts = {}) {
    const resolved = resolveOptions(opts);
    const root = resolve(vaultRoot);
    // relPath -> { timer, lastEvent } for every path whose debounce window is
    // still open.
    const pending = new Map();
    let closed = false;
    // Spawn chokidar (or the injected fake). The ignored pattern mirrors
    // listFiles in storage/local.ts so the watcher and listing agree on
    // "what's vault content".
    const watcher = resolved.watcherFactory
        ? resolved.watcherFactory(root)
        : chokidar.watch(root, {
            ignored: (p) => {
                // chokidar v4: ignored is called for every path. Return true to skip.
                // The root itself must NOT be ignored.
                if (p === root) {
                    return false;
                }
                const rel = toVaultRelative(root, p);
                if (rel === null) {
                    return false;
                }
                return isIgnoredPath(rel);
            },
            ignoreInitial: true, // startup freshness check already covers the initial state
            persistent: true,
            followSymlinks: false,
            awaitWriteFinish: false, // we run our own debounce
        });
    const describeError = (e) => (e instanceof Error ? e.message : String(e));
    // (Re)opens the debounce window for `relPath`, remembering `lastEvent` as
    // the event to act on when the window elapses. Any timer already running
    // for the path is cancelled first, so each touch resets the window.
    function armTimer(relPath, lastEvent) {
        const open = pending.get(relPath);
        if (open) {
            clearTimeout(open.timer);
        }
        const timer = setTimeout(() => {
            const entry = pending.get(relPath);
            if (!entry) {
                return;
            }
            pending.delete(relPath);
            // dispatch() guards its indexing work with try/catch, but the
            // bookkeeping around that block can still throw. Catch here so a
            // rejection is logged instead of surfacing as an unhandled
            // rejection in the host process.
            dispatch(relPath, entry.lastEvent).catch((e) => {
                resolved.log(`daftari: watcher: dispatch crashed for ${relPath}: ${describeError(e)}\n`);
            });
        }, resolved.debounceMs);
        pending.set(relPath, { timer, lastEvent });
    }
    // Runs the indexer for one file and logs (never throws on) a failed Result.
    async function reindexOne(relPath) {
        const r = await resolved.indexFn(root, relPath);
        if (!r.ok) {
            resolved.log(`daftari: watcher: index update failed for ${relPath}: ${r.error.message}\n`);
        }
    }
    // Single dispatcher for "the debounce window for `relPath` elapsed — do
    // the work". Acts on the last event seen inside the window, so an
    // unlink-then-add pair is handled as an add (FSEvents atomic-save quirk).
    async function dispatch(relPath, lastEvent) {
        if (closed) {
            return;
        }
        // Self-write suppression. The write-path tools register the absolute
        // path after their in-process indexDocument() returns; if it's there,
        // the event was Daftari's own write and the index is already current.
        const abs = resolveVaultPath(root, relPath);
        if (abs.ok && consumeSelfWrite(abs.value)) {
            return;
        }
        // While a full reindex is running, a per-file index call would race
        // the bulk write and may be wiped by clearIndex(). Defer instead:
        // open a fresh debounce window and try this event again when it
        // elapses.
        if (getIndexStatus().status === "indexing") {
            armTimer(relPath, lastEvent);
            return;
        }
        markPathIndexing(relPath);
        try {
            if (lastEvent === "unlink") {
                // FSEvents (macOS), iCloud, and Dropbox emit phantom unlink+add
                // pairs during atomic-rename saves. Re-stat before deleting: if
                // the file is back, treat the event as a change instead.
                if (abs.ok) {
                    const present = await resolved.statFn(abs.value);
                    if (present.exists) {
                        await reindexOne(relPath);
                        return;
                    }
                }
                const d = await resolved.deleteFn(root, relPath);
                if (!d.ok) {
                    resolved.log(`daftari: watcher: delete failed for ${relPath}: ${d.error.message}\n`);
                }
                return;
            }
            // add / change — both route to the indexer.
            await reindexOne(relPath);
        }
        catch (e) {
            resolved.log(`daftari: watcher: dispatch crashed for ${relPath}: ${describeError(e)}\n`);
        }
        finally {
            markPathReady(relPath);
        }
    }
    // Entry point for every watcher event: filters out ignored and
    // non-markdown paths, then (re)starts that path's debounce window. The
    // most recent event "wins" — an unlink followed by an add inside the
    // window arrives at dispatch() as an add.
    function schedule(relPath, event) {
        if (closed) {
            return;
        }
        if (isIgnoredPath(relPath) || !isMarkdown(relPath)) {
            return;
        }
        armTimer(relPath, event);
    }
    // Per-file events only. `addDir` / `unlinkDir` are intentionally not
    // handled — per-file events cover everything we care about and a
    // directory delete fires unlink for each contained file anyway.
    for (const event of ["add", "change", "unlink"]) {
        watcher.on(event, (p) => {
            const rel = toVaultRelative(root, p);
            if (rel) {
                schedule(rel, event);
            }
        });
    }
    // Watcher errors are logged, never rethrown.
    watcher.on("error", (e) => {
        resolved.log(`daftari: watcher error: ${describeError(e)}\n`);
    });
    return {
        close: async () => {
            if (closed) {
                return;
            }
            closed = true;
            for (const { timer } of pending.values()) {
                clearTimeout(timer);
            }
            pending.clear();
            await watcher.close();
        },
    };
}
300
+ //# sourceMappingURL=watcher.js.map