daftari 1.7.1 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +122 -0
- package/README.md +74 -25
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +85 -2
- package/dist/index.js.map +1 -1
- package/dist/search/bm25.d.ts +1 -17
- package/dist/search/bm25.d.ts.map +1 -1
- package/dist/search/bm25.js +43 -65
- package/dist/search/bm25.js.map +1 -1
- package/dist/search/embedding-provider.d.ts +8 -0
- package/dist/search/embedding-provider.d.ts.map +1 -0
- package/dist/search/embedding-provider.js +26 -0
- package/dist/search/embedding-provider.js.map +1 -0
- package/dist/search/hybrid.d.ts.map +1 -1
- package/dist/search/hybrid.js +106 -34
- package/dist/search/hybrid.js.map +1 -1
- package/dist/search/index-state.d.ts +10 -0
- package/dist/search/index-state.d.ts.map +1 -1
- package/dist/search/index-state.js +58 -3
- package/dist/search/index-state.js.map +1 -1
- package/dist/search/providers/local-minilm.d.ts +7 -0
- package/dist/search/providers/local-minilm.d.ts.map +1 -0
- package/dist/search/providers/local-minilm.js +114 -0
- package/dist/search/providers/local-minilm.js.map +1 -0
- package/dist/search/providers/openai-3-small.d.ts +5 -0
- package/dist/search/providers/openai-3-small.d.ts.map +1 -0
- package/dist/search/providers/openai-3-small.js +174 -0
- package/dist/search/providers/openai-3-small.js.map +1 -0
- package/dist/search/reindex.d.ts +3 -0
- package/dist/search/reindex.d.ts.map +1 -1
- package/dist/search/reindex.js +189 -39
- package/dist/search/reindex.js.map +1 -1
- package/dist/search/self-write.d.ts +4 -0
- package/dist/search/self-write.d.ts.map +1 -0
- package/dist/search/self-write.js +62 -0
- package/dist/search/self-write.js.map +1 -0
- package/dist/search/vector.d.ts +10 -1
- package/dist/search/vector.d.ts.map +1 -1
- package/dist/search/vector.js +102 -59
- package/dist/search/vector.js.map +1 -1
- package/dist/search/watcher.d.ts +18 -0
- package/dist/search/watcher.d.ts.map +1 -0
- package/dist/search/watcher.js +300 -0
- package/dist/search/watcher.js.map +1 -0
- package/dist/storage/index-db.d.ts +17 -4
- package/dist/storage/index-db.d.ts.map +1 -1
- package/dist/storage/index-db.js +329 -28
- package/dist/storage/index-db.js.map +1 -1
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +11 -3
- package/dist/tools/search.js.map +1 -1
- package/dist/tools/write.d.ts.map +1 -1
- package/dist/tools/write.js +9 -0
- package/dist/tools/write.js.map +1 -1
- package/dist/utils/config.d.ts +5 -0
- package/dist/utils/config.d.ts.map +1 -1
- package/dist/utils/config.js +53 -0
- package/dist/utils/config.js.map +1 -1
- package/package.json +4 -2
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
// Self-write suppression for the fs.watch reactive indexer.
|
|
2
|
+
//
|
|
3
|
+
// The write-path tools (vault_write, vault_append, vault_promote,
|
|
4
|
+
// vault_deprecate) already call indexDocument() in-process after writing the
|
|
5
|
+
// file to disk. The chokidar watcher will *also* see that write as an `add` /
|
|
6
|
+
// `change` event and would queue a redundant re-index. To avoid the duplicate
|
|
7
|
+
// work we mark each path the writer just touched and the watcher silently
|
|
8
|
+
// drops events whose path is still in the set.
|
|
9
|
+
//
|
|
10
|
+
// Implementation: a Map<absPath, expiresAt>. The writer calls
|
|
11
|
+
// noteSelfWrite(absPath) after the file is on disk and indexDocument() has
|
|
12
|
+
// returned, so by the time chokidar fires (no earlier than its 500ms
|
|
13
|
+
// per-path debounce window elapses) the path is already registered. The
|
|
14
|
+
// watcher calls consumeSelfWrite(absPath) when an event fires; if the path is
|
|
15
|
+
// present and the TTL has not lapsed, the event is dropped and the entry is
|
|
16
|
+
// removed. Expired entries are purged lazily on every check.
|
|
17
|
+
//
|
|
18
|
+
// Keys are normalized via path.resolve() so the writer and watcher agree on
|
|
19
|
+
// "the same path" regardless of how each side formed it (chokidar may emit
|
|
20
|
+
// paths with no symlink resolution; node:path.resolve does the same on macOS).
|
|
21
|
+
import { resolve } from "node:path";
|
|
22
|
+
// Lifetime of a self-write registration. One second covers the watcher's
// 500ms debounce window plus some slack for FSEvents latency, while keeping
// the suppression window short. NOTE: an external edit to the same path that
// lands inside this window can have its watcher event suppressed once;
// because a registration is consumed by the first matching event, the next
// event for that path is processed normally.
const SELF_WRITE_TTL_MS = 1_000;

// Resolved absolute path -> epoch-ms timestamp at which the registration
// stops being valid.
const pending = new Map();

// Evicts every registration whose expiry is at or before `now`. Deleting the
// current key while iterating a Map is safe in JavaScript.
function purgeExpired(now) {
    for (const [key, expiresAt] of pending) {
        if (expiresAt <= now) {
            pending.delete(key);
        }
    }
}

// Registers a path that Daftari itself just wrote so the watcher ignores its
// next event for that path.
//
// absPath: path of the written file; normalised with path.resolve() so the
//          key matches however the watcher spells the same path.
// now:     current time in epoch ms (injectable for tests).
//
// Re-registering a path replaces its previous expiry.
export function noteSelfWrite(absPath, now = Date.now()) {
    purgeExpired(now);
    pending.set(resolve(absPath), now + SELF_WRITE_TTL_MS);
}

// Returns true when `absPath` has a live self-write registration, and
// consumes it: one self-write covers exactly one watcher event, so a second
// event shortly after is not dropped.
//
// absPath: path reported by the watcher; normalised with path.resolve().
// now:     current time in epoch ms (injectable for tests).
//
// purgeExpired() has already removed every entry with `expiresAt <= now`, so
// whatever is still stored under the key is live by construction and no
// separate expiry check is needed. Map.prototype.delete reports whether an
// entry was present, which is exactly the answer we need.
export function consumeSelfWrite(absPath, now = Date.now()) {
    purgeExpired(now);
    return pending.delete(resolve(absPath));
}

// Tests load multiple suites against this module-level singleton; clearing
// between them keeps registrations from leaking across tests.
export function resetSelfWriteState() {
    pending.clear();
}
|
|
62
|
+
//# sourceMappingURL=self-write.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"self-write.js","sourceRoot":"","sources":["../../src/search/self-write.ts"],"names":[],"mappings":"AAAA,4DAA4D;AAC5D,EAAE;AACF,kEAAkE;AAClE,6EAA6E;AAC7E,8EAA8E;AAC9E,8EAA8E;AAC9E,0EAA0E;AAC1E,+CAA+C;AAC/C,EAAE;AACF,8DAA8D;AAC9D,2EAA2E;AAC3E,qEAAqE;AACrE,wEAAwE;AACxE,8EAA8E;AAC9E,4EAA4E;AAC5E,6DAA6D;AAC7D,EAAE;AACF,4EAA4E;AAC5E,2EAA2E;AAC3E,+EAA+E;AAE/E,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,4EAA4E;AAC5E,2EAA2E;AAC3E,8EAA8E;AAC9E,kDAAkD;AAClD,MAAM,iBAAiB,GAAG,KAAK,CAAC;AAEhC,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkB,CAAC;AAE1C,SAAS,YAAY,CAAC,GAAW;IAC/B,KAAK,MAAM,CAAC,GAAG,EAAE,SAAS,CAAC,IAAI,OAAO,EAAE,CAAC;QACvC,IAAI,SAAS,IAAI,GAAG;YAAE,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IAC5C,CAAC;AACH,CAAC;AAED,4EAA4E;AAC5E,yEAAyE;AACzE,uEAAuE;AACvE,MAAM,UAAU,aAAa,CAAC,OAAe,EAAE,MAAc,IAAI,CAAC,GAAG,EAAE;IACrE,YAAY,CAAC,GAAG,CAAC,CAAC;IAClB,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,GAAG,GAAG,iBAAiB,CAAC,CAAC;AACzD,CAAC;AAED,+EAA+E;AAC/E,4EAA4E;AAC5E,iEAAiE;AACjE,MAAM,UAAU,gBAAgB,CAAC,OAAe,EAAE,MAAc,IAAI,CAAC,GAAG,EAAE;IACxE,YAAY,CAAC,GAAG,CAAC,CAAC;IAClB,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;IAC7B,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACnC,IAAI,SAAS,KAAK,SAAS;QAAE,OAAO,KAAK,CAAC;IAC1C,IAAI,SAAS,IAAI,GAAG,EAAE,CAAC;QACrB,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACpB,OAAO,KAAK,CAAC;IACf,CAAC;IACD,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IACpB,OAAO,IAAI,CAAC;AACd,CAAC;AAED,0EAA0E;AAC1E,6CAA6C;AAC7C,MAAM,UAAU,mBAAmB;IACjC,OAAO,CAAC,KAAK,EAAE,CAAC;AAClB,CAAC"}
|
package/dist/search/vector.d.ts
CHANGED
|
@@ -1,10 +1,19 @@
|
|
|
1
1
|
import { type Result } from "../frontmatter/types.js";
|
|
2
|
-
|
|
2
|
+
import type { EmbeddingProviderId } from "../utils/config.js";
|
|
3
|
+
import type { EmbeddingProvider } from "./embedding-provider.js";
|
|
4
|
+
export declare const EMBEDDING_MODEL = "local-minilm";
|
|
3
5
|
export declare const EMBEDDING_DIM = 384;
|
|
4
6
|
export declare const EMBED_BATCH_SIZE = 8;
|
|
5
7
|
export declare function chunkText(text: string): string[];
|
|
6
8
|
export declare function cosineSimilarity(a: Float32Array, b: Float32Array): number;
|
|
7
9
|
export declare function meanEmbedding(vectors: Float32Array[]): Float32Array | null;
|
|
10
|
+
export declare function setProvider(id: EmbeddingProviderId): void;
|
|
11
|
+
export declare function getProvider(): EmbeddingProvider;
|
|
12
|
+
export declare function setProviderForTests(provider: EmbeddingProvider): void;
|
|
13
|
+
export declare function resetProviderForTests(): void;
|
|
14
|
+
export declare function isModelLoaded(): boolean;
|
|
15
|
+
export declare function warmModel(): Promise<Result<void, Error>>;
|
|
16
|
+
export declare function resetExtractorForTests(): void;
|
|
8
17
|
export declare function embed(texts: string[], onProgress?: (done: number, total: number) => void): Promise<Result<Float32Array[], Error>>;
|
|
9
18
|
export declare function embedQuery(text: string): Promise<Result<Float32Array, Error>>;
|
|
10
19
|
//# sourceMappingURL=vector.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vector.d.ts","sourceRoot":"","sources":["../../src/search/vector.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"vector.d.ts","sourceRoot":"","sources":["../../src/search/vector.ts"],"names":[],"mappings":"AAcA,OAAO,EAAW,KAAK,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAC/D,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AAC9D,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAiBjE,eAAO,MAAM,eAAe,iBAAiB,CAAC;AAG9C,eAAO,MAAM,aAAa,MAAmB,CAAC;AAM9C,eAAO,MAAM,gBAAgB,IAAI,CAAC;AAQlC,wBAAgB,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CA4BhD;AAGD,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM,CAczE;AAID,wBAAgB,aAAa,CAAC,OAAO,EAAE,YAAY,EAAE,GAAG,YAAY,GAAG,IAAI,CAW1E;AAgCD,wBAAgB,WAAW,CAAC,EAAE,EAAE,mBAAmB,GAAG,IAAI,CAGzD;AAID,wBAAgB,WAAW,IAAI,iBAAiB,CAE/C;AAMD,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,iBAAiB,GAAG,IAAI,CAErE;AAID,wBAAgB,qBAAqB,IAAI,IAAI,CAG5C;AAQD,wBAAgB,aAAa,IAAI,OAAO,CAIvC;AAMD,wBAAsB,SAAS,IAAI,OAAO,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAE9D;AAKD,wBAAgB,sBAAsB,IAAI,IAAI,CAE7C;AAKD,wBAAsB,KAAK,CACzB,KAAK,EAAE,MAAM,EAAE,EACf,UAAU,CAAC,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,GACjD,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,EAAE,KAAK,CAAC,CAAC,CAGxC;AAGD,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC,CAMnF"}
|
package/dist/search/vector.js
CHANGED
|
@@ -1,27 +1,34 @@
|
|
|
1
1
|
// Vector (semantic) search half of hybrid search.
|
|
2
2
|
//
|
|
3
|
-
// Documents are split into chunks; each chunk is embedded
|
|
4
|
-
//
|
|
5
|
-
// @huggingface/transformers
|
|
6
|
-
//
|
|
7
|
-
//
|
|
3
|
+
// Documents are split into chunks; each chunk is embedded via the active
|
|
4
|
+
// EmbeddingProvider (defaults to local-minilm: 384-dim sentence-transformers
|
|
5
|
+
// all-MiniLM-L6-v2 run locally via @huggingface/transformers). Similarity is
|
|
6
|
+
// cosine similarity. Embeddings come back L2-normalised, so cosine reduces to a
|
|
7
|
+
// dot product, but cosineSimilarity stays general for safety.
|
|
8
8
|
//
|
|
9
|
-
// The
|
|
10
|
-
//
|
|
11
|
-
//
|
|
9
|
+
// The provider is selected by .daftari/config.yaml's `embeddings.provider`
|
|
10
|
+
// key and instantiated once per process (memoised by `setProvider` /
|
|
11
|
+
// `getProvider`). embed/embedQuery/warmModel/isModelLoaded delegate to the
|
|
12
|
+
// active provider, so the rest of the search stack (reindex.ts, hybrid.ts)
|
|
13
|
+
// is provider-agnostic.
|
|
12
14
|
import { err, ok } from "../frontmatter/types.js";
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
//
|
|
16
|
-
//
|
|
17
|
-
//
|
|
18
|
-
//
|
|
19
|
-
//
|
|
20
|
-
//
|
|
15
|
+
import { isLocalMinilmLoaded, LOCAL_MINILM_DIM, localMinilmProvider, resetLocalMinilmForTests, } from "./providers/local-minilm.js";
|
|
16
|
+
import { makeOpenAi3SmallProvider } from "./providers/openai-3-small.js";
|
|
17
|
+
// EMBEDDING_MODEL and EMBEDDING_DIM are retained as deprecated plain
|
|
18
|
+
// constants pointing at the local-minilm provider's values. They were the
|
|
19
|
+
// single embedding identity before this PR; reindex.ts, hybrid.ts and the
|
|
20
|
+
// tests imported them as literals (SQL binds, length comparisons). New code
|
|
21
|
+
// must read `getProvider().id` and `getProvider().dim` instead — these
|
|
22
|
+
// exports are scheduled for removal next release.
|
|
21
23
|
//
|
|
22
|
-
//
|
|
23
|
-
|
|
24
|
-
//
|
|
24
|
+
/** @deprecated Use `getProvider().id` instead. */
|
|
25
|
+
export const EMBEDDING_MODEL = "local-minilm";
|
|
26
|
+
/** @deprecated Use `getProvider().dim` instead. */
|
|
|
27
|
+
export const EMBEDDING_DIM = LOCAL_MINILM_DIM;
|
|
28
|
+
// Texts are embedded in fixed-size sub-batches; see provider implementations.
|
|
29
|
+
// The constant lives here for tests that probe the local-minilm batching
|
|
30
|
+
// behaviour. (No provider exposes this directly through the interface
|
|
31
|
+
// because batching is an implementation detail.)
|
|
25
32
|
export const EMBED_BATCH_SIZE = 8;
|
|
26
33
|
const CHUNK_MAX_CHARS = 800;
|
|
27
34
|
// Splits a document body into embeddable chunks. Paragraphs (blank-line
|
|
@@ -95,51 +102,87 @@ export function meanEmbedding(vectors) {
|
|
|
95
102
|
sum[i] = sum[i] / vectors.length;
|
|
96
103
|
return sum;
|
|
97
104
|
}
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
105
|
+
// --- Provider selection ----------------------------------------------------
|
|
106
|
+
// The active provider for this process. Memoised so a server run uses one
|
|
107
|
+
// provider for its whole lifetime; switching providers means restarting the
|
|
108
|
+
// server (and the next reindex populates a fresh row set under the new
|
|
109
|
+
// provider's id — the old rows stay in the cache as cheap insurance for
|
|
110
|
+
// switching back).
|
|
111
|
+
let activeProvider = localMinilmProvider;
|
|
112
|
+
// Resolves a config provider id to a provider instance.
//
// id: "local-minilm" or "openai-3-small".
//
// Returns the shared local-minilm provider object, or a freshly built OpenAI
// provider bound to the OPENAI_API_KEY environment variable.
//
// Throws Error when:
//   - the id is "openai-3-small" but OPENAI_API_KEY is unset or empty
//     (loadConfig is expected to have validated this already; we still fail
//     loud rather than construct a broken provider);
//   - the id is not one of the known providers. Without this guard the
//     switch would fall through and return undefined, which the caller would
//     install as the active provider and which would only surface later as
//     an unrelated TypeError on the first property access.
function instantiateProvider(id) {
    switch (id) {
        case "local-minilm":
            return localMinilmProvider;
        case "openai-3-small": {
            const key = process.env.OPENAI_API_KEY;
            if (!key) {
                throw new Error("OPENAI_API_KEY is not set — cannot construct openai-3-small provider");
            }
            return makeOpenAi3SmallProvider(key);
        }
        default:
            throw new Error(`unknown embedding provider id: ${String(id)}`);
    }
}
|
|
105
|
-
//
|
|
106
|
-
//
|
|
107
|
-
//
|
|
108
|
-
//
|
|
109
|
-
|
|
128
|
+
// Selects the process-wide embedding provider by config id.
//
// When the currently active provider already reports the requested id this
// is a no-op, so repeated calls with the same id never rebuild the provider.
// Any other id replaces the active provider with a newly resolved one.
export function setProvider(id) {
    const alreadyActive = activeProvider.id === id;
    if (!alreadyActive) {
        activeProvider = instantiateProvider(id);
    }
}
|
|
137
|
+
// Accessor for the process-wide embedding provider. Until setProvider() (or
// a test hook) installs something else, this is the local-minilm provider.
export function getProvider() {
    const current = activeProvider;
    return current;
}
|
|
142
|
+
// Test-only: install an arbitrary provider object. Used by reindex tests
|
|
143
|
+
// that need to simulate a provider switch without paying the network or
|
|
144
|
+
// model-load cost. Only swaps the active provider: the local-minilm memoised
|
|
145
|
+
// extractor is NOT reset here; resetProviderForTests() does that.
|
|
146
|
+
export function setProviderForTests(provider) {
|
|
147
|
+
activeProvider = provider;
|
|
148
|
+
}
|
|
149
|
+
// Test-only: puts the module back into its initial state by re-installing
// the local-minilm provider and then clearing that provider's memoised
// extractor. Production code must not call this.
export function resetProviderForTests() {
    activeProvider = localMinilmProvider;
    resetLocalMinilmForTests();
}
|
|
155
|
+
// --- Provider-delegating surface (kept for back-compat) -------------------
|
|
156
|
+
// Reports whether the active provider is ready to embed. For local-minilm
// the answer is delegated to isLocalMinilmLoaded(); every other provider is
// treated as always ready and reports true.
export function isModelLoaded() {
    return activeProvider.id === "local-minilm" ? isLocalMinilmLoaded() : true;
}
|
|
166
|
+
// Asks the active provider to warm itself up so the first user search does
// not pay the cold start. Intended to be fired in the background after
// startup. The provider's warm() result is passed through unchanged.
// NOTE(review): the "never throws" guarantee rests on the provider's warm()
// implementation returning a Result — confirm against each provider.
export async function warmModel() {
    const provider = activeProvider;
    return provider.warm();
}
|
|
173
|
+
// Test-only alias kept under its historic name: forwards to
// resetLocalMinilmForTests() so existing lazy-model-load tests can clear the
// local-minilm memoised extractor. Production code must not invoke this.
export function resetExtractorForTests() {
    return void resetLocalMinilmForTests();
}
|
|
179
|
+
// Embeds `texts` through the active provider.
//
// texts:      array of strings to embed. An empty array short-circuits to
//             ok([]) without touching the provider.
// onProgress: optional progress callback, forwarded to the provider as-is;
//             when it fires is decided by the provider implementation.
//
// Resolves to whatever the provider's embed() resolves to.
export async function embed(texts, onProgress) {
    return texts.length === 0
        ? ok([])
        : activeProvider.embed(texts, onProgress);
}
|
|
144
187
|
// Convenience wrapper for embedding a single query string.
|
|
145
188
|
export async function embedQuery(text) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vector.js","sourceRoot":"","sources":["../../src/search/vector.ts"],"names":[],"mappings":"AAAA,kDAAkD;AAClD,EAAE;AACF,yEAAyE;AACzE,
|
|
1
|
+
{"version":3,"file":"vector.js","sourceRoot":"","sources":["../../src/search/vector.ts"],"names":[],"mappings":"AAAA,kDAAkD;AAClD,EAAE;AACF,yEAAyE;AACzE,6EAA6E;AAC7E,6EAA6E;AAC7E,8EAA8E;AAC9E,8DAA8D;AAC9D,EAAE;AACF,2EAA2E;AAC3E,qEAAqE;AACrE,2EAA2E;AAC3E,2EAA2E;AAC3E,wBAAwB;AAExB,OAAO,EAAE,GAAG,EAAE,EAAE,EAAe,MAAM,yBAAyB,CAAC;AAG/D,OAAO,EACL,mBAAmB,EACnB,gBAAgB,EAChB,mBAAmB,EACnB,wBAAwB,GACzB,MAAM,6BAA6B,CAAC;AACrC,OAAO,EAAE,wBAAwB,EAAE,MAAM,+BAA+B,CAAC;AAEzE,qEAAqE;AACrE,0EAA0E;AAC1E,0EAA0E;AAC1E,4EAA4E;AAC5E,uEAAuE;AACvE,kDAAkD;AAClD,EAAE;AACF,8CAA8C;AAC9C,MAAM,CAAC,MAAM,eAAe,GAAG,cAAc,CAAC;AAE9C,+CAA+C;AAC/C,MAAM,CAAC,MAAM,aAAa,GAAG,gBAAgB,CAAC;AAE9C,8EAA8E;AAC9E,yEAAyE;AACzE,sEAAsE;AACtE,iDAAiD;AACjD,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,CAAC;AAElC,MAAM,eAAe,GAAG,GAAG,CAAC;AAE5B,wEAAwE;AACxE,2EAA2E;AAC3E,6EAA6E;AAC7E,4DAA4D;AAC5D,MAAM,UAAU,SAAS,CAAC,IAAY;IACpC,MAAM,UAAU,GAAG,IAAI;SACpB,KAAK,CAAC,SAAS,CAAC;SAChB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAE/B,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC9B,IAAI,IAAI,CAAC,MAAM,GAAG,eAAe,EAAE,CAAC;YAClC,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACrB,OAAO,GAAG,EAAE,CAAC;YACf,CAAC;YACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,IAAI,eAAe,EAAE,CAAC;gBACtD,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,eAAe,CAAC,CAAC,CAAC;YAClD,CAAC;YACD,SAAS;QACX,CAAC;QACD,IAAI,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,GAAG,eAAe,IAAI,OAAO,EAAE,CAAC;YAClE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,OAAO,GAAG,IAAI,CAAC;QACjB,CAAC;aAAM,CAAC;YACN,OAAO,GAAG,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QACrD,CAAC;IACH,CAAC;IACD,IAAI,OAAO;QAAE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAClC,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;AACpD,CAAC;AAED,6EAA6E;AAC7E,MAAM,UAAU,gB
AAgB,CAAC,CAAe,EAAE,CAAe;IAC/D,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACtD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAW,CAAC;QACzB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAW,CAAC;QACzB,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;QACb,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;QACf,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC;IACjB,CAAC;IACD,IAAI,KAAK,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IACzC,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;AACrD,CAAC;AAED,6EAA6E;AAC7E,uEAAuE;AACvE,MAAM,UAAU,aAAa,CAAC,OAAuB;IACnD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACtC,MAAM,GAAG,GAAG,OAAO,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC;IACpC,IAAI,GAAG,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAC3B,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC;IAClC,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,MAAM,KAAK,GAAG;YAAE,SAAS;QAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAC,GAAI,GAAG,CAAC,CAAC,CAAY,GAAI,CAAC,CAAC,CAAC,CAAY,CAAC;IAC/E,CAAC;IACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE;QAAE,GAAG,CAAC,CAAC,CAAC,GAAI,GAAG,CAAC,CAAC,CAAY,GAAG,OAAO,CAAC,MAAM,CAAC;IAC3E,OAAO,GAAG,CAAC;AACb,CAAC;AAED,8EAA8E;AAE9E,0EAA0E;AAC1E,4EAA4E;AAC5E,uEAAuE;AACvE,wEAAwE;AACxE,mBAAmB;AACnB,IAAI,cAAc,GAAsB,mBAAmB,CAAC;AAE5D,6EAA6E;AAC7E,4EAA4E;AAC5E,wDAAwD;AACxD,SAAS,mBAAmB,CAAC,EAAuB;IAClD,QAAQ,EAAE,EAAE,CAAC;QACX,KAAK,cAAc;YACjB,OAAO,mBAAmB,CAAC;QAC7B,KAAK,gBAAgB,CAAC,CAAC,CAAC;YACtB,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;YACvC,IAAI,CAAC,GAAG,EAAE,CAAC;gBACT,MAAM,IAAI,KAAK,CAAC,sEAAsE,CAAC,CAAC;YAC1F,CAAC;YACD,OAAO,wBAAwB,CAAC,GAAG,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;AACH,CAAC;AAED,4EAA4E;AAC5E,2EAA2E;AAC3E,2EAA2E;AAC3E,gBAAgB;AAChB,MAAM,UAAU,WAAW,CAAC,EAAuB;IACjD,IAAI,cAAc,CAAC,EAAE,KAAK,EAAE;QAAE,OAAO;IACrC,cAAc,GAAG,m
BAAmB,CAAC,EAAE,CAAC,CAAC;AAC3C,CAAC;AAED,6EAA6E;AAC7E,kEAAkE;AAClE,MAAM,UAAU,WAAW;IACzB,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,yEAAyE;AACzE,wEAAwE;AACxE,uEAAuE;AACvE,+CAA+C;AAC/C,MAAM,UAAU,mBAAmB,CAAC,QAA2B;IAC7D,cAAc,GAAG,QAAQ,CAAC;AAC5B,CAAC;AAED,uEAAuE;AACvE,0DAA0D;AAC1D,MAAM,UAAU,qBAAqB;IACnC,cAAc,GAAG,mBAAmB,CAAC;IACrC,wBAAwB,EAAE,CAAC;AAC7B,CAAC;AAED,6EAA6E;AAE7E,0EAA0E;AAC1E,yEAAyE;AACzE,sEAAsE;AACtE,qBAAqB;AACrB,MAAM,UAAU,aAAa;IAC3B,IAAI,cAAc,CAAC,EAAE,KAAK,cAAc;QAAE,OAAO,mBAAmB,EAAE,CAAC;IACvE,iEAAiE;IACjE,OAAO,IAAI,CAAC;AACd,CAAC;AAED,0EAA0E;AAC1E,4EAA4E;AAC5E,wEAAwE;AACxE,uCAAuC;AACvC,MAAM,CAAC,KAAK,UAAU,SAAS;IAC7B,OAAO,cAAc,CAAC,IAAI,EAAE,CAAC;AAC/B,CAAC;AAED,4EAA4E;AAC5E,4EAA4E;AAC5E,4DAA4D;AAC5D,MAAM,UAAU,sBAAsB;IACpC,wBAAwB,EAAE,CAAC;AAC7B,CAAC;AAED,4EAA4E;AAC5E,yEAAyE;AACzE,qCAAqC;AACrC,MAAM,CAAC,KAAK,UAAU,KAAK,CACzB,KAAe,EACf,UAAkD;IAElD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC,EAAE,CAAC,CAAC;IACtC,OAAO,cAAc,CAAC,KAAK,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;AACjD,CAAC;AAED,2DAA2D;AAC3D,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,IAAY;IAC3C,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;IACnC,IAAI,CAAC,MAAM,CAAC,EAAE;QAAE,OAAO,MAAM,CAAC;IAC9B,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC9B,IAAI,CAAC,KAAK;QAAE,OAAO,GAAG,CAAC,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC,CAAC;IAClE,OAAO,EAAE,CAAC,KAAK,CAAC,CAAC;AACnB,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { type FSWatcher } from "chokidar";
|
|
2
|
+
import { type Result } from "../frontmatter/types.js";
|
|
3
|
+
export declare const WATCH_DEBOUNCE_MS = 500;
|
|
4
|
+
export interface VaultWatcher {
|
|
5
|
+
close: () => Promise<void>;
|
|
6
|
+
}
|
|
7
|
+
export interface WatcherOptions {
|
|
8
|
+
log?: (msg: string) => void;
|
|
9
|
+
debounceMs?: number;
|
|
10
|
+
indexFn?: (vaultRoot: string, relPath: string) => Promise<Result<unknown, Error>>;
|
|
11
|
+
deleteFn?: (vaultRoot: string, relPath: string) => Promise<Result<unknown, Error>>;
|
|
12
|
+
statFn?: (absPath: string) => Promise<{
|
|
13
|
+
exists: boolean;
|
|
14
|
+
}>;
|
|
15
|
+
watcherFactory?: (vaultRoot: string) => FSWatcher;
|
|
16
|
+
}
|
|
17
|
+
export declare function startWatcher(vaultRoot: string, opts?: WatcherOptions): VaultWatcher;
|
|
18
|
+
//# sourceMappingURL=watcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"watcher.d.ts","sourceRoot":"","sources":["../../src/search/watcher.ts"],"names":[],"mappings":"AAgCA,OAAO,EAAuB,KAAK,SAAS,EAAE,MAAM,UAAU,CAAC;AAC/D,OAAO,EAAM,KAAK,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAY1D,eAAO,MAAM,iBAAiB,MAAM,CAAC;AAErC,MAAM,WAAW,YAAY;IAI3B,KAAK,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;CAC5B;AAED,MAAM,WAAW,cAAc;IAG7B,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,IAAI,CAAC;IAG5B,UAAU,CAAC,EAAE,MAAM,CAAC;IAIpB,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;IAClF,QAAQ,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC;IACnF,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC;QAAE,MAAM,EAAE,OAAO,CAAA;KAAE,CAAC,CAAC;IAG3D,cAAc,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,KAAK,SAAS,CAAC;CACnD;AA2GD,wBAAgB,YAAY,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,GAAE,cAAmB,GAAG,YAAY,CAmKvF"}
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
// fs.watch reactive indexing.
|
|
2
|
+
//
|
|
3
|
+
// Daftari's startup freshness check (search/reindex.ts isIndexFresh) keeps
|
|
4
|
+
// the index honest across *restarts*: a manifest of path→mtime is compared
|
|
5
|
+
// to disk and a full reindex runs when anything has drifted. But while the
|
|
6
|
+
// server is up, an editor save, a sync engine pull, or a scripted writer
|
|
7
|
+
// will rewrite a vault file *out of band* — Daftari's write-path tools are
|
|
8
|
+
// not the only writer. Without a watcher the index drifts until the next
|
|
9
|
+
// startup. The watcher closes that gap: chokidar listens on the vault root,
|
|
10
|
+
// and `add` / `change` events trigger an indexDocument() pass for that one
|
|
11
|
+
// file; `unlink` events evict the doc from the index after a re-stat
|
|
12
|
+
// confirms the file really is gone (FSEvents on macOS, iCloud, and Dropbox
|
|
13
|
+
// emit phantom unlink+add pairs during atomic-rename saves).
|
|
14
|
+
//
|
|
15
|
+
// Per-path debounce: editors save in bursts (atomic rename: write tmp,
|
|
16
|
+
// rename, delete tmp), so a single user save can produce 3-5 chokidar
|
|
17
|
+
// events for the same path inside a few ms. We collect events into a Map
|
|
18
|
+
// keyed by relative path; each touch resets a 500ms timer; when the timer
|
|
19
|
+
// fires we run the indexer once. Different paths debounce independently.
|
|
20
|
+
//
|
|
21
|
+
// Self-write suppression: when the write-path tools (vault_write etc.)
|
|
22
|
+
// finish, they note the absolute path in search/self-write.ts. The watcher
|
|
23
|
+
// consults that set when its debounce fires and silently drops the event if
|
|
24
|
+
// the path is registered, so the in-process indexDocument() the writer
|
|
25
|
+
// already ran is not duplicated.
|
|
26
|
+
//
|
|
27
|
+
// Errors from chokidar are logged to stderr but never crash the server.
|
|
28
|
+
// stderr is used throughout so the MCP stdio JSON-RPC stream on stdout
|
|
29
|
+
// stays clean.
|
|
30
|
+
import { stat } from "node:fs/promises";
|
|
31
|
+
import { relative, resolve, sep } from "node:path";
|
|
32
|
+
import { default as chokidar } from "chokidar";
|
|
33
|
+
import { ok } from "../frontmatter/types.js";
|
|
34
|
+
import { deleteDocument, openIndexDb } from "../storage/index-db.js";
|
|
35
|
+
import { resolveVaultPath } from "../storage/local.js";
|
|
36
|
+
import { getIndexStatus, markPathIndexing, markPathReady } from "./index-state.js";
|
|
37
|
+
import { indexDocument } from "./reindex.js";
|
|
38
|
+
import { consumeSelfWrite } from "./self-write.js";
|
|
39
|
+
import { getProvider } from "./vector.js";
|
|
40
|
+
// 500ms is the floor the design locks in: short enough for the index to feel
|
|
41
|
+
// live to a human typing in their editor, long enough to coalesce an
|
|
42
|
+
// atomic-rename burst (write tmp, rename onto target, delete tmp) into a
|
|
43
|
+
// single indexer call.
|
|
44
|
+
export const WATCH_DEBOUNCE_MS = 500;
|
|
45
|
+
// Fills in every optional watcher setting so production callers only need to
// pass vaultRoot. A setting falls back to its default only when it is null
// or undefined, so an explicit 0 for debounceMs is honoured. watcherFactory
// has no default and is passed through as given.
function resolveOptions(opts) {
    const log = opts.log ?? ((msg) => process.stderr.write(msg));
    const debounceMs = opts.debounceMs ?? WATCH_DEBOUNCE_MS;
    const indexFn = opts.indexFn ?? indexDocument;
    const deleteFn = opts.deleteFn ?? defaultDeleteFn;
    const statFn = opts.statFn ?? defaultStatFn;
    const { watcherFactory } = opts;
    return { log, debounceMs, indexFn, deleteFn, statFn, watcherFactory };
}
|
|
57
|
+
// Default unlink handler.
//
// Opens the index db (sized by the active provider's dim), calls
// deleteDocument() for `relPath`, then removes `relPath` from the JSON
// manifest stored in the `meta` table under the "vault_manifest" key so the
// next startup freshness check does not see a missing-on-disk entry as
// drift. The db handle is always closed before returning.
//
// Returns the failed open Result unchanged when the db cannot be opened,
// otherwise ok(undefined).
async function defaultDeleteFn(vaultRoot, relPath) {
    const opened = openIndexDb(vaultRoot, getProvider().dim);
    if (!opened.ok) {
        return opened;
    }
    const handle = opened.value;
    const manifestKey = "vault_manifest";
    try {
        deleteDocument(handle, relPath);
        // The manifest is patched in place with a short read-modify-write
        // instead of going through reindex.ts internals.
        const stored = handle.prepare("SELECT value FROM meta WHERE key = ?").get(manifestKey);
        if (!stored) {
            return ok(undefined);
        }
        try {
            const entries = JSON.parse(stored.value);
            if (relPath in entries) {
                delete entries[relPath];
                const upsert = handle.prepare("INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value");
                upsert.run(manifestKey, JSON.stringify(entries));
            }
        }
        catch {
            // Deliberately best-effort: a malformed manifest (or a failed
            // patch) is non-fatal here — the next full reindex rewrites the
            // manifest from scratch.
        }
        return ok(undefined);
    }
    finally {
        handle.close();
    }
}
|
|
91
|
+
// Default existence probe: resolves to { exists: true } when stat() succeeds
// on absPath and { exists: false } when it fails for any reason. Never
// rejects.
async function defaultStatFn(absPath) {
    let exists = true;
    try {
        await stat(absPath);
    }
    catch {
        exists = false;
    }
    return { exists };
}
|
|
100
|
+
// Maps a chokidar-emitted absolute path back to the vault-relative POSIX
// path indexDocument / deleteDocument expect.
//
// Returns null when the path is the vault root itself or lies outside the
// vault (which can happen with symlinks chokidar followed). "Outside" means
// the relative path begins with a real `..` segment; an in-vault entry whose
// name merely starts with two dots (e.g. `..notes.md`) is still returned.
function toVaultRelative(vaultRoot, absPath) {
    const root = resolve(vaultRoot);
    const abs = resolve(absPath);
    const rel = relative(root, abs);
    if (rel.length === 0) {
        return null;
    }
    // Compare against the `..` segment, not the raw string prefix, so file
    // names that happen to begin with ".." are not mistaken for an escape.
    if (rel === ".." || rel.startsWith(`..${sep}`)) {
        return null;
    }
    // chokidar emits OS-native separators; index storage uses POSIX. Normalise.
    return sep === "/" ? rel : rel.split(sep).join("/");
}
|
|
114
|
+
// Reports whether a vault-relative POSIX path must be skipped: anything at or
// under .daftari (the index itself and lock files — watching it would feed
// our own writes back as events), anything at or under .git, and any other
// path whose first segment is hidden (editor swap files, etc).
//
// chokidar's `ignored` option already filters these at watch time; the check
// is repeated at dispatch time because chokidar sometimes ignores its own
// `ignored` pattern for `unlinkDir` events on macOS.
function isIgnoredPath(relPath) {
    const controlDirs = [".daftari", ".git"];
    const insideControlDir = controlDirs.some((dir) => relPath === dir || relPath.startsWith(`${dir}/`));
    if (insideControlDir) {
        return true;
    }
    // Only the top-level segment decides "hidden"; a bare "." does not count.
    const [head = ""] = relPath.split("/");
    return head !== "." && head.startsWith(".");
}
|
|
133
|
+
// Markdown filter. chokidar watches every file under the root, but only
// paths ending in ".md" (compared case-insensitively) are indexed. Rejecting
// everything else here keeps sibling files such as LICENSE, images or
// .DS_Store from firing redundant debounces and indexer calls.
function isMarkdown(relPath) {
    const lowered = relPath.toLowerCase();
    return lowered.slice(-3) === ".md";
}
|
|
140
|
+
// Starts watching `vaultRoot` and returns a handle whose async close()
// cancels every pending debounce timer and shuts the underlying watcher
// down. startWatcher does not consult config: honouring the `watch` flag is
// the caller's responsibility, which lets tests drive it without a config
// file.
//
// opts (all optional; defaults come from resolveOptions): log, debounceMs,
// indexFn, deleteFn, statFn, watcherFactory.
export function startWatcher(vaultRoot, opts = {}) {
    const settings = resolveOptions(opts);
    const root = resolve(vaultRoot);
    // relPath -> { timer, lastEvent } for each path with a live debounce window.
    const pending = new Map();
    // Use the injected factory when one is supplied, real chokidar otherwise.
    // The ignored predicate is meant to mirror listFiles in storage/local.ts
    // so the watcher and the listing agree on "what's vault content".
    const watcher = settings.watcherFactory
        ? settings.watcherFactory(root)
        : chokidar.watch(root, {
            // chokidar v4 calls `ignored` for every path; true means skip.
            ignored: (candidate) => {
                // The root itself must NOT be ignored.
                if (candidate === root) {
                    return false;
                }
                const rel = toVaultRelative(root, candidate);
                return rel !== null && isIgnoredPath(rel);
            },
            ignoreInitial: true, // startup freshness check already covers the initial state
            persistent: true,
            followSymlinks: false,
            awaitWriteFinish: false, // we run our own debounce
        });
    let closed = false;
    // Arms a debounce timer for `relPath` and records it in `pending`. When
    // the timer fires it hands whatever event is recorded at that moment to
    // dispatch(), unless the entry has been removed in the meantime.
    function armDebounce(relPath, lastEvent) {
        const timer = setTimeout(() => {
            const entry = pending.get(relPath);
            if (!entry) {
                return;
            }
            pending.delete(relPath);
            void dispatch(relPath, entry.lastEvent);
        }, settings.debounceMs);
        pending.set(relPath, { timer, lastEvent });
    }
    // Runs the indexer for one path and logs a failed result.
    async function reindexAndReport(relPath) {
        const outcome = await settings.indexFn(root, relPath);
        if (!outcome.ok) {
            settings.log(`daftari: watcher: index update failed for ${relPath}: ${outcome.error.message}\n`);
        }
    }
    // Called once the debounce window for `relPath` has elapsed. `lastEvent`
    // is the most recent event seen inside the window, so an unlink-then-add
    // is handled as an index update (FSEvents atomic-save quirk).
    async function dispatch(relPath, lastEvent) {
        if (closed) {
            return;
        }
        // Self-write suppression: the write-path tools register the absolute
        // path after their in-process indexDocument() returns. If it is
        // registered, the event was Daftari's own write and the index is
        // already current.
        const selfAbs = resolveVaultPath(root, relPath);
        if (selfAbs.ok && consumeSelfWrite(selfAbs.value)) {
            return;
        }
        // A per-file update during a full reindex would race the bulk write
        // and may be wiped by clearIndex(), so wait out another debounce
        // window and try again.
        if (getIndexStatus().status === "indexing") {
            pending.delete(relPath);
            armDebounce(relPath, lastEvent);
            return;
        }
        markPathIndexing(relPath);
        try {
            // add / change — both route to the indexer.
            if (lastEvent !== "unlink") {
                await reindexAndReport(relPath);
                return;
            }
            // FSEvents (macOS), iCloud, and Dropbox emit phantom unlink+add
            // pairs during atomic-rename saves. Re-stat before deleting: if
            // the file is back, treat the event as a change instead.
            const target = resolveVaultPath(root, relPath);
            if (target.ok) {
                const probe = await settings.statFn(target.value);
                if (probe.exists) {
                    await reindexAndReport(relPath);
                    return;
                }
            }
            const removal = await settings.deleteFn(root, relPath);
            if (!removal.ok) {
                settings.log(`daftari: watcher: delete failed for ${relPath}: ${removal.error.message}\n`);
            }
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            settings.log(`daftari: watcher: dispatch crashed for ${relPath}: ${reason}\n`);
        }
        finally {
            markPathReady(relPath);
        }
    }
    // Starts (or restarts) the debounce window for one path. Runs for every
    // watcher event; the most recent event "wins", so an unlink followed by
    // an add inside the window reaches dispatch() as an add.
    function schedule(relPath, event) {
        if (closed) {
            return;
        }
        if (isIgnoredPath(relPath) || !isMarkdown(relPath)) {
            return;
        }
        const existing = pending.get(relPath);
        if (existing) {
            clearTimeout(existing.timer);
        }
        armDebounce(relPath, event);
    }
    // File-level events all funnel into schedule(). `addDir` / `unlinkDir`
    // are intentionally not handled — per-file events cover everything we
    // care about and a directory delete fires unlink for each contained file
    // anyway.
    for (const eventName of ["add", "change", "unlink"]) {
        watcher.on(eventName, (p) => {
            const rel = toVaultRelative(root, p);
            if (rel) {
                schedule(rel, eventName);
            }
        });
    }
    watcher.on("error", (e) => {
        const reason = e instanceof Error ? e.message : String(e);
        settings.log(`daftari: watcher error: ${reason}\n`);
    });
    return {
        // Safe to call more than once; only the first call does any work.
        close: async () => {
            if (closed) {
                return;
            }
            closed = true;
            for (const { timer } of pending.values()) {
                clearTimeout(timer);
            }
            pending.clear();
            await watcher.close();
        },
    };
}
|
|
300
|
+
//# sourceMappingURL=watcher.js.map
|