clawvault 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/README.md +54 -14
  2. package/bin/clawvault.js +0 -2
  3. package/bin/command-registration.test.js +13 -1
  4. package/bin/help-contract.test.js +14 -0
  5. package/bin/register-core-commands.js +88 -0
  6. package/bin/register-core-commands.test.js +80 -0
  7. package/bin/register-maintenance-commands.js +57 -6
  8. package/bin/register-query-commands.js +10 -28
  9. package/bin/test-helpers/cli-command-fixtures.js +1 -0
  10. package/dist/chunk-2PKBIKDH.js +130 -0
  11. package/dist/{chunk-2JQ3O2YL.js → chunk-5EFSWZO6.js} +3 -3
  12. package/dist/{chunk-77Q5CSPJ.js → chunk-7SWP5FKU.js} +33 -701
  13. package/dist/{chunk-URXDAUVH.js → chunk-AXSJIFOJ.js} +174 -1
  14. package/dist/{chunk-23YDQ3QU.js → chunk-BLQXXX7Q.js} +6 -6
  15. package/dist/chunk-CSHO3PJB.js +684 -0
  16. package/dist/{chunk-SLXOR3CC.js → chunk-DOIUYIXV.js} +2 -2
  17. package/dist/{chunk-NCKFNBHJ.js → chunk-DVOUSOR3.js} +79 -5
  18. package/dist/{chunk-CLJTREDS.js → chunk-ECGJYWNA.js} +193 -41
  19. package/dist/{chunk-BUEW6IIK.js → chunk-EL6UBSX5.js} +5 -5
  20. package/dist/{chunk-6FH3IULF.js → chunk-FZ5I2NF7.js} +1 -1
  21. package/dist/{chunk-ZN54U2OZ.js → chunk-GFCHWMGD.js} +3 -3
  22. package/dist/{chunk-GNJL4YGR.js → chunk-GJO3CFUN.js} +30 -6
  23. package/dist/chunk-H3JZIB5O.js +322 -0
  24. package/dist/chunk-HEHO7SMV.js +51 -0
  25. package/dist/{chunk-STCQGCEQ.js → chunk-HGDDW24U.js} +3 -3
  26. package/dist/chunk-J3YUXVID.js +907 -0
  27. package/dist/{chunk-Y6VJKXGL.js → chunk-KCYWJDDW.js} +1 -1
  28. package/dist/{chunk-W4SPAEE7.js → chunk-OFOCU2V4.js} +5 -4
  29. package/dist/chunk-PTWPPVC7.js +972 -0
  30. package/dist/{chunk-QSHD36LH.js → chunk-QFWERBDP.js} +2 -2
  31. package/dist/{chunk-QSRRMEYM.js → chunk-S7N7HI5E.js} +1 -1
  32. package/dist/{chunk-PBACDKKP.js → chunk-T7E764W3.js} +3 -3
  33. package/dist/chunk-TDWFBDAQ.js +1016 -0
  34. package/dist/{chunk-ESVS6K2B.js → chunk-TWMI3SNN.js} +6 -5
  35. package/dist/{chunk-2RAZ4ZFE.js → chunk-VBILES4B.js} +1 -1
  36. package/dist/{chunk-ESFLMDRB.js → chunk-VXAGOLDP.js} +3 -3
  37. package/dist/chunk-YCUVAOFC.js +158 -0
  38. package/dist/{chunk-SS4B7P7V.js → chunk-YIDV4VV2.js} +1 -1
  39. package/dist/chunk-ZKWPCBYT.js +600 -0
  40. package/dist/cli/index.js +24 -24
  41. package/dist/commands/archive.js +2 -2
  42. package/dist/commands/benchmark.d.ts +12 -0
  43. package/dist/commands/benchmark.js +12 -0
  44. package/dist/commands/context.js +6 -5
  45. package/dist/commands/doctor.d.ts +8 -3
  46. package/dist/commands/doctor.js +6 -20
  47. package/dist/commands/embed.js +5 -4
  48. package/dist/commands/entities.js +1 -1
  49. package/dist/commands/graph.js +2 -2
  50. package/dist/commands/inbox.d.ts +23 -0
  51. package/dist/commands/inbox.js +11 -0
  52. package/dist/commands/inject.d.ts +1 -1
  53. package/dist/commands/inject.js +3 -3
  54. package/dist/commands/link.js +6 -6
  55. package/dist/commands/maintain.d.ts +32 -0
  56. package/dist/commands/maintain.js +12 -0
  57. package/dist/commands/migrate-observations.js +2 -2
  58. package/dist/commands/observe.js +9 -8
  59. package/dist/commands/rebuild-embeddings.js +47 -16
  60. package/dist/commands/rebuild.js +7 -6
  61. package/dist/commands/reflect.js +5 -5
  62. package/dist/commands/replay.js +8 -7
  63. package/dist/commands/setup.js +3 -2
  64. package/dist/commands/sleep.d.ts +1 -1
  65. package/dist/commands/sleep.js +17 -15
  66. package/dist/commands/status.js +26 -24
  67. package/dist/commands/sync-bd.js +2 -2
  68. package/dist/commands/tailscale.js +2 -2
  69. package/dist/commands/wake.d.ts +1 -1
  70. package/dist/commands/wake.js +8 -7
  71. package/dist/index.d.ts +168 -16
  72. package/dist/index.js +271 -108
  73. package/dist/{inject-DYUrDqQO.d.ts → inject-DEb_jpLi.d.ts} +3 -1
  74. package/dist/lib/config.js +1 -1
  75. package/dist/{types-BbWJoC1c.d.ts → types-DslKvCaj.d.ts} +51 -1
  76. package/hooks/clawvault/HOOK.md +22 -5
  77. package/hooks/clawvault/handler.js +213 -78
  78. package/hooks/clawvault/handler.test.js +109 -43
  79. package/hooks/clawvault/integrity.js +112 -0
  80. package/hooks/clawvault/integrity.test.js +32 -0
  81. package/hooks/clawvault/openclaw.plugin.json +133 -15
  82. package/openclaw.plugin.json +126 -20
  83. package/package.json +2 -2
  84. package/bin/register-workgraph-commands.js +0 -1368
  85. package/dist/chunk-33VSQP4J.js +0 -37
  86. package/dist/chunk-4BQTQMJP.js +0 -93
  87. package/dist/chunk-EK6S23ZB.js +0 -469
  88. package/dist/chunk-GAOWA7GR.js +0 -501
  89. package/dist/chunk-GGA32J2R.js +0 -784
  90. package/dist/chunk-MM6QGW3P.js +0 -207
  91. package/dist/chunk-QVEERJSP.js +0 -152
  92. package/dist/chunk-U4O6C46S.js +0 -154
  93. package/dist/chunk-VSL7KY3M.js +0 -189
  94. package/dist/chunk-WMGIIABP.js +0 -15
  95. package/dist/commands/workgraph.d.ts +0 -124
  96. package/dist/commands/workgraph.js +0 -38
  97. package/dist/ledger-B7g7jhqG.d.ts +0 -44
  98. package/dist/registry-BR4326o0.d.ts +0 -30
  99. package/dist/store-CA-6sKCJ.d.ts +0 -34
  100. package/dist/thread-B9LhXNU0.d.ts +0 -41
  101. package/dist/workgraph/index.d.ts +0 -5
  102. package/dist/workgraph/index.js +0 -23
  103. package/dist/workgraph/ledger.d.ts +0 -2
  104. package/dist/workgraph/ledger.js +0 -25
  105. package/dist/workgraph/registry.d.ts +0 -2
  106. package/dist/workgraph/registry.js +0 -19
  107. package/dist/workgraph/store.d.ts +0 -2
  108. package/dist/workgraph/store.js +0 -25
  109. package/dist/workgraph/thread.d.ts +0 -2
  110. package/dist/workgraph/thread.js +0 -25
  111. package/dist/workgraph/types.d.ts +0 -54
  112. package/dist/workgraph/types.js +0 -7
@@ -0,0 +1,972 @@
1
+ import {
2
+ EmbeddingStore,
3
+ cosineSimilarity,
4
+ embedText,
5
+ resolveEmbeddingConfig
6
+ } from "./chunk-H3JZIB5O.js";
7
+
8
+ // src/lib/search.ts
9
+ import { execFileSync, spawnSync } from "child_process";
10
+ import * as fs from "fs";
11
+ import * as path from "path";
12
+
13
+ // src/lib/hosted-rerank.ts
// Built-in rerank endpoint for each hosted provider; a configured endpoint
// in the search config takes precedence over these.
var DEFAULT_ENDPOINTS = {
  jina: "https://api.jina.ai/v1/rerank",
  voyage: "https://api.voyageai.com/v1/rerank",
  siliconflow: "https://api.siliconflow.cn/v1/rerank",
  pinecone: "https://api.pinecone.io/rerank",
};

// Built-in rerank model name for each hosted provider; a configured model
// in the search config takes precedence over these.
var DEFAULT_MODELS = {
  jina: "jina-reranker-v2-base-multilingual",
  voyage: "rerank-2",
  siliconflow: "BAAI/bge-reranker-v2-m3",
  pinecone: "bge-reranker-v2-m3",
};
/**
 * Restrict a rerank blend weight to the closed interval [0, 1].
 *
 * @param {number} value - Candidate weight.
 * @returns {number} 0.6 when `value` is not a finite number, otherwise the
 *   value clamped into [0, 1].
 */
function clampWeight(value) {
  if (!Number.isFinite(value)) {
    return 0.6;
  }
  return value < 0 ? 0 : value > 1 ? 1 : value;
}
/**
 * Resolve the API key for a hosted rerank provider.
 *
 * Lookup order: the explicitly configured key, then the provider-specific
 * environment variable, then the generic `RERANK_API_KEY` variable.
 *
 * @param {string} provider - Rerank provider id (e.g. "jina", "voyage").
 * @param {string} [configured] - Key taken from the search config, if any.
 * @returns {string | undefined} The trimmed key, or undefined when none is set.
 */
function resolveApiKey(provider, configured) {
  if (configured?.trim()) return configured.trim();
  const envKeyByProvider = {
    jina: ["JINA_API_KEY"],
    voyage: ["VOYAGE_API_KEY"],
    siliconflow: ["SILICONFLOW_API_KEY"],
    pinecone: ["PINECONE_API_KEY"]
  };
  // Providers without a dedicated variable (or inherited property names such
  // as "constructor") must not break iteration; they simply fall through to
  // the generic RERANK_API_KEY lookup below.
  const envKeys = Object.hasOwn(envKeyByProvider, provider) ? envKeyByProvider[provider] : [];
  for (const key of envKeys) {
    const value = process.env[key]?.trim();
    if (value) return value;
  }
  return process.env.RERANK_API_KEY?.trim();
}
/**
 * Build the effective hosted-rerank settings from a search config.
 *
 * Reranking is considered disabled (null is returned) when the provider is
 * absent or "none", when no API key can be resolved, or when no endpoint is
 * known for the provider (no configured endpoint and no built-in default).
 *
 * @param {object} [searchConfig] - Search config; only its `rerank` section is read.
 * @returns {{provider: string, endpoint: string, model: (string|undefined), apiKey: string, weight: number} | null}
 *   Resolved settings, or null when reranking cannot be used.
 */
function resolveRerankConfig(searchConfig) {
  const rerank = searchConfig?.rerank;
  const provider = rerank?.provider ?? "none";
  if (provider === "none") {
    return null;
  }
  const apiKey = resolveApiKey(provider, rerank?.apiKey);
  if (!apiKey) {
    return null;
  }
  // Own-property lookups so an unrecognised provider yields "no default"
  // instead of an inherited Object.prototype member.
  const defaultEndpoint = Object.hasOwn(DEFAULT_ENDPOINTS, provider) ? DEFAULT_ENDPOINTS[provider] : void 0;
  const defaultModel = Object.hasOwn(DEFAULT_MODELS, provider) ? DEFAULT_MODELS[provider] : void 0;
  const rawEndpoint = rerank?.endpoint?.trim() || defaultEndpoint;
  if (!rawEndpoint) {
    // Unknown provider without an explicit endpoint: nothing to call.
    return null;
  }
  // Strip trailing slashes so the endpoint is used verbatim as the POST URL.
  const endpoint = rawEndpoint.replace(/\/+$/, "");
  const model = rerank?.model?.trim() || defaultModel;
  const weight = clampWeight(rerank?.weight ?? 0.6);
  return {
    provider,
    endpoint,
    model,
    apiKey,
    weight
  };
}
/**
 * Score documents against a query using a hosted cross-encoder rerank API.
 *
 * Sends one POST request with a JSON body of `{ model, query, documents, top_n }`
 * and a bearer `Authorization` header, aborting after 15 seconds. The response
 * is expected to carry an array under `results` or `data` whose items have an
 * `index` into `documents` and a `relevance_score` (or `score`).
 *
 * This is best-effort: any failure (empty input, non-2xx status, unexpected
 * payload, network error, timeout) yields null so callers can keep their
 * original ranking.
 *
 * @param {string} query - Search query text.
 * @param {string[]} documents - Candidate texts to score.
 * @param {{endpoint: string, model: string, apiKey: string}} config - Resolved rerank settings.
 * @returns {Promise<number[] | null>} Scores aligned with `documents` (0 for
 *   entries the service did not score), or null on failure.
 */
async function crossEncoderRerank(query, documents, config) {
  if (!documents.length) return null;
  try {
    const response = await fetch(config.endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${config.apiKey}`
      },
      body: JSON.stringify({
        model: config.model,
        query,
        documents,
        top_n: documents.length
      }),
      signal: AbortSignal.timeout(15e3)
    });
    if (!response.ok) {
      return null;
    }
    const payload = await response.json();
    const items = payload.results ?? payload.data;
    if (!Array.isArray(items)) {
      return null;
    }
    const scores = new Array(documents.length).fill(0);
    for (const item of items) {
      const index = item.index;
      // Only whole, in-range indexes address a document; a fractional index
      // would otherwise create a stray property on the array.
      if (!Number.isInteger(index) || index < 0 || index >= documents.length) continue;
      const score = Number(item.relevance_score ?? item.score ?? 0);
      // Leave the default 0 in place rather than storing NaN/Infinity.
      if (Number.isFinite(score)) {
        scores[index] = score;
      }
    }
    return scores;
  } catch {
    // Deliberate best-effort: reranking is optional, so errors disable it.
    return null;
  }
}
103
+
104
+ // src/lib/in-process-search.ts
// BM25 term-frequency saturation parameter.
var BM25_K1 = 1.2;
// BM25 chunk-length normalization parameter.
var BM25_B = 0.75;
// Rank offset used in reciprocal-rank-fusion terms: weight / (RRF_K + rank + 1).
var RRF_K = 60;
// Default chunk length, in characters, when the config gives no chunkSize.
var DEFAULT_CHUNK_SIZE = 700;
// Default overlap between consecutive chunks, in characters.
var DEFAULT_CHUNK_OVERLAP = 100;
/**
 * Split text into lowercase search tokens.
 *
 * Any run of characters other than Unicode letters, Unicode digits, `_` and
 * `-` acts as a separator; tokens of one character or less are dropped.
 *
 * @param {string} text - Raw text.
 * @returns {string[]} Tokens in order of appearance (duplicates kept).
 */
function tokenize(text) {
  const tokens = [];
  for (const piece of text.toLowerCase().split(/[^\p{L}\p{N}_-]+/u)) {
    const token = piece.trim();
    if (token.length > 1) {
      tokens.push(token);
    }
  }
  return tokens;
}
/**
 * Min-max normalize a score.
 *
 * @param {number} value - Score to normalize.
 * @param {number} min - Lower bound of the observed range.
 * @param {number} max - Upper bound of the observed range.
 * @returns {number} 0 for a non-finite value; when the range is empty or
 *   inverted (`max <= min`), 1 for a positive value and 0 otherwise; else
 *   `(value - min) / (max - min)`.
 */
function normalizeScore(value, min, max) {
  if (!Number.isFinite(value)) {
    return 0;
  }
  const degenerateRange = max <= min;
  if (degenerateRange) {
    return value > 0 ? 1 : 0;
  }
  const span = max - min;
  return (value - min) / span;
}
/**
 * Rescale the `score` of every candidate with min-max normalization over the
 * finite scores in the list.
 *
 * @param {Array<{score: number}>} candidates - Scored candidates.
 * @returns {Array<{score: number}>} The same (empty) array when there is
 *   nothing to normalize, otherwise new candidate objects with rescaled scores.
 */
function normalizeCandidateScores(candidates) {
  if (candidates.length === 0) {
    return candidates;
  }
  // Same seeds Math.min()/Math.max() produce for an empty argument list.
  let lowest = Infinity;
  let highest = -Infinity;
  for (const { score } of candidates) {
    if (!Number.isFinite(score)) continue;
    lowest = Math.min(lowest, score);
    highest = Math.max(highest, score);
  }
  const rescaled = [];
  for (const candidate of candidates) {
    rescaled.push({
      ...candidate,
      score: normalizeScore(candidate.score, lowest, highest)
    });
  }
  return rescaled;
}
/**
 * In-memory hybrid search engine.
 *
 * Documents are split into overlapping character chunks that are scored with
 * BM25. When an embedding configuration resolves and the on-disk embedding
 * store is compatible with it, per-document cosine-similarity rankings are
 * fused with the BM25 ranking through weighted reciprocal-rank fusion
 * (0.65 lexical / 0.35 semantic). A hosted cross-encoder may optionally
 * re-score the fused candidates. All candidate scores handed to the result
 * stage are min-max normalized.
 */
var InProcessSearchEngine = class {
  vaultPath = "";
  config = {};
  // docId -> document
  documents = /* @__PURE__ */ new Map();
  // chunkId -> { id, docId, text, termFreq, length }
  chunks = /* @__PURE__ */ new Map();
  // docId -> ids of the chunks indexed for that document
  chunkIdsByDoc = /* @__PURE__ */ new Map();
  // term -> number of indexed chunks containing the term
  documentFrequency = /* @__PURE__ */ new Map();
  // Sum of token counts over all indexed chunks (feeds the BM25 average length).
  totalChunkLength = 0;
  embeddingStore = new EmbeddingStore(process.cwd());
  embeddingStoreLoaded = false;

  /**
   * Point the engine (and its embedding store) at a vault directory.
   * The embedding store is re-read lazily on the next semantic lookup.
   * @param {string} vaultPath
   */
  setVaultPath(vaultPath) {
    this.vaultPath = vaultPath;
    this.embeddingStore.setVaultPath(vaultPath);
    this.embeddingStoreLoaded = false;
  }

  /**
   * Replace the search configuration (null/undefined resets it to `{}`).
   * @param {object} [config]
   */
  setConfig(config) {
    this.config = config ?? {};
  }

  /**
   * Index a document, replacing any previously indexed document with the same id.
   * The title and content are joined with a newline and chunked; chunks that
   * produce no tokens are not indexed.
   * @param {{id: string, title: string, content: string}} doc
   */
  addDocument(doc) {
    const existing = this.documents.get(doc.id);
    if (existing) {
      this.removeDocument(existing.id);
    }
    this.documents.set(doc.id, doc);
    const chunkIds = [];
    const chunkTexts = this.splitIntoChunks(`${doc.title}\n${doc.content}`);
    for (let index = 0; index < chunkTexts.length; index += 1) {
      const text = chunkTexts[index];
      const terms = tokenize(text);
      if (!terms.length) continue;
      const termFreq = /* @__PURE__ */ new Map();
      for (const term of terms) {
        termFreq.set(term, (termFreq.get(term) ?? 0) + 1);
      }
      const chunkId = `${doc.id}#${index + 1}`;
      chunkIds.push(chunkId);
      this.totalChunkLength += terms.length;
      this.chunks.set(chunkId, {
        id: chunkId,
        docId: doc.id,
        text,
        termFreq,
        length: terms.length
      });
      // Document frequency counts chunks, so each distinct term counts once per chunk.
      for (const term of termFreq.keys()) {
        this.documentFrequency.set(term, (this.documentFrequency.get(term) ?? 0) + 1);
      }
    }
    this.chunkIdsByDoc.set(doc.id, chunkIds);
  }

  /**
   * Remove a document and all of its chunks, undoing their contribution to
   * the BM25 statistics.
   * @param {string} docId
   */
  removeDocument(docId) {
    const existingChunkIds = this.chunkIdsByDoc.get(docId) ?? [];
    for (const chunkId of existingChunkIds) {
      const chunk = this.chunks.get(chunkId);
      if (!chunk) continue;
      this.totalChunkLength = Math.max(0, this.totalChunkLength - chunk.length);
      for (const term of chunk.termFreq.keys()) {
        const next = (this.documentFrequency.get(term) ?? 0) - 1;
        if (next <= 0) {
          this.documentFrequency.delete(term);
        } else {
          this.documentFrequency.set(term, next);
        }
      }
      this.chunks.delete(chunkId);
    }
    this.chunkIdsByDoc.delete(docId);
    this.documents.delete(docId);
  }

  /** Drop every indexed document, chunk and statistic. */
  clear() {
    this.documents.clear();
    this.chunks.clear();
    this.chunkIdsByDoc.clear();
    this.documentFrequency.clear();
    this.totalChunkLength = 0;
  }

  /** @returns {object[]} A new array holding every indexed document. */
  getAllDocuments() {
    return [...this.documents.values()];
  }

  /** Number of indexed documents. */
  get size() {
    return this.documents.size;
  }

  /** @returns {{documents: object[]}} Snapshot suitable for `import()`. */
  export() {
    return { documents: this.getAllDocuments() };
  }

  /**
   * Replace the whole index with the documents of a snapshot.
   * @param {{documents: object[]}} data
   */
  import(data) {
    this.clear();
    for (const doc of data.documents) {
      this.addDocument(doc);
    }
  }

  /**
   * Hybrid search: BM25 fused with semantic ranks, optionally reranked.
   * @param {string} query
   * @param {object} [options] - Reads `limit`, `minScore`, `temporalBoost`,
   *   `fullContent`, `category` and `tags`.
   * @returns {Promise<object[]>} Up to `limit` results (default 10, minimum 1);
   *   empty for a blank query.
   */
  async search(query, options = {}) {
    if (!query.trim()) return [];
    const limit = Math.max(1, options.limit ?? 10);
    // Over-fetch (5x) so fusion and reranking have a wider pool to choose from.
    const bm25Candidates = this.runBm25(query, options, limit * 5);
    const semanticRanks = await this.getSemanticRanks(query, options);
    let fused = this.fuseHybrid(bm25Candidates, semanticRanks, limit * 5);
    fused = await this.applyCrossEncoderRerank(query, fused);
    return this.toSearchResults(fused, options, limit);
  }

  /**
   * Semantic-only search over document embeddings, optionally reranked.
   * @param {string} query
   * @param {object} [options] - Same options as `search`.
   * @returns {Promise<object[]>} Results, or an empty array when the query is
   *   blank or no semantic ranking is available.
   */
  async vsearch(query, options = {}) {
    if (!query.trim()) return [];
    const limit = Math.max(1, options.limit ?? 10);
    const semanticRanks = await this.getSemanticRanks(query, options);
    if (semanticRanks.length === 0) {
      return [];
    }
    const candidates = [];
    for (const { docId, score } of semanticRanks) {
      const doc = this.documents.get(docId);
      if (!doc || !this.matchesFilters(doc, options)) continue;
      candidates.push({
        id: `${docId}#semantic`,
        docId,
        snippet: this.buildSnippet(doc.content, []),
        score,
        matchedTerms: []
      });
    }
    const reranked = await this.applyCrossEncoderRerank(query, candidates);
    return this.toSearchResults(reranked, options, limit);
  }

  /** Alias of `search`. */
  async query(queryText, options = {}) {
    return this.search(queryText, options);
  }

  /**
   * Split text into overlapping chunks of roughly `config.chunkSize`
   * characters (minimum 200, default 700) with `config.chunkOverlap`
   * characters of overlap (default 100, capped below the chunk size).
   * @param {string} content
   * @returns {string[]} Non-empty trimmed chunks.
   */
  splitIntoChunks(content) {
    const normalized = content.replace(/\r\n/g, "\n").trim();
    if (!normalized) return [];
    const chunkSize = Math.max(200, this.config.chunkSize ?? DEFAULT_CHUNK_SIZE);
    const overlap = Math.max(0, Math.min(chunkSize - 1, this.config.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP));
    const chunks = [];
    let start = 0;
    while (start < normalized.length) {
      let end = Math.min(normalized.length, start + chunkSize);
      if (end < normalized.length) {
        // Extend to the next whitespace within 100 characters so the cut
        // does not land in the middle of a word when avoidable.
        const boundary = normalized.slice(end, Math.min(normalized.length, end + 100)).search(/\s/);
        if (boundary >= 0) {
          end += boundary;
        }
      }
      const piece = normalized.slice(start, end).trim();
      if (piece) {
        chunks.push(piece);
      }
      if (end >= normalized.length) break;
      // overlap < chunkSize guarantees the window always moves forward.
      start = Math.max(0, end - overlap);
    }
    return chunks;
  }

  /**
   * Score every chunk of the filter-matching documents with BM25.
   * @param {string} query
   * @param {object} options - Filter options (`category`, `tags`).
   * @param {number} topK - Maximum number of candidates to keep.
   * @returns {object[]} Best chunks, highest first, scores min-max normalized.
   */
  runBm25(query, options, topK) {
    const queryTerms = tokenize(query);
    if (!queryTerms.length || this.chunks.size === 0) {
      return [];
    }
    const uniqueTerms = [...new Set(queryTerms)];
    const chunkCount = this.chunks.size;
    const avgChunkLength = chunkCount > 0 ? this.totalChunkLength / chunkCount : 1;
    const candidates = [];
    for (const chunk of this.chunks.values()) {
      const doc = this.documents.get(chunk.docId);
      if (!doc || !this.matchesFilters(doc, options)) continue;
      let score = 0;
      const matchedTerms = [];
      for (const term of uniqueTerms) {
        const termFreq = chunk.termFreq.get(term) ?? 0;
        if (termFreq === 0) continue;
        matchedTerms.push(term);
        const docFreq = this.documentFrequency.get(term) ?? 0;
        const idf = Math.log((chunkCount - docFreq + 0.5) / (docFreq + 0.5) + 1);
        const tfNorm = termFreq * (BM25_K1 + 1) / (termFreq + BM25_K1 * (1 - BM25_B + BM25_B * (chunk.length / Math.max(1, avgChunkLength))));
        score += idf * tfNorm;
      }
      if (score <= 0) continue;
      candidates.push({
        id: chunk.id,
        docId: chunk.docId,
        snippet: this.buildSnippet(chunk.text, matchedTerms),
        score,
        matchedTerms
      });
    }
    return normalizeCandidateScores(
      candidates.sort((left, right) => right.score - left.score).slice(0, topK)
    );
  }

  /**
   * Rank filter-matching documents by cosine similarity between the query
   * embedding and each stored document embedding.
   * @param {string} query
   * @param {object} options - Reads `limit` plus the filter options.
   * @returns {Promise<Array<{docId: string, rank: number, score: number}>>}
   *   Zero-based ranks with min-max normalized scores; empty when no
   *   embedding config resolves, the store is incompatible, embedding the
   *   query fails, or no document has a stored vector.
   */
  async getSemanticRanks(query, options) {
    const embeddingConfig = resolveEmbeddingConfig(this.config);
    if (!embeddingConfig) {
      return [];
    }
    if (!this.loadEmbeddingStoreIfNeeded(embeddingConfig.provider, embeddingConfig.model)) {
      return [];
    }
    let queryEmbedding;
    try {
      queryEmbedding = await embedText(query, embeddingConfig, { isQuery: true });
    } catch {
      // Semantic ranking is optional; fall back to lexical-only search.
      return [];
    }
    const ranked = Array.from(this.documents.values()).filter((doc) => this.matchesFilters(doc, options)).map((doc) => {
      const vector = this.embeddingStore.get(doc.id);
      if (!vector) {
        return null;
      }
      return {
        docId: doc.id,
        score: cosineSimilarity(queryEmbedding, vector)
      };
    }).filter((entry) => entry !== null).sort((left, right) => right.score - left.score).slice(0, Math.max(20, (options.limit ?? 10) * 5));
    if (ranked.length === 0) {
      return [];
    }
    const min = Math.min(...ranked.map((entry) => entry.score));
    const max = Math.max(...ranked.map((entry) => entry.score));
    return ranked.map((entry, index) => ({
      docId: entry.docId,
      rank: index,
      score: normalizeScore(entry.score, min, max)
    }));
  }

  /**
   * Fuse BM25 chunk candidates with semantic document ranks using weighted
   * reciprocal-rank fusion.
   * @param {object[]} bm25Candidates - Output of `runBm25` (best first).
   * @param {Array<{docId: string, rank: number, score: number}>} semanticRanks
   * @param {number} topK - Maximum number of fused candidates.
   * @returns {object[]} Candidates, best first, scores min-max normalized.
   */
  fuseHybrid(bm25Candidates, semanticRanks, topK) {
    if (bm25Candidates.length === 0) {
      const semanticOnly = [];
      for (const entry of semanticRanks) {
        const doc = this.documents.get(entry.docId);
        // Same guard as the hybrid path below: skip ranks whose document is gone.
        if (!doc) continue;
        semanticOnly.push({
          id: `${entry.docId}#semantic`,
          docId: entry.docId,
          snippet: this.buildSnippet(doc.content, []),
          score: 1 / (RRF_K + entry.rank + 1),
          matchedTerms: []
        });
      }
      // Normalize like every other path so minScore filtering and the rerank
      // weight blend see scores on the same 0..1 scale.
      return normalizeCandidateScores(semanticOnly.slice(0, topK));
    }
    const semanticRankMap = new Map(
      semanticRanks.map((entry) => [entry.docId, { rank: entry.rank, score: entry.score }])
    );
    const fused = [];
    for (let index = 0; index < bm25Candidates.length; index += 1) {
      const candidate = bm25Candidates[index];
      const bm25Rrf = 0.65 / (RRF_K + index + 1);
      const semantic = semanticRankMap.get(candidate.docId);
      const semanticRrf = semantic ? 0.35 / (RRF_K + semantic.rank + 1) : 0;
      fused.push({
        ...candidate,
        score: bm25Rrf + semanticRrf
      });
    }
    // Documents found only by the semantic ranking still get their semantic share.
    const seenDocs = new Set(fused.map((entry) => entry.docId));
    for (const entry of semanticRanks) {
      if (seenDocs.has(entry.docId)) continue;
      const doc = this.documents.get(entry.docId);
      if (!doc) continue;
      fused.push({
        id: `${entry.docId}#semantic`,
        docId: entry.docId,
        snippet: this.buildSnippet(doc.content, []),
        matchedTerms: [],
        score: 0.35 / (RRF_K + entry.rank + 1)
      });
    }
    return normalizeCandidateScores(
      fused.sort((left, right) => right.score - left.score).slice(0, topK)
    );
  }

  /**
   * Blend candidate scores with hosted cross-encoder scores:
   * `(1 - weight) * candidate.score + weight * normalizedRerankScore`.
   * @param {string} query
   * @param {object[]} candidates
   * @returns {Promise<object[]>} Re-sorted candidates, or the input unchanged
   *   when reranking is not configured, there are no candidates, or the
   *   rerank request fails.
   */
  async applyCrossEncoderRerank(query, candidates) {
    const rerankConfig = resolveRerankConfig(this.config);
    if (!rerankConfig || candidates.length === 0) {
      return candidates;
    }
    const texts = candidates.map((candidate) => {
      const doc = this.documents.get(candidate.docId);
      const title = doc?.title ?? candidate.docId;
      return `${title}\n${candidate.snippet}`.trim();
    });
    const rerankScores = await crossEncoderRerank(query, texts, rerankConfig);
    if (!rerankScores) {
      return candidates;
    }
    const normalizedRerank = normalizeCandidateScores(
      candidates.map((candidate, index) => ({
        ...candidate,
        score: rerankScores[index] ?? 0
      }))
    );
    const weighted = candidates.map((candidate, index) => ({
      ...candidate,
      score: (1 - rerankConfig.weight) * candidate.score + rerankConfig.weight * normalizedRerank[index].score
    }));
    return weighted.sort((left, right) => right.score - left.score);
  }

  /**
   * Turn scored candidates into public search results.
   * Applies the optional recency factor, drops results below `minScore`
   * (default 0), sorts best-first and truncates to `limit`. Document content
   * is blanked unless `options.fullContent` is set.
   * @param {object[]} candidates
   * @param {object} options
   * @param {number} limit
   * @returns {Array<{document: object, score: number, snippet: string, matchedTerms: string[]}>}
   */
  toSearchResults(candidates, options, limit) {
    const minScore = options.minScore ?? 0;
    const boosted = candidates.map((candidate) => {
      const doc = this.documents.get(candidate.docId);
      if (!doc) return null;
      const temporal = options.temporalBoost ? this.getRecencyFactor(doc.modified) : 1;
      return {
        candidate,
        doc,
        score: candidate.score * temporal
      };
    }).filter((entry) => entry !== null).filter((entry) => entry.score >= minScore).sort((left, right) => right.score - left.score).slice(0, limit);
    return boosted.map((entry) => ({
      document: options.fullContent ? entry.doc : { ...entry.doc, content: "" },
      score: entry.score,
      snippet: entry.candidate.snippet,
      matchedTerms: entry.candidate.matchedTerms
    }));
  }

  /**
   * Check a document against the `category` and `tags` filters.
   * Tags match case-insensitively and any single shared tag is enough.
   * A document without a `tags` array is treated as having no tags.
   * @param {object} doc
   * @param {object} options
   * @returns {boolean}
   */
  matchesFilters(doc, options) {
    if (options.category && doc.category !== options.category) {
      return false;
    }
    if (options.tags?.length) {
      const docTags = new Set((doc.tags ?? []).map((tag) => tag.toLowerCase()));
      const hasTag = options.tags.some((tag) => docTags.has(tag.toLowerCase()));
      if (!hasTag) return false;
    }
    return true;
  }

  /**
   * Recency multiplier: 1 for documents modified less than a day ago, 0.9 up
   * to seven days, 0.7 otherwise.
   * @param {Date | string | number} modifiedAt - Modification time. Non-Date
   *   values (for example an ISO string left over from a JSON round-trip) are
   *   parsed with `new Date(...)`; an unparseable value gets the oldest tier.
   * @returns {number}
   */
  getRecencyFactor(modifiedAt) {
    const modifiedMs = modifiedAt instanceof Date ? modifiedAt.getTime() : new Date(modifiedAt).getTime();
    if (!Number.isFinite(modifiedMs)) return 0.7;
    const ageMs = Math.max(0, Date.now() - modifiedMs);
    const ageDays = ageMs / (24 * 60 * 60 * 1e3);
    if (ageDays < 1) return 1;
    if (ageDays <= 7) return 0.9;
    return 0.7;
  }

  /**
   * Build a snippet of at most 260 characters from whitespace-collapsed text.
   * With matched terms, the window starts up to 80 characters before the
   * first occurrence of the first term found in the text; otherwise it is the
   * start of the text.
   * @param {string} text
   * @param {string[]} matchedTerms
   * @returns {string}
   */
  buildSnippet(text, matchedTerms) {
    const normalized = text.replace(/\s+/g, " ").trim();
    if (!normalized) return "";
    if (!matchedTerms.length) {
      return normalized.slice(0, 260);
    }
    const lower = normalized.toLowerCase();
    const firstTerm = matchedTerms.find((term) => lower.includes(term.toLowerCase()));
    if (!firstTerm) {
      return normalized.slice(0, 260);
    }
    const start = Math.max(0, lower.indexOf(firstTerm.toLowerCase()) - 80);
    const end = Math.min(normalized.length, start + 260);
    return normalized.slice(start, end).trim();
  }

  /**
   * Load the embedding store on first use and verify it matches the
   * requested provider/model.
   * @param {string} provider
   * @param {string} model
   * @returns {boolean} false when the store reports it is incompatible.
   */
  loadEmbeddingStoreIfNeeded(provider, model) {
    if (!this.embeddingStoreLoaded) {
      this.embeddingStore.load();
      this.embeddingStoreLoaded = true;
    }
    if (!this.embeddingStore.isCompatible(provider, model)) {
      return false;
    }
    this.embeddingStore.setSignature(provider, model);
    return true;
  }
};
484
+
485
+ // src/lib/search.ts
// Where to find qmd and how to install it (interpolated into user-facing hints).
var QMD_INSTALL_URL = "https://github.com/tobi/qmd";
var QMD_INSTALL_COMMAND = "bun install -g github:tobi/qmd";

// Environment variable consulted for the qmd index name when none is passed explicitly.
var QMD_INDEX_ENV_VAR = "CLAWVAULT_QMD_INDEX";

// Message and remediation hint for each qmd failure code.
var QMD_ERROR_MESSAGES = {
  NOT_INSTALLED: {
    code: "NOT_INSTALLED",
    message: "qmd is not installed",
    hint: `Install qmd to enable ClawVault search and indexing:
${QMD_INSTALL_COMMAND}

For more information: ${QMD_INSTALL_URL}`,
  },
  NOT_CONFIGURED: {
    code: "NOT_CONFIGURED",
    message: "qmd collection is not configured",
    hint: "Run `clawvault doctor` to diagnose configuration issues, or `clawvault migrate` to fix common setup problems.",
  },
  COLLECTION_NOT_FOUND: {
    code: "COLLECTION_NOT_FOUND",
    message: "qmd collection not found",
    hint: "The configured qmd collection does not exist. Run `clawvault migrate` to recreate it, or `qmd collection add <name> <path>` manually.",
  },
  EXECUTION_FAILED: {
    code: "EXECUTION_FAILED",
    message: "qmd command failed",
    hint: "Run `clawvault doctor` to diagnose qmd issues.",
  },
};
/**
 * Error raised when qmd cannot be used. Carries a machine-readable `code`
 * (a key of QMD_ERROR_MESSAGES) and a remediation `hint`.
 */
var QmdUnavailableError = class extends Error {
  code;
  hint;

  /**
   * @param {string} [code="NOT_INSTALLED"] - Key of QMD_ERROR_MESSAGES.
   * @param {string} [additionalContext] - Extra detail appended to the
   *   standard message after ": ".
   */
  constructor(code = "NOT_INSTALLED", additionalContext) {
    const { message, hint } = QMD_ERROR_MESSAGES[code];
    super(additionalContext ? `${message}: ${additionalContext}` : message);
    this.name = "QmdUnavailableError";
    this.code = code;
    this.hint = hint;
  }

  /** @returns {string} The message followed by a blank line and the hint. */
  toUserMessage() {
    return `Error: ${this.message}\n\n${this.hint}`;
  }
};
/**
 * Look up the catalogue entry for a qmd error code.
 *
 * @param {string} code - Key of QMD_ERROR_MESSAGES.
 * @returns {{code: string, message: string, hint: string} | undefined}
 *   The entry, or undefined for a code that is not in the catalogue.
 */
function getQmdErrorDetails(code) {
  const details = QMD_ERROR_MESSAGES[code];
  return details;
}
/**
 * Error describing a qmd setup or configuration problem, paired with a
 * human-readable `hint` on how to resolve it.
 */
var QmdConfigurationError = class extends Error {
  /**
   * @param {string} message - What is wrong.
   * @param {string} hint - Suggested remediation, stored on `this.hint`.
   */
  constructor(message, hint) {
    super(message);
    // Assigned in this order so the own-property order stays hint, name.
    Object.assign(this, { hint, name: "QmdConfigurationError" });
  }
};
/**
 * Make sure a qmd argument list requests JSON output.
 *
 * @param {string[]} args - qmd command-line arguments.
 * @returns {string[]} `args` itself when it already contains "--json",
 *   otherwise a new array with "--json" appended.
 */
function ensureJsonArgs(args) {
  if (args.includes("--json")) {
    return args;
  }
  const withFlag = Array.from(args);
  withFlag.push("--json");
  return withFlag;
}
/**
 * Decide which qmd index name to use.
 *
 * @param {string} [indexName] - Explicitly requested index name.
 * @returns {string | undefined} The trimmed explicit name if non-empty,
 *   otherwise the trimmed value of the environment variable named by
 *   QMD_INDEX_ENV_VAR if non-empty, otherwise undefined.
 */
function resolveQmdIndexName(indexName) {
  return indexName?.trim() || process.env[QMD_INDEX_ENV_VAR]?.trim() || void 0;
}
/**
 * Prefix a qmd argument list with `--index <name>` when an index name is
 * available and the arguments do not already select one.
 *
 * @param {string[]} args - qmd command-line arguments.
 * @param {string} [indexName] - Explicit index name, resolved through resolveQmdIndexName.
 * @returns {string[]} Always a new array; the input is never modified.
 */
function withQmdIndexArgs(args, indexName) {
  const copy = [...args];
  if (copy.includes("--index")) {
    return copy;
  }
  const resolved = resolveQmdIndexName(indexName);
  return resolved ? ["--index", resolved, ...copy] : copy;
}
/**
 * Parse JSON without throwing.
 *
 * @param {string} raw - Text that may be JSON.
 * @returns {*} The parsed value, or null when `raw` is not valid JSON.
 */
function tryParseJson(raw) {
  let parsed = null;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Not JSON: report that as null.
  }
  return parsed;
}
/**
 * Cut the JSON-looking part out of noisy text: everything from the first
 * `[` or `{` through the last `]` or `}`.
 *
 * @param {string} raw - Text that may wrap a JSON payload in other output.
 * @returns {string | null} The candidate payload, or null when no opening
 *   bracket exists or no closing bracket follows it.
 */
function extractJsonPayload(raw) {
  const openAt = raw.search(/[\[{]/);
  const closeAt = Math.max(raw.lastIndexOf("]"), raw.lastIndexOf("}"));
  const hasPayload = openAt !== -1 && closeAt > openAt;
  return hasPayload ? raw.slice(openAt, closeAt + 1) : null;
}
/**
 * Remove known non-JSON progress lines from qmd output.
 *
 * A line is dropped when, after trimming, it starts with "[node-llama-cpp]"
 * or "Expanding query", starts with "Searching " and ends with "queries...",
 * or begins with a box-drawing character. Blank lines are kept.
 *
 * @param {string} raw - Raw qmd output.
 * @returns {string} The remaining lines joined with "\n".
 */
function stripQmdNoise(raw) {
  const isNoise = (line) => {
    const text = line.trim();
    return text.startsWith("[node-llama-cpp]")
      || text.startsWith("Expanding query")
      || (text.startsWith("Searching ") && text.endsWith("queries..."))
      || /^[├└─│]/.test(text);
  };
  const kept = [];
  for (const line of raw.split("\n")) {
    if (!isNoise(line)) {
      kept.push(line);
    }
  }
  return kept.join("\n");
}
/**
 * Parse qmd's JSON output into an array of result records.
 *
 * Noise lines are stripped first. The remaining text is parsed directly and,
 * failing that, the bracketed payload embedded in it is parsed instead.
 * Accepts either a top-level array or an object holding the array under
 * `results`, `items` or `data`.
 *
 * @param {string} raw - Raw qmd output.
 * @returns {Array} Parsed records; empty when the output is blank.
 * @throws {Error} When no truthy JSON value can be parsed, or when the JSON
 *   is neither an array nor an object wrapping one.
 */
function parseQmdOutput(raw) {
  const cleaned = stripQmdNoise(raw).trim();
  if (cleaned === "") {
    return [];
  }
  let parsed = tryParseJson(cleaned);
  if (!parsed) {
    // Direct parse gave nothing usable: try the embedded payload. A falsy
    // non-null direct result (0, false, "") is kept and rejected below.
    const payload = extractJsonPayload(cleaned);
    parsed = parsed ?? (payload ? tryParseJson(payload) : null);
  }
  if (!parsed) {
    throw new Error("qmd returned non-JSON output. Ensure qmd supports --json.");
  }
  if (Array.isArray(parsed)) {
    return parsed;
  }
  if (typeof parsed === "object") {
    const wrapped = parsed.results ?? parsed.items ?? parsed.data;
    if (Array.isArray(wrapped)) {
      return wrapped;
    }
  }
  throw new Error("qmd returned an unexpected JSON shape.");
}
/**
 * Guard that fails fast when the qmd executable cannot be run.
 *
 * @throws {QmdUnavailableError} With code "NOT_INSTALLED" when hasQmd() is false.
 */
function ensureQmdAvailable() {
  if (hasQmd()) {
    return;
  }
  throw new QmdUnavailableError("NOT_INSTALLED");
}
/**
 * Recognise well-known qmd failure messages in command output.
 *
 * Matching is case-insensitive and the checks run in a fixed order:
 * argument/option mismatch, missing collection, missing index, missing
 * embeddings.
 *
 * @param {string} output - Combined qmd output to inspect.
 * @param {string[]} args - Arguments qmd was invoked with; the value after
 *   "-c" is reported as the collection name.
 * @returns {QmdConfigurationError | null} A descriptive error, or null when
 *   nothing recognisable was found.
 */
function detectQmdError(output, args) {
  const haystack = output.toLowerCase();
  const mentions = (...needles) => needles.some((needle) => haystack.includes(needle));

  if (mentions("missing required arguments", "unknown option")) {
    return new QmdConfigurationError(
      'qmd does not support the search command with the expected arguments. This may indicate an incompatible qmd version or a different tool named "qmd".',
      `Ensure you have the correct qmd installed: ${QMD_INSTALL_COMMAND}`
    );
  }

  if (mentions("collection not found", "no collection")) {
    const flagAt = args.indexOf("-c");
    let collectionName = "unknown";
    if (flagAt >= 0 && args[flagAt + 1]) {
      collectionName = args[flagAt + 1];
    }
    return new QmdConfigurationError(
      `qmd collection "${collectionName}" not found.`,
      'Run `qmd update -c <collection>` to create the collection, or check your vault\'s .clawvault.json "name" field.'
    );
  }

  if (mentions("no index", "index not found")) {
    return new QmdConfigurationError(
      "qmd index not found. The vault may not be indexed yet.",
      "Run `clawvault rebuild` or `qmd update` to build the search index."
    );
  }

  if (mentions("embedding") && mentions("not found", "missing")) {
    return new QmdConfigurationError(
      "qmd embeddings not found. Vector search requires embeddings to be generated.",
      "Run `clawvault embed` or `qmd embed` to generate embeddings for semantic search."
    );
  }

  return null;
}
/**
 * Run qmd synchronously with JSON output and return the parsed records.
 *
 * "--json" and, when an index name resolves, "--index <name>" are added to
 * the arguments. On failure the captured output is inspected so the caller
 * gets the most specific error available.
 *
 * @param {string[]} args - qmd sub-command and its arguments.
 * @param {string} [indexName] - Optional qmd index name.
 * @returns {Array} Parsed qmd records.
 * @throws {QmdUnavailableError} When qmd is missing ("NOT_INSTALLED"), the
 *   collection is reported missing ("COLLECTION_NOT_FOUND"), or the command
 *   fails for an unrecognised reason ("EXECUTION_FAILED").
 * @throws {QmdConfigurationError} When the output matches a known
 *   configuration problem.
 */
function execQmd(args, indexName) {
  ensureQmdAvailable();
  const finalArgs = withQmdIndexArgs(ensureJsonArgs(args), indexName);
  try {
    // NOTE(review): on win32 this runs through a shell, where Node joins the
    // arguments into the command line without escaping. Arguments derived
    // from user input (e.g. query text) should be reviewed for shell
    // metacharacters on that platform.
    const result = execFileSync("qmd", finalArgs, {
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "pipe"],
      maxBuffer: 10 * 1024 * 1024,
      // 10MB
      shell: process.platform === "win32"
    });
    return parseQmdOutput(result);
  } catch (err) {
    if (err?.code === "ENOENT") {
      throw new QmdUnavailableError("NOT_INSTALLED");
    }
    if (err?.status === 1 && err?.stdout) {
      // Exit status 1 can still come with usable JSON on stdout. When stdout
      // is not parseable, keep going so the failure is classified below
      // instead of surfacing a bare parse error.
      try {
        return parseQmdOutput(err.stdout);
      } catch {
        // Fall through to error detection.
      }
    }
    const output = [err?.stdout, err?.stderr].filter(Boolean).join("\n");
    const detectedError = detectQmdError(output, finalArgs);
    if (detectedError) {
      throw detectedError;
    }
    if (output) {
      try {
        return parseQmdOutput(output);
      } catch {
        // Combined output is not JSON either; classify the failure below.
      }
      // Case-insensitive, matching how detectQmdError inspects the output.
      const lowerOutput = output.toLowerCase();
      if (lowerOutput.includes("collection not found") || lowerOutput.includes("no such collection")) {
        throw new QmdUnavailableError("COLLECTION_NOT_FOUND", output.trim());
      }
    }
    const errorDetail = err?.message || "unknown error";
    throw new QmdUnavailableError("EXECUTION_FAILED", errorDetail);
  }
}
/**
 * Probe for a runnable `qmd` executable by spawning `qmd --version`.
 *
 * @returns {boolean} true when the process could be spawned and exited with
 *   status 0 or 1; false otherwise.
 */
function hasQmd() {
  const { error, status } = spawnSync("qmd", ["--version"], {
    stdio: "ignore",
    shell: process.platform === "win32"
  });
  if (error) {
    return false;
  }
  return status === 0 || status === 1;
}
/**
 * Run `qmd update`, optionally limited to one collection, with qmd's output
 * streamed straight to this process's stdio.
 *
 * @param {string} [collection] - Collection name passed as `-c <collection>`.
 * @param {string} [indexName] - Optional qmd index name.
 * @throws {QmdUnavailableError} When qmd is not installed.
 */
function qmdUpdate(collection, indexName) {
  ensureQmdAvailable();
  const baseArgs = collection ? ["update", "-c", collection] : ["update"];
  const spawnOptions = { stdio: "inherit", shell: process.platform === "win32" };
  execFileSync("qmd", withQmdIndexArgs(baseArgs, indexName), spawnOptions);
}
/**
 * Run `qmd embed`, optionally limited to one collection, with qmd's output
 * streamed straight to this process's stdio.
 *
 * @param {string} [collection] - Collection name passed as `-c <collection>`.
 * @param {string} [indexName] - Optional qmd index name.
 * @throws {QmdUnavailableError} When qmd is not installed.
 */
function qmdEmbed(collection, indexName) {
  ensureQmdAvailable();
  const baseArgs = collection ? ["embed", "-c", collection] : ["embed"];
  const spawnOptions = { stdio: "inherit", shell: process.platform === "win32" };
  execFileSync("qmd", withQmdIndexArgs(baseArgs, indexName), spawnOptions);
}
699
/**
 * Search facade that combines an in-process search engine with the external
 * `qmd` command-line tool.
 *
 * Documents are cached in the wrapped `InProcessSearchEngine`; depending on
 * `searchConfig`, queries are answered in-process, by qmd, or by one with the
 * other as a fallback (see `runSearchWithFallback`).
 */
var SearchEngine = class {
  inProcess = new InProcessSearchEngine();
  collection = "";
  vaultPath = "";
  collectionRoot = "";
  qmdIndexName;
  searchConfig = {};
  /**
   * Store the search configuration and forward it to the in-process engine.
   * A nullish config is replaced by an empty object.
   */
  setSearchConfig(config) {
    this.searchConfig = config ?? {};
    this.inProcess.setConfig(this.searchConfig);
  }
  /**
   * Set the collection name (usually vault name)
   */
  setCollection(name) {
    this.collection = name;
  }
  /**
   * Get the current collection name
   */
  getCollection() {
    return this.collection;
  }
  /**
   * Set the vault path for file resolution
   */
  setVaultPath(vaultPath) {
    this.vaultPath = vaultPath;
    this.inProcess.setVaultPath(vaultPath);
  }
  /**
   * Set the collection root for qmd:// URI resolution
   * (stored as an absolute path via path.resolve)
   */
  setCollectionRoot(root) {
    this.collectionRoot = path.resolve(root);
  }
  /**
   * Set qmd index name (defaults to qmd global default when omitted)
   */
  setIndexName(indexName) {
    this.qmdIndexName = indexName;
  }
  /**
   * Add or update a document in the local cache
   * Note: qmd indexing happens via qmd update command
   */
  addDocument(doc) {
    this.inProcess.addDocument(doc);
  }
  /**
   * Remove a document from the local cache
   */
  removeDocument(id) {
    this.inProcess.removeDocument(id);
  }
  /**
   * No-op for qmd - indexing is managed externally
   */
  rebuildIDF() {
  }
  /**
   * BM25 search via qmd
   * A blank query resolves to an empty array without running any backend.
   */
  async search(query, options = {}) {
    if (!query.trim()) return [];
    return this.runSearchWithFallback("search", query, options);
  }
  /**
   * Vector/semantic search via qmd vsearch
   * A blank query resolves to an empty array without running any backend.
   */
  async vsearch(query, options = {}) {
    if (!query.trim()) return [];
    return this.runSearchWithFallback("vsearch", query, options);
  }
  /**
   * Combined search with query expansion (qmd query command)
   * A blank query resolves to an empty array without running any backend.
   */
  async query(query, options = {}) {
    if (!query.trim()) return [];
    return this.runSearchWithFallback("query", query, options);
  }
  /**
   * Pick the backend for one search command and run it.
   *
   * - `searchConfig.backend === "qmd"`: use qmd when it is installed,
   *   otherwise the in-process engine.
   * - Any other backend: run in-process first. qmd is used as a fallback
   *   (unless `searchConfig.qmdFallback` is false or qmd is missing) when the
   *   in-process query throws, or when it returns no results for a command
   *   other than "search".
   *
   * @param {"search"|"vsearch"|"query"} command - qmd sub-command / in-process method to run.
   * @param {string} query - Search text.
   * @param {object} options - Search options passed to the chosen backend.
   * @returns {Promise<Array>} Search results.
   * @throws Rethrows the in-process error when no qmd fallback is possible,
   *   and propagates any error raised by the qmd query itself.
   */
  async runSearchWithFallback(command, query, options) {
    const preferQmd = this.searchConfig.backend === "qmd";
    const qmdFallbackEnabled = this.searchConfig.qmdFallback ?? true;
    if (preferQmd) {
      if (hasQmd()) {
        return this.runQmdQuery(command, query, options);
      }
      return this.runInProcessQuery(command, query, options);
    }
    let inProcessResults;
    // Only the in-process query is guarded: a failing qmd fallback must
    // surface directly instead of being caught and executed a second time.
    try {
      inProcessResults = await this.runInProcessQuery(command, query, options);
    } catch (error) {
      if (qmdFallbackEnabled && hasQmd()) {
        return this.runQmdQuery(command, query, options);
      }
      throw error;
    }
    if (inProcessResults.length > 0 || command === "search" || !qmdFallbackEnabled || !hasQmd()) {
      return inProcessResults;
    }
    return this.runQmdQuery(command, query, options);
  }
  /**
   * Dispatch a command to the matching in-process engine method;
   * anything other than "vsearch" or "query" runs the plain search.
   */
  async runInProcessQuery(command, query, options) {
    if (command === "vsearch") {
      return this.inProcess.vsearch(query, options);
    }
    if (command === "query") {
      return this.inProcess.query(query, options);
    }
    return this.inProcess.search(query, options);
  }
  /**
   * Run a qmd search command synchronously and convert its JSON results.
   *
   * qmd is asked for twice the requested limit (presumably to leave headroom
   * for the filtering done in convertResults); the final list is cut back to
   * `limit` there.
   */
  runQmdQuery(command, query, options) {
    const { limit = 10, minScore = 0, category, tags, fullContent = false, temporalBoost = false } = options;
    const args = [command, query, "-n", String(limit * 2), "--json"];
    if (this.collection) {
      args.push("-c", this.collection);
    }
    return this.convertResults(execQmd(args, this.qmdIndexName), {
      limit,
      minScore,
      category,
      tags,
      fullContent,
      temporalBoost
    });
  }
  /**
   * Convert qmd results to ClawVault SearchResult format
   *
   * Scores are normalized against the score of the first qmd result, results
   * under `ledger/archive/` are dropped, and category / tag / minScore filters
   * are applied before sorting by score and truncating to `limit`.
   * NOTE: the tag filter only applies to results found in the local cache;
   * results without a cached document are never rejected by it.
   */
  convertResults(qmdResults, options) {
    const { limit = 10, minScore = 0, category, tags, fullContent = false, temporalBoost = false } = options;
    const results = [];
    const docs = this.inProcess.getAllDocuments();
    const docsById = new Map(docs.map((doc) => [doc.id, doc]));
    const maxScore = qmdResults[0]?.score || 1;
    for (const qr of qmdResults) {
      const filePath = this.qmdUriToPath(qr.file);
      const relativePath = this.vaultPath ? path.relative(this.vaultPath, filePath) : filePath;
      const normalizedRelativePath = relativePath.replace(/\\/g, "/");
      if (normalizedRelativePath.startsWith("ledger/archive/") || normalizedRelativePath.includes("/ledger/archive/")) {
        continue;
      }
      const docId = normalizedRelativePath.replace(/\.md$/, "");
      // Cached ids may use either "/" or the platform separator.
      let doc = docsById.get(docId) ?? docsById.get(docId.split("/").join(path.sep));
      const modifiedAt = this.resolveModifiedAt(doc, filePath);
      const parts = normalizedRelativePath.split("/");
      const docCategory = parts.length > 1 ? parts[0] : "root";
      if (category && docCategory !== category) continue;
      if (tags && tags.length > 0 && doc) {
        const docTags = new Set(doc.tags);
        if (!tags.some((t) => docTags.has(t))) continue;
      }
      const normalizedScore = maxScore > 0 ? qr.score / maxScore : 0;
      const finalScore = temporalBoost ? normalizedScore * this.getRecencyFactor(modifiedAt) : normalizedScore;
      if (finalScore < minScore) continue;
      if (!doc) {
        // Not in the local cache: synthesize a minimal document record.
        doc = {
          id: docId,
          path: filePath,
          category: docCategory,
          title: qr.title || path.basename(relativePath, ".md"),
          content: "",
          // Content loaded separately if needed
          frontmatter: {},
          links: [],
          tags: [],
          modified: modifiedAt
        };
      }
      results.push({
        document: fullContent ? doc : { ...doc, content: "" },
        score: finalScore,
        snippet: this.cleanSnippet(qr.snippet),
        matchedTerms: []
        // qmd doesn't provide this
      });
    }
    return results.sort((a, b) => b.score - a.score).slice(0, limit);
  }
  /**
   * Resolve a modification time: the cached document's `modified` value when
   * a document is given, otherwise the file's mtime, or the Unix epoch when
   * the file cannot be stat'ed.
   */
  resolveModifiedAt(doc, filePath) {
    if (doc) return doc.modified;
    try {
      return fs.statSync(filePath).mtime;
    } catch {
      return /* @__PURE__ */ new Date(0);
    }
  }
  /**
   * Recency multiplier for temporal boosting: 1 for documents modified less
   * than a day ago, 0.9 up to seven days, 0.7 for anything older.
   */
  getRecencyFactor(modifiedAt) {
    const ageMs = Math.max(0, Date.now() - modifiedAt.getTime());
    const ageDays = ageMs / (24 * 60 * 60 * 1e3);
    if (ageDays < 1) return 1;
    if (ageDays <= 7) return 0.9;
    return 0.7;
  }
  /**
   * Convert qmd:// URI to file path
   *
   * The first path segment after the scheme is dropped and the remainder is
   * joined onto the collection root (or the vault path when no root is set).
   * Anything that is not a `qmd://<segment>/<path>` URI is returned unchanged.
   */
  qmdUriToPath(uri) {
    if (uri.startsWith("qmd://")) {
      const withoutScheme = uri.slice(6);
      const slashIndex = withoutScheme.indexOf("/");
      if (slashIndex > -1) {
        const relativePath = withoutScheme.slice(slashIndex + 1);
        const root = this.collectionRoot || this.vaultPath;
        if (root) {
          return path.join(root, relativePath);
        }
        return relativePath;
      }
    }
    return uri;
  }
  /**
   * Clean up qmd snippet format
   *
   * Strips `@@ ... @@ (...)` hunk headers, then keeps at most the first three
   * lines and 300 characters. A falsy snippet yields an empty string.
   */
  cleanSnippet(snippet) {
    if (!snippet) return "";
    return snippet.replace(/@@ [-+]?\d+,?\d* @@ \([^)]+\)/g, "").trim().split("\n").slice(0, 3).join("\n").slice(0, 300);
  }
  /**
   * Get all cached documents
   */
  getAllDocuments() {
    return this.inProcess.getAllDocuments();
  }
  /**
   * Get document count
   */
  get size() {
    return this.inProcess.size;
  }
  /**
   * Clear the local document cache
   */
  clear() {
    this.inProcess.clear();
  }
  /**
   * Export documents for persistence
   */
  export() {
    return this.inProcess.export();
  }
  /**
   * Import from persisted data
   */
  import(data) {
    this.inProcess.import(data);
  }
};
950
/**
 * Collect the targets of all `[[wiki links]]` in a piece of text.
 *
 * @param {string} content - Text to scan.
 * @returns {string[]} Lower-cased text found between each `[[` and `]]`, in
 *   order of appearance (duplicates are kept).
 */
function extractWikiLinks(content) {
  const links = [];
  for (const [, target] of content.matchAll(/\[\[([^\]]+)\]\]/g)) {
    links.push(target.toLowerCase());
  }
  return links;
}
954
/**
 * Collect the distinct `#tags` (word characters and hyphens) in a piece of text.
 *
 * @param {string} content - Text to scan.
 * @returns {string[]} Lower-cased tag names without the leading `#`,
 *   de-duplicated and in order of first appearance.
 */
function extractTags(content) {
  const unique = new Set();
  for (const [hashtag] of content.matchAll(/#[\w-]+/g)) {
    unique.add(hashtag.slice(1).toLowerCase());
  }
  return Array.from(unique);
}
958
+
959
+ export {
960
+ QMD_INSTALL_URL,
961
+ QMD_INSTALL_COMMAND,
962
+ QmdUnavailableError,
963
+ getQmdErrorDetails,
964
+ QmdConfigurationError,
965
+ withQmdIndexArgs,
966
+ hasQmd,
967
+ qmdUpdate,
968
+ qmdEmbed,
969
+ SearchEngine,
970
+ extractWikiLinks,
971
+ extractTags
972
+ };