clawvault 3.2.1 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -16
- package/bin/clawvault.js +0 -2
- package/bin/command-registration.test.js +15 -2
- package/bin/help-contract.test.js +16 -0
- package/bin/register-core-commands.js +88 -0
- package/bin/register-core-commands.test.js +80 -0
- package/bin/register-maintenance-commands.js +84 -7
- package/bin/register-query-commands.js +45 -28
- package/bin/register-query-commands.test.js +15 -0
- package/bin/test-helpers/cli-command-fixtures.js +1 -0
- package/dist/chunk-2PKBIKDH.js +130 -0
- package/dist/{chunk-U67V476Y.js → chunk-2ZDO52B4.js} +18 -1
- package/dist/{chunk-ZZA73MFY.js → chunk-33DOSHTA.js} +176 -36
- package/dist/chunk-35JCYSRR.js +158 -0
- package/dist/{chunk-AZYOKJYC.js → chunk-4PY655YM.js} +13 -1
- package/dist/{chunk-2JQ3O2YL.js → chunk-5EFSWZO6.js} +3 -3
- package/dist/{chunk-Y3TIJEBP.js → chunk-7SWP5FKU.js} +34 -613
- package/dist/{chunk-4VQTUVH7.js → chunk-7YZWHM36.js} +52 -26
- package/dist/{chunk-URXDAUVH.js → chunk-AXSJIFOJ.js} +174 -1
- package/dist/{chunk-4ITRXIVT.js → chunk-BLQXXX7Q.js} +6 -6
- package/dist/chunk-CSHO3PJB.js +684 -0
- package/dist/chunk-D5U3Q4N5.js +872 -0
- package/dist/chunk-DCF4KMFD.js +158 -0
- package/dist/{chunk-S5OJEGFG.js → chunk-DOIUYIXV.js} +2 -2
- package/dist/{chunk-YXQCA6B7.js → chunk-DVOUSOR3.js} +112 -7
- package/dist/{chunk-YDWHS4LJ.js → chunk-ECGJYWNA.js} +205 -33
- package/dist/{chunk-QMHPQYUV.js → chunk-EL6UBSX5.js} +7 -6
- package/dist/chunk-FZ5I2NF7.js +352 -0
- package/dist/{chunk-WJVWINEM.js → chunk-GFCHWMGD.js} +55 -6
- package/dist/{chunk-GNJL4YGR.js → chunk-GJO3CFUN.js} +30 -6
- package/dist/chunk-H3JZIB5O.js +322 -0
- package/dist/chunk-HEHO7SMV.js +51 -0
- package/dist/{chunk-UCQAOZHW.js → chunk-HGDDW24U.js} +3 -3
- package/dist/chunk-J3YUXVID.js +907 -0
- package/dist/{chunk-Y6VJKXGL.js → chunk-KCYWJDDW.js} +1 -1
- package/dist/{chunk-P5EPF6MB.js → chunk-MW5C6ZQA.js} +110 -13
- package/dist/chunk-NSXYM6EZ.js +255 -0
- package/dist/{chunk-YNIPYN4F.js → chunk-OFOCU2V4.js} +6 -5
- package/dist/{chunk-42MXU7A6.js → chunk-P62WHA27.js} +58 -47
- package/dist/chunk-PTWPPVC7.js +972 -0
- package/dist/{chunk-FAKNOB7Y.js → chunk-QFWERBDP.js} +2 -2
- package/dist/chunk-QYQAGBTM.js +2097 -0
- package/dist/chunk-RL2L6I6K.js +223 -0
- package/dist/{chunk-IIOU45CK.js → chunk-S7N7HI5E.js} +2 -2
- package/dist/{chunk-ECRZL5XR.js → chunk-T7E764W3.js} +23 -7
- package/dist/{chunk-MNPUYCHQ.js → chunk-TWMI3SNN.js} +6 -5
- package/dist/{chunk-2RAZ4ZFE.js → chunk-VBILES4B.js} +1 -1
- package/dist/{chunk-PI4WMLMG.js → chunk-VXAGOLDP.js} +1 -1
- package/dist/{chunk-SS4B7P7V.js → chunk-YIDV4VV2.js} +1 -1
- package/dist/chunk-YTRZNA64.js +37 -0
- package/dist/chunk-ZKWPCBYT.js +600 -0
- package/dist/cli/index.js +28 -21
- package/dist/commands/archive.js +3 -3
- package/dist/commands/backlog.js +1 -1
- package/dist/commands/benchmark.d.ts +12 -0
- package/dist/commands/benchmark.js +12 -0
- package/dist/commands/blocked.js +1 -1
- package/dist/commands/canvas.js +2 -2
- package/dist/commands/checkpoint.js +1 -1
- package/dist/commands/compat.js +1 -1
- package/dist/commands/context.js +8 -7
- package/dist/commands/doctor.d.ts +8 -3
- package/dist/commands/doctor.js +8 -22
- package/dist/commands/embed.js +6 -5
- package/dist/commands/entities.d.ts +8 -1
- package/dist/commands/entities.js +46 -3
- package/dist/commands/graph.js +4 -4
- package/dist/commands/inbox.d.ts +23 -0
- package/dist/commands/inbox.js +11 -0
- package/dist/commands/inject.d.ts +1 -1
- package/dist/commands/inject.js +5 -5
- package/dist/commands/kanban.js +1 -1
- package/dist/commands/link.js +5 -5
- package/dist/commands/maintain.d.ts +32 -0
- package/dist/commands/maintain.js +13 -0
- package/dist/commands/migrate-observations.js +3 -3
- package/dist/commands/observe.js +11 -10
- package/dist/commands/project.js +2 -2
- package/dist/commands/rebuild-embeddings.js +48 -17
- package/dist/commands/rebuild.js +9 -8
- package/dist/commands/recall.d.ts +14 -0
- package/dist/commands/recall.js +15 -0
- package/dist/commands/recover.js +1 -1
- package/dist/commands/reflect.js +6 -6
- package/dist/commands/repair-session.js +1 -1
- package/dist/commands/replay.js +10 -9
- package/dist/commands/session-recap.js +1 -1
- package/dist/commands/setup.js +4 -3
- package/dist/commands/shell-init.js +1 -1
- package/dist/commands/sleep.d.ts +1 -1
- package/dist/commands/sleep.js +20 -18
- package/dist/commands/status.js +40 -26
- package/dist/commands/sync-bd.js +3 -3
- package/dist/commands/tailscale.js +3 -3
- package/dist/commands/task.js +1 -1
- package/dist/commands/template.js +1 -1
- package/dist/commands/wake.d.ts +1 -1
- package/dist/commands/wake.js +10 -9
- package/dist/index.d.ts +233 -16
- package/dist/index.js +325 -111
- package/dist/{inject-DYUrDqQO.d.ts → inject-DEb_jpLi.d.ts} +3 -1
- package/dist/lib/auto-linker.js +2 -2
- package/dist/lib/canvas-layout.js +1 -1
- package/dist/lib/config.js +2 -2
- package/dist/lib/entity-index.js +1 -1
- package/dist/lib/project-utils.js +2 -2
- package/dist/lib/session-repair.js +1 -1
- package/dist/lib/session-utils.js +1 -1
- package/dist/lib/tailscale.js +1 -1
- package/dist/lib/task-utils.js +1 -1
- package/dist/lib/template-engine.js +1 -1
- package/dist/lib/webdav.js +1 -1
- package/dist/onnxruntime_binding-5QEF3SUC.node +0 -0
- package/dist/onnxruntime_binding-BKPKNEGC.node +0 -0
- package/dist/onnxruntime_binding-FMOXGIUT.node +0 -0
- package/dist/onnxruntime_binding-OI2KMXC5.node +0 -0
- package/dist/onnxruntime_binding-UX44MLAZ.node +0 -0
- package/dist/onnxruntime_binding-Y2W7N7WY.node +0 -0
- package/dist/openclaw-plugin--gqA2BZw.d.ts +267 -0
- package/dist/openclaw-plugin.d.ts +4 -0
- package/dist/openclaw-plugin.js +20 -0
- package/dist/transformers.node-A2ZRORSQ.js +46775 -0
- package/dist/types-CbL-wIKi.d.ts +36 -0
- package/dist/{types-BbWJoC1c.d.ts → types-DslKvCaj.d.ts} +51 -1
- package/hooks/clawvault/HOOK.md +25 -8
- package/hooks/clawvault/handler.js +215 -78
- package/hooks/clawvault/handler.test.js +109 -43
- package/hooks/clawvault/integrity.js +112 -0
- package/hooks/clawvault/integrity.test.js +32 -0
- package/hooks/clawvault/openclaw.plugin.json +133 -15
- package/openclaw.plugin.json +161 -194
- package/package.json +8 -5
- package/bin/register-workgraph-commands.js +0 -451
- package/dist/chunk-5PJ4STIC.js +0 -465
- package/dist/chunk-ERNE2FZ5.js +0 -189
- package/dist/chunk-HR4KN6S2.js +0 -152
- package/dist/chunk-IJBFGPCS.js +0 -33
- package/dist/chunk-K7PNYS45.js +0 -93
- package/dist/chunk-NTOPJI7W.js +0 -207
- package/dist/chunk-PG56HX5T.js +0 -154
- package/dist/chunk-QPDDIHXE.js +0 -501
- package/dist/chunk-WIOLLGAD.js +0 -190
- package/dist/chunk-WMGIIABP.js +0 -15
- package/dist/ledger-B7g7jhqG.d.ts +0 -44
- package/dist/plugin/index.d.ts +0 -352
- package/dist/plugin/index.js +0 -4264
- package/dist/registry-BR4326o0.d.ts +0 -30
- package/dist/store-CA-6sKCJ.d.ts +0 -34
- package/dist/thread-B9LhXNU0.d.ts +0 -41
- package/dist/workgraph/index.d.ts +0 -5
- package/dist/workgraph/index.js +0 -23
- package/dist/workgraph/ledger.d.ts +0 -2
- package/dist/workgraph/ledger.js +0 -25
- package/dist/workgraph/registry.d.ts +0 -2
- package/dist/workgraph/registry.js +0 -19
- package/dist/workgraph/store.d.ts +0 -2
- package/dist/workgraph/store.js +0 -25
- package/dist/workgraph/thread.d.ts +0 -2
- package/dist/workgraph/thread.js +0 -25
- package/dist/workgraph/types.d.ts +0 -54
- package/dist/workgraph/types.js +0 -7
|
@@ -0,0 +1,972 @@
|
|
|
1
|
+
import {
|
|
2
|
+
EmbeddingStore,
|
|
3
|
+
cosineSimilarity,
|
|
4
|
+
embedText,
|
|
5
|
+
resolveEmbeddingConfig
|
|
6
|
+
} from "./chunk-H3JZIB5O.js";
|
|
7
|
+
|
|
8
|
+
// src/lib/search.ts
|
|
9
|
+
import { execFileSync, spawnSync } from "child_process";
|
|
10
|
+
import * as fs from "fs";
|
|
11
|
+
import * as path from "path";
|
|
12
|
+
|
|
13
|
+
// src/lib/hosted-rerank.ts
|
|
14
|
+
// Built-in rerank endpoint URL for each hosted provider; used when the search
// config does not supply its own `rerank.endpoint`.
var DEFAULT_ENDPOINTS = {
  jina: "https://api.jina.ai/v1/rerank",
  voyage: "https://api.voyageai.com/v1/rerank",
  siliconflow: "https://api.siliconflow.cn/v1/rerank",
  pinecone: "https://api.pinecone.io/rerank"
};

// Built-in rerank model name for each hosted provider; used when the search
// config does not supply its own `rerank.model`.
var DEFAULT_MODELS = {
  jina: "jina-reranker-v2-base-multilingual",
  voyage: "rerank-2",
  siliconflow: "BAAI/bge-reranker-v2-m3",
  pinecone: "bge-reranker-v2-m3"
};
|
|
26
|
+
/**
 * Clamp a rerank blend weight into the closed interval [0, 1].
 *
 * @param {*} value - Candidate weight. Anything that is not a finite number
 *   (NaN, +/-Infinity, strings, undefined, ...) yields the fallback 0.6.
 * @returns {number} 0 for negative input, 1 for input above 1, otherwise the
 *   input unchanged.
 */
function clampWeight(value) {
  if (!Number.isFinite(value)) {
    return 0.6;
  }
  return value < 0 ? 0 : value > 1 ? 1 : value;
}
|
|
32
|
+
/**
 * Resolve the API key used for a hosted rerank provider.
 *
 * Lookup order:
 *   1. the explicitly configured key (trimmed), when non-empty;
 *   2. the provider-specific environment variable(s);
 *   3. the generic `RERANK_API_KEY` environment variable.
 *
 * @param {string} provider - Provider identifier (e.g. "jina", "voyage").
 * @param {string} [configured] - Key taken from the search configuration.
 * @returns {string|undefined} The trimmed key, or the trimmed value of
 *   `RERANK_API_KEY` (possibly empty / undefined) when nothing else matched.
 */
function resolveApiKey(provider, configured) {
  const explicit = configured?.trim();
  if (explicit) return explicit;
  const envKeyByProvider = {
    jina: ["JINA_API_KEY"],
    voyage: ["VOYAGE_API_KEY"],
    siliconflow: ["SILICONFLOW_API_KEY"],
    pinecone: ["PINECONE_API_KEY"]
  };
  // Own-property lookup: a provider that is not in the table (typo, custom
  // provider, or an inherited name such as "constructor") simply has no
  // provider-specific variables instead of making the loop below throw.
  const envKeys = Object.hasOwn(envKeyByProvider, provider) ? envKeyByProvider[provider] : [];
  for (const key of envKeys) {
    const value = process.env[key]?.trim();
    if (value) return value;
  }
  return process.env.RERANK_API_KEY?.trim();
}
|
|
46
|
+
/**
 * Build the effective hosted-rerank configuration from the search config.
 *
 * @param {object} [searchConfig] - Search configuration; only its `rerank`
 *   section (`provider`, `apiKey`, `endpoint`, `model`, `weight`) is read.
 * @returns {{provider: string, endpoint: string, model: string, apiKey: string, weight: number}|null}
 *   The resolved configuration, or null when reranking is disabled: provider
 *   is missing or "none", no API key could be resolved, or no endpoint is
 *   configured and the provider has no built-in default endpoint.
 */
function resolveRerankConfig(searchConfig) {
  const rerank = searchConfig?.rerank;
  const provider = rerank?.provider ?? "none";
  if (provider === "none") {
    return null;
  }
  const apiKey = resolveApiKey(provider, rerank?.apiKey);
  if (!apiKey) {
    return null;
  }
  const rawEndpoint = rerank?.endpoint?.trim() || DEFAULT_ENDPOINTS[provider];
  if (typeof rawEndpoint !== "string") {
    // No usable endpoint for this provider: treat rerank as disabled rather
    // than throwing while trying to normalize a missing URL.
    return null;
  }
  // Strip trailing slashes so the endpoint is used verbatim as the POST URL.
  const endpoint = rawEndpoint.replace(/\/+$/, "");
  const model = rerank?.model?.trim() || DEFAULT_MODELS[provider];
  const weight = clampWeight(rerank?.weight ?? 0.6);
  return {
    provider,
    endpoint,
    model,
    apiKey,
    weight
  };
}
|
|
66
|
+
/**
 * Score documents against a query using a hosted rerank HTTP endpoint.
 *
 * The request is a JSON POST with `model`, `query`, `documents` and `top_n`,
 * authenticated with a Bearer token and aborted after 15 seconds. The response
 * is expected to carry an array under `results` or `data` whose items have an
 * `index` into `documents` and a `relevance_score` or `score`.
 *
 * This is best-effort: any failure (network error, timeout, non-2xx status,
 * unexpected payload) yields null so callers can keep their original ranking.
 *
 * @param {string} query - Query text.
 * @param {string[]} documents - Document texts to score.
 * @param {{endpoint: string, model: string, apiKey: string}} config - Resolved rerank config.
 * @returns {Promise<number[]|null>} One score per input document (0 for
 *   documents the service did not score), or null on failure / empty input.
 */
async function crossEncoderRerank(query, documents, config) {
  if (!documents.length) return null;
  try {
    const response = await fetch(config.endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${config.apiKey}`
      },
      body: JSON.stringify({
        model: config.model,
        query,
        documents,
        top_n: documents.length
      }),
      signal: AbortSignal.timeout(15e3)
    });
    if (!response.ok) {
      return null;
    }
    const payload = await response.json();
    const items = payload.results ?? payload.data;
    if (!Array.isArray(items)) {
      return null;
    }
    const scores = new Array(documents.length).fill(0);
    for (const item of items) {
      const position = item.index;
      // Require a real array slot: a fractional index would otherwise attach
      // a stray property to the array instead of scoring a document.
      if (!Number.isInteger(position) || position < 0 || position >= documents.length) {
        continue;
      }
      const score = Number(item.relevance_score ?? item.score ?? 0);
      // Ignore non-numeric scores so the result never contains NaN.
      if (Number.isFinite(score)) {
        scores[position] = score;
      }
    }
    return scores;
  } catch {
    // Deliberate best-effort: reranking is optional, so failures are not fatal.
    return null;
  }
}
|
|
103
|
+
|
|
104
|
+
// src/lib/in-process-search.ts
|
|
105
|
+
// BM25 term-frequency saturation parameter (k1).
var BM25_K1 = 1.2;
// BM25 length-normalization parameter (b).
var BM25_B = 0.75;
// Smoothing constant added to ranks in reciprocal-rank fusion.
var RRF_K = 60;
// Default chunk length, in characters, when the config gives no `chunkSize`.
var DEFAULT_CHUNK_SIZE = 700;
// Default overlap between consecutive chunks, in characters, when the config
// gives no `chunkOverlap`.
var DEFAULT_CHUNK_OVERLAP = 100;
|
|
110
|
+
/**
 * Split text into lowercase search tokens.
 *
 * Tokens are maximal runs of Unicode letters, Unicode digits, "_" and "-";
 * tokens of length 0 or 1 are discarded.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Tokens in order of appearance (duplicates preserved).
 */
function tokenize(text) {
  const tokens = [];
  for (const piece of text.toLowerCase().split(/[^\p{L}\p{N}_-]+/u)) {
    const token = piece.trim();
    if (token.length > 1) {
      tokens.push(token);
    }
  }
  return tokens;
}
|
|
113
|
+
/**
 * Min-max normalize a score into the range spanned by `min` and `max`.
 *
 * @param {number} value - Raw score; non-finite values normalize to 0.
 * @param {number} min - Lowest score in the population.
 * @param {number} max - Highest score in the population.
 * @returns {number} `(value - min) / (max - min)`; when the range is empty or
 *   inverted (`max <= min`) the result is 1 for positive values and 0 otherwise.
 */
function normalizeScore(value, min, max) {
  if (!Number.isFinite(value)) {
    return 0;
  }
  const hasRange = !(max <= min);
  if (hasRange) {
    return (value - min) / (max - min);
  }
  return value > 0 ? 1 : 0;
}
|
|
118
|
+
/**
 * Return copies of the candidates with `score` min-max normalized across the
 * list. Only finite scores contribute to the min/max; each candidate's score
 * is then passed through `normalizeScore`.
 *
 * @param {Array<{score: number}>} candidates - Candidates to normalize.
 * @returns {Array<{score: number}>} New candidate objects (the same array
 *   instance is returned when the input is empty).
 */
function normalizeCandidateScores(candidates) {
  if (candidates.length === 0) {
    return candidates;
  }
  const finiteScores = [];
  for (const { score } of candidates) {
    if (Number.isFinite(score)) {
      finiteScores.push(score);
    }
  }
  const lowest = Math.min(...finiteScores);
  const highest = Math.max(...finiteScores);
  return candidates.map((entry) => {
    const score = normalizeScore(entry.score, lowest, highest);
    return { ...entry, score };
  });
}
|
|
128
|
+
/**
 * In-memory hybrid search engine.
 *
 * Documents are split into overlapping text chunks that are scored with BM25;
 * optional per-document embeddings (read from the embedding store) provide a
 * semantic ranking. The two rankings are combined with weighted reciprocal-rank
 * fusion and can be re-scored by a hosted cross-encoder when one is configured.
 *
 * Documents are expected to expose `id`, `title`, `content`, and optionally
 * `category`, `tags` and `modified`.
 */
var InProcessSearchEngine = class {
  vaultPath = "";
  config = {};
  // Document id -> document.
  documents = /* @__PURE__ */ new Map();
  // Chunk id -> { id, docId, text, termFreq, length }.
  chunks = /* @__PURE__ */ new Map();
  // Document id -> ids of the chunks produced from it.
  chunkIdsByDoc = /* @__PURE__ */ new Map();
  // Term -> number of chunks containing the term.
  documentFrequency = /* @__PURE__ */ new Map();
  // Sum of token counts over all indexed chunks (for the BM25 average length).
  totalChunkLength = 0;
  embeddingStore = new EmbeddingStore(process.cwd());
  embeddingStoreLoaded = false;

  /**
   * Point the engine (and its embedding store) at a vault directory and force
   * the embedding store to be loaded again on next use.
   * @param {string} vaultPath
   */
  setVaultPath(vaultPath) {
    this.vaultPath = vaultPath;
    this.embeddingStore.setVaultPath(vaultPath);
    this.embeddingStoreLoaded = false;
  }

  /**
   * Replace the search configuration; null/undefined resets it to `{}`.
   * @param {object} [config]
   */
  setConfig(config) {
    this.config = config ?? {};
  }

  /**
   * Index a document, replacing any previously indexed document with the same id.
   * The title and content are chunked together; chunks without tokens are skipped.
   * @param {{id: string, title: string, content: string}} doc
   */
  addDocument(doc) {
    const existing = this.documents.get(doc.id);
    if (existing) {
      this.removeDocument(existing.id);
    }
    this.documents.set(doc.id, doc);
    const chunkIds = [];
    const chunkTexts = this.splitIntoChunks(`${doc.title}\n${doc.content}`);
    for (let index = 0; index < chunkTexts.length; index += 1) {
      const text = chunkTexts[index];
      const terms = tokenize(text);
      if (!terms.length) continue;
      const termFreq = /* @__PURE__ */ new Map();
      for (const term of terms) {
        termFreq.set(term, (termFreq.get(term) ?? 0) + 1);
      }
      // Chunk ids are 1-based positions within the document.
      const chunkId = `${doc.id}#${index + 1}`;
      chunkIds.push(chunkId);
      this.totalChunkLength += terms.length;
      this.chunks.set(chunkId, {
        id: chunkId,
        docId: doc.id,
        text,
        termFreq,
        length: terms.length
      });
      for (const term of termFreq.keys()) {
        this.documentFrequency.set(term, (this.documentFrequency.get(term) ?? 0) + 1);
      }
    }
    this.chunkIdsByDoc.set(doc.id, chunkIds);
  }

  /**
   * Remove a document and all of its chunks, rolling back their contribution
   * to the term statistics.
   * @param {string} docId
   */
  removeDocument(docId) {
    const existingChunkIds = this.chunkIdsByDoc.get(docId) ?? [];
    for (const chunkId of existingChunkIds) {
      const chunk = this.chunks.get(chunkId);
      if (!chunk) continue;
      this.totalChunkLength = Math.max(0, this.totalChunkLength - chunk.length);
      for (const term of chunk.termFreq.keys()) {
        const next = (this.documentFrequency.get(term) ?? 0) - 1;
        if (next <= 0) {
          this.documentFrequency.delete(term);
        } else {
          this.documentFrequency.set(term, next);
        }
      }
      this.chunks.delete(chunkId);
    }
    this.chunkIdsByDoc.delete(docId);
    this.documents.delete(docId);
  }

  /** Drop every indexed document, chunk and term statistic. */
  clear() {
    this.documents.clear();
    this.chunks.clear();
    this.chunkIdsByDoc.clear();
    this.documentFrequency.clear();
    this.totalChunkLength = 0;
  }

  /** @returns {object[]} All indexed documents, in insertion order. */
  getAllDocuments() {
    return [...this.documents.values()];
  }

  /** Number of indexed documents. */
  get size() {
    return this.documents.size;
  }

  /** @returns {{documents: object[]}} Snapshot that `import` can re-index. */
  export() {
    return { documents: this.getAllDocuments() };
  }

  /**
   * Replace the whole index with the documents of a snapshot.
   * @param {{documents: object[]}} data
   */
  import(data) {
    this.clear();
    for (const doc of data.documents) {
      this.addDocument(doc);
    }
  }

  /**
   * Hybrid search: BM25 over chunks fused with the semantic ranking, then
   * optionally reranked by the hosted cross-encoder.
   * @param {string} query
   * @param {{limit?: number, minScore?: number, category?: string, tags?: string[], temporalBoost?: boolean, fullContent?: boolean}} [options]
   * @returns {Promise<Array<{document: object, score: number, snippet: string, matchedTerms: string[]}>>}
   */
  async search(query, options = {}) {
    if (!query.trim()) return [];
    const limit = Math.max(1, options.limit ?? 10);
    // Over-fetch candidates so fusion and reranking have room to reorder.
    const bm25Candidates = this.runBm25(query, options, limit * 5);
    const semanticRanks = await this.getSemanticRanks(query, options);
    let fused = this.fuseHybrid(bm25Candidates, semanticRanks, limit * 5);
    fused = await this.applyCrossEncoderRerank(query, fused);
    return this.toSearchResults(fused, options, limit);
  }

  /**
   * Semantic-only search based on stored document embeddings. Returns an empty
   * list when no semantic ranking is available.
   * @param {string} query
   * @param {object} [options] - Same options as `search`.
   * @returns {Promise<object[]>}
   */
  async vsearch(query, options = {}) {
    if (!query.trim()) return [];
    const limit = Math.max(1, options.limit ?? 10);
    const semanticRanks = await this.getSemanticRanks(query, options);
    if (semanticRanks.length === 0) {
      return [];
    }
    const candidates = [];
    for (const { docId, score } of semanticRanks) {
      const doc = this.documents.get(docId);
      if (!doc || !this.matchesFilters(doc, options)) continue;
      candidates.push({
        id: `${docId}#semantic`,
        docId,
        snippet: this.buildSnippet(doc.content, []),
        score,
        matchedTerms: []
      });
    }
    const reranked = await this.applyCrossEncoderRerank(query, candidates);
    return this.toSearchResults(reranked, options, limit);
  }

  /** Alias of `search`. */
  async query(queryText, options = {}) {
    return this.search(queryText, options);
  }

  /**
   * Split text into overlapping chunks of roughly `chunkSize` characters.
   * Each cut is pushed forward to the next whitespace found within 100
   * characters, when there is one, so words are not split mid-way.
   * @param {string} content
   * @returns {string[]} Non-empty, trimmed chunks.
   */
  splitIntoChunks(content) {
    const normalized = content.replace(/\r\n/g, "\n").trim();
    if (!normalized) return [];
    // A NaN size/overlap would poison the arithmetic below and end the loop
    // after at most one chunk, silently dropping text; use the defaults instead.
    const requestedSize = Number(this.config.chunkSize ?? DEFAULT_CHUNK_SIZE);
    const requestedOverlap = Number(this.config.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP);
    const chunkSize = Math.max(200, Number.isNaN(requestedSize) ? DEFAULT_CHUNK_SIZE : requestedSize);
    const overlap = Math.max(
      0,
      // Overlap is capped below the chunk size so every iteration advances.
      Math.min(chunkSize - 1, Number.isNaN(requestedOverlap) ? DEFAULT_CHUNK_OVERLAP : requestedOverlap)
    );
    const chunks = [];
    let start = 0;
    while (start < normalized.length) {
      let end = Math.min(normalized.length, start + chunkSize);
      if (end < normalized.length) {
        const boundary = normalized.slice(end, Math.min(normalized.length, end + 100)).search(/\s/);
        if (boundary >= 0) {
          end += boundary;
        }
      }
      const piece = normalized.slice(start, end).trim();
      if (piece) {
        chunks.push(piece);
      }
      if (end >= normalized.length) break;
      start = Math.max(0, end - overlap);
    }
    return chunks;
  }

  /**
   * Score every chunk of the filtered documents against the query with BM25.
   * @param {string} query
   * @param {object} options - Filters (`category`, `tags`).
   * @param {number} topK - Maximum number of candidates to return.
   * @returns {object[]} Best-first chunk candidates with scores normalized
   *   across the returned list.
   */
  runBm25(query, options, topK) {
    const queryTerms = tokenize(query);
    if (!queryTerms.length || this.chunks.size === 0) {
      return [];
    }
    const uniqueTerms = [...new Set(queryTerms)];
    const chunkCount = this.chunks.size;
    const avgChunkLength = chunkCount > 0 ? this.totalChunkLength / chunkCount : 1;
    const candidates = [];
    for (const chunk of this.chunks.values()) {
      const doc = this.documents.get(chunk.docId);
      if (!doc || !this.matchesFilters(doc, options)) continue;
      let score = 0;
      const matchedTerms = [];
      for (const term of uniqueTerms) {
        const termFreq = chunk.termFreq.get(term) ?? 0;
        if (termFreq === 0) continue;
        matchedTerms.push(term);
        const docFreq = this.documentFrequency.get(term) ?? 0;
        // The "+ 1" inside the log keeps the IDF non-negative.
        const idf = Math.log((chunkCount - docFreq + 0.5) / (docFreq + 0.5) + 1);
        const tfNorm = termFreq * (BM25_K1 + 1) / (termFreq + BM25_K1 * (1 - BM25_B + BM25_B * (chunk.length / Math.max(1, avgChunkLength))));
        score += idf * tfNorm;
      }
      if (score <= 0) continue;
      candidates.push({
        id: chunk.id,
        docId: chunk.docId,
        snippet: this.buildSnippet(chunk.text, matchedTerms),
        score,
        matchedTerms
      });
    }
    return normalizeCandidateScores(
      candidates.sort((left, right) => right.score - left.score).slice(0, topK)
    );
  }

  /**
   * Rank filtered documents by cosine similarity between the query embedding
   * and each document's stored embedding.
   *
   * Returns an empty list when embeddings are not configured, the store is not
   * compatible with the configured provider/model, embedding the query fails,
   * or no filtered document has a stored vector.
   * @param {string} query
   * @param {object} options - Filters plus `limit` (sizes the cut-off).
   * @returns {Promise<Array<{docId: string, rank: number, score: number}>>}
   *   Best-first entries with a 0-based rank and a normalized score.
   */
  async getSemanticRanks(query, options) {
    const embeddingConfig = resolveEmbeddingConfig(this.config);
    if (!embeddingConfig) {
      return [];
    }
    if (!this.loadEmbeddingStoreIfNeeded(embeddingConfig.provider, embeddingConfig.model)) {
      return [];
    }
    let queryEmbedding;
    try {
      queryEmbedding = await embedText(query, embeddingConfig, { isQuery: true });
    } catch {
      // Best-effort: without a query embedding the caller relies on BM25 only.
      return [];
    }
    const ranked = Array.from(this.documents.values()).filter((doc) => this.matchesFilters(doc, options)).map((doc) => {
      const vector = this.embeddingStore.get(doc.id);
      if (!vector) {
        return null;
      }
      return {
        docId: doc.id,
        score: cosineSimilarity(queryEmbedding, vector)
      };
    }).filter((entry) => entry !== null).sort((left, right) => right.score - left.score).slice(0, Math.max(20, (options.limit ?? 10) * 5));
    if (ranked.length === 0) {
      return [];
    }
    const min = Math.min(...ranked.map((entry) => entry.score));
    const max = Math.max(...ranked.map((entry) => entry.score));
    return ranked.map((entry, index) => ({
      docId: entry.docId,
      rank: index,
      score: normalizeScore(entry.score, min, max)
    }));
  }

  /**
   * Combine the BM25 and semantic rankings with weighted reciprocal-rank
   * fusion (0.65 for BM25, 0.35 for semantic).
   *
   * With no BM25 candidates the semantic ranking is returned on its own with
   * unweighted, un-normalized reciprocal-rank scores. Otherwise documents seen
   * only by the semantic ranking are appended and the fused scores are
   * normalized across the returned list.
   * @param {object[]} bm25Candidates - Best-first chunk candidates.
   * @param {Array<{docId: string, rank: number, score: number}>} semanticRanks
   * @param {number} topK - Maximum number of candidates to return.
   * @returns {object[]} Best-first fused candidates.
   */
  fuseHybrid(bm25Candidates, semanticRanks, topK) {
    if (bm25Candidates.length === 0) {
      const semanticOnly = [];
      for (const entry of semanticRanks) {
        const doc = this.documents.get(entry.docId);
        // Skip ranks whose document is no longer indexed instead of crashing.
        if (!doc) continue;
        semanticOnly.push({
          id: `${entry.docId}#semantic`,
          docId: entry.docId,
          snippet: this.buildSnippet(doc.content, []),
          score: 1 / (RRF_K + entry.rank + 1),
          matchedTerms: []
        });
      }
      return semanticOnly.slice(0, topK);
    }
    const semanticRankMap = new Map(
      semanticRanks.map((entry) => [entry.docId, { rank: entry.rank, score: entry.score }])
    );
    const fused = [];
    for (let index = 0; index < bm25Candidates.length; index += 1) {
      const candidate = bm25Candidates[index];
      const bm25Rrf = 0.65 / (RRF_K + index + 1);
      const semantic = semanticRankMap.get(candidate.docId);
      const semanticRrf = semantic ? 0.35 / (RRF_K + semantic.rank + 1) : 0;
      fused.push({
        ...candidate,
        score: bm25Rrf + semanticRrf
      });
    }
    const seenDocs = new Set(fused.map((entry) => entry.docId));
    for (const entry of semanticRanks) {
      if (seenDocs.has(entry.docId)) continue;
      const doc = this.documents.get(entry.docId);
      if (!doc) continue;
      fused.push({
        id: `${entry.docId}#semantic`,
        docId: entry.docId,
        snippet: this.buildSnippet(doc.content, []),
        matchedTerms: [],
        score: 0.35 / (RRF_K + entry.rank + 1)
      });
    }
    return normalizeCandidateScores(
      fused.sort((left, right) => right.score - left.score).slice(0, topK)
    );
  }

  /**
   * Blend candidate scores with hosted cross-encoder scores:
   * `(1 - weight) * candidate.score + weight * normalizedRerankScore`.
   * The candidates are returned unchanged when rerank is not configured, the
   * list is empty, or the rerank call yields nothing.
   * @param {string} query
   * @param {object[]} candidates
   * @returns {Promise<object[]>} Candidates sorted best-first by blended score.
   */
  async applyCrossEncoderRerank(query, candidates) {
    const rerankConfig = resolveRerankConfig(this.config);
    if (!rerankConfig || candidates.length === 0) {
      return candidates;
    }
    // Each candidate is sent as "<title>\n<snippet>"; the doc id stands in for
    // the title when the document is no longer indexed.
    const texts = candidates.map((candidate) => {
      const doc = this.documents.get(candidate.docId);
      const title = doc?.title ?? candidate.docId;
      return `${title}\n${candidate.snippet}`.trim();
    });
    const rerankScores = await crossEncoderRerank(query, texts, rerankConfig);
    if (!rerankScores) {
      return candidates;
    }
    const normalizedRerank = normalizeCandidateScores(
      candidates.map((candidate, index) => ({
        ...candidate,
        score: rerankScores[index] ?? 0
      }))
    );
    const weighted = candidates.map((candidate, index) => ({
      ...candidate,
      score: (1 - rerankConfig.weight) * candidate.score + rerankConfig.weight * normalizedRerank[index].score
    }));
    return weighted.sort((left, right) => right.score - left.score);
  }

  /**
   * Turn candidates into public search results: apply the optional recency
   * factor, drop results below `minScore`, sort best-first and cut to `limit`.
   * Document content is blanked unless `options.fullContent` is set.
   * @param {object[]} candidates
   * @param {{minScore?: number, temporalBoost?: boolean, fullContent?: boolean}} options
   * @param {number} limit
   * @returns {Array<{document: object, score: number, snippet: string, matchedTerms: string[]}>}
   */
  toSearchResults(candidates, options, limit) {
    const minScore = options.minScore ?? 0;
    const boosted = candidates.map((candidate) => {
      const doc = this.documents.get(candidate.docId);
      if (!doc) return null;
      const temporal = options.temporalBoost ? this.getRecencyFactor(doc.modified) : 1;
      return {
        candidate,
        doc,
        score: candidate.score * temporal
      };
    }).filter((entry) => entry !== null).filter((entry) => entry.score >= minScore).sort((left, right) => right.score - left.score).slice(0, limit);
    return boosted.map((entry) => ({
      document: options.fullContent ? entry.doc : { ...entry.doc, content: "" },
      score: entry.score,
      snippet: entry.candidate.snippet,
      matchedTerms: entry.candidate.matchedTerms
    }));
  }

  /**
   * Check a document against the `category` and `tags` filters. Tag matching
   * is case-insensitive and passes when any requested tag is present.
   * @param {{category?: string, tags?: string[]}} doc
   * @param {{category?: string, tags?: string[]}} options
   * @returns {boolean}
   */
  matchesFilters(doc, options) {
    if (options.category && doc.category !== options.category) {
      return false;
    }
    if (options.tags?.length) {
      // Documents without a tags array simply match no tag filter.
      const ownTags = Array.isArray(doc.tags) ? doc.tags : [];
      const docTags = new Set(ownTags.map((tag) => tag.toLowerCase()));
      const hasTag = options.tags.some((tag) => docTags.has(tag.toLowerCase()));
      if (!hasTag) return false;
    }
    return true;
  }

  /**
   * Recency multiplier: 1 for documents modified less than a day ago, 0.9 up
   * to seven days, 0.7 otherwise (including unparseable or missing dates).
   * @param {Date|string|number} modifiedAt - Modification time; non-Date
   *   values (e.g. ISO strings left by a JSON round trip) are parsed with `Date`.
   * @returns {number}
   */
  getRecencyFactor(modifiedAt) {
    const modifiedMs = modifiedAt instanceof Date ? modifiedAt.getTime() : new Date(modifiedAt).getTime();
    const ageMs = Math.max(0, Date.now() - modifiedMs);
    const ageDays = ageMs / (24 * 60 * 60 * 1e3);
    if (ageDays < 1) return 1;
    if (ageDays <= 7) return 0.9;
    return 0.7;
  }

  /**
   * Build a snippet of at most 260 characters from whitespace-collapsed text,
   * starting up to 80 characters before the first matched term that occurs in
   * the text, or at the beginning when there is none.
   * @param {string} text
   * @param {string[]} matchedTerms
   * @returns {string}
   */
  buildSnippet(text, matchedTerms) {
    const normalized = text.replace(/\s+/g, " ").trim();
    if (!normalized) return "";
    if (!matchedTerms.length) {
      return normalized.slice(0, 260);
    }
    const lower = normalized.toLowerCase();
    const firstTerm = matchedTerms.find((term) => lower.includes(term.toLowerCase()));
    if (!firstTerm) {
      return normalized.slice(0, 260);
    }
    const start = Math.max(0, lower.indexOf(firstTerm.toLowerCase()) - 80);
    const end = Math.min(normalized.length, start + 260);
    return normalized.slice(start, end).trim();
  }

  /**
   * Load the embedding store on first use and check it against the configured
   * provider/model.
   * @param {string} provider
   * @param {string} model
   * @returns {boolean} false when the store reports itself incompatible;
   *   otherwise the signature is set on the store and true is returned.
   */
  loadEmbeddingStoreIfNeeded(provider, model) {
    if (!this.embeddingStoreLoaded) {
      this.embeddingStore.load();
      this.embeddingStoreLoaded = true;
    }
    if (!this.embeddingStore.isCompatible(provider, model)) {
      return false;
    }
    this.embeddingStore.setSignature(provider, model);
    return true;
  }
};
|
|
484
|
+
|
|
485
|
+
// src/lib/search.ts
|
|
486
|
+
// Project page shown to users who need to install qmd.
var QMD_INSTALL_URL = "https://github.com/tobi/qmd";
// Shell command suggested for installing qmd.
var QMD_INSTALL_COMMAND = "bun install -g github:tobi/qmd";
// Environment variable consulted for the qmd index name when none is passed explicitly.
var QMD_INDEX_ENV_VAR = "CLAWVAULT_QMD_INDEX";

// User-facing message and remediation hint for each qmd failure code.
var QMD_ERROR_MESSAGES = {
  NOT_INSTALLED: {
    code: "NOT_INSTALLED",
    message: "qmd is not installed",
    hint: `Install qmd to enable ClawVault search and indexing:
${QMD_INSTALL_COMMAND}

For more information: ${QMD_INSTALL_URL}`
  },
  NOT_CONFIGURED: {
    code: "NOT_CONFIGURED",
    message: "qmd collection is not configured",
    hint: "Run `clawvault doctor` to diagnose configuration issues, or `clawvault migrate` to fix common setup problems."
  },
  COLLECTION_NOT_FOUND: {
    code: "COLLECTION_NOT_FOUND",
    message: "qmd collection not found",
    hint: "The configured qmd collection does not exist. Run `clawvault migrate` to recreate it, or `qmd collection add <name> <path>` manually."
  },
  EXECUTION_FAILED: {
    code: "EXECUTION_FAILED",
    message: "qmd command failed",
    hint: "Run `clawvault doctor` to diagnose qmd issues."
  }
};
|
|
514
|
+
/**
 * Error raised when qmd cannot be used. Carries a machine-readable `code` and
 * a user-facing remediation `hint`, both looked up in `QMD_ERROR_MESSAGES`.
 */
var QmdUnavailableError = class extends Error {
  code;
  hint;

  /**
   * @param {string} [code="NOT_INSTALLED"] - Key of `QMD_ERROR_MESSAGES`.
   * @param {string} [additionalContext] - Extra detail appended to the
   *   message after ": " when provided.
   */
  constructor(code = "NOT_INSTALLED", additionalContext) {
    const { message, hint } = QMD_ERROR_MESSAGES[code];
    super(additionalContext ? `${message}: ${additionalContext}` : message);
    this.name = "QmdUnavailableError";
    this.code = code;
    this.hint = hint;
  }

  /** @returns {string} "Error: <message>", a blank line, then the hint. */
  toUserMessage() {
    return `Error: ${this.message}\n\n${this.hint}`;
  }
};
|
|
531
|
+
/**
 * Look up the message/hint record for a qmd error code.
 *
 * @param {string} code - Key of `QMD_ERROR_MESSAGES`.
 * @returns {{code: string, message: string, hint: string}|undefined} The
 *   record, or undefined for a code that is not in the table.
 */
function getQmdErrorDetails(code) {
  const details = QMD_ERROR_MESSAGES[code];
  return details;
}
|
|
534
|
+
/**
 * Error describing a qmd configuration problem, with a remediation hint
 * exposed as the `hint` property.
 */
var QmdConfigurationError = class extends Error {
  /**
   * @param {string} message - Description of the problem.
   * @param {string} hint - Suggested way to fix it.
   */
  constructor(message, hint) {
    super(message);
    Object.assign(this, { hint, name: "QmdConfigurationError" });
  }
};
|
|
541
|
+
/**
 * Make sure a qmd argument list requests JSON output.
 *
 * @param {string[]} args - Argument list.
 * @returns {string[]} The same array when it already contains "--json",
 *   otherwise a new array with "--json" appended.
 */
function ensureJsonArgs(args) {
  if (args.includes("--json")) {
    return args;
  }
  const withJson = [...args];
  withJson.push("--json");
  return withJson;
}
|
|
544
|
+
/**
 * Pick the qmd index name: the explicit argument when non-blank, otherwise the
 * value of the environment variable named by `QMD_INDEX_ENV_VAR`.
 *
 * @param {string} [indexName] - Explicitly requested index name.
 * @returns {string|undefined} Trimmed index name, or undefined when neither
 *   source provides a non-blank value.
 */
function resolveQmdIndexName(indexName) {
  return indexName?.trim() || process.env[QMD_INDEX_ENV_VAR]?.trim() || void 0;
}
|
|
552
|
+
/**
 * Return a copy of a qmd argument list with `--index <name>` prepended when an
 * index name can be resolved and the list does not already contain "--index".
 *
 * @param {string[]} args - Argument list (never mutated).
 * @param {string} [indexName] - Explicit index name; resolved through
 *   `resolveQmdIndexName`.
 * @returns {string[]} A new argument array.
 */
function withQmdIndexArgs(args, indexName) {
  const resolved = args.includes("--index") ? void 0 : resolveQmdIndexName(indexName);
  return resolved ? ["--index", resolved, ...args] : [...args];
}
|
|
562
|
+
/**
 * Parse JSON without throwing.
 *
 * @param {string} raw - Text to parse.
 * @returns {*} The parsed value, or null when the text is not valid JSON
 *   (indistinguishable from a literal JSON `null`).
 */
function tryParseJson(raw) {
  let parsed = null;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Invalid JSON: keep the null default.
  }
  return parsed;
}
|
|
569
|
+
/**
 * Cut the span from the first "[" or "{" to the last "]" or "}" out of text
 * that may have non-JSON content around the payload.
 *
 * @param {string} raw - Text possibly containing a JSON payload.
 * @returns {string|null} The candidate payload (not validated), or null when
 *   no opening bracket exists or no closing bracket follows it.
 */
function extractJsonPayload(raw) {
  const openAt = raw.search(/[\[{]/);
  if (openAt < 0) {
    return null;
  }
  const closeAt = Math.max(raw.lastIndexOf("]"), raw.lastIndexOf("}"));
  return closeAt > openAt ? raw.slice(openAt, closeAt + 1) : null;
}
|
|
576
|
+
/**
 * Remove progress/log lines from qmd output.
 *
 * A line is dropped when, after trimming, it starts with "[node-llama-cpp]",
 * starts with "Expanding query", starts with "Searching " and ends with
 * "queries...", or starts with a box-drawing character (├ └ ─ │). Blank lines
 * and all other lines are kept unchanged.
 *
 * @param {string} raw - Raw qmd output.
 * @returns {string} The remaining lines joined with "\n".
 */
function stripQmdNoise(raw) {
  const kept = [];
  for (const line of raw.split("\n")) {
    const t = line.trim();
    const isNoise = t !== "" && (
      t.startsWith("[node-llama-cpp]") ||
      t.startsWith("Expanding query") ||
      (t.startsWith("Searching ") && t.endsWith("queries...")) ||
      /^[├└─│]/.test(t)
    );
    if (!isNoise) {
      kept.push(line);
    }
  }
  return kept.join("\n");
}
|
|
587
|
+
/**
 * Parse qmd's `--json` output into an array of result items.
 *
 * Noise lines are stripped first. The remaining text is parsed as JSON
 * directly, or, failing that, from the bracketed payload embedded in it.
 *
 * @param {string} raw - Raw qmd output.
 * @returns {Array} The parsed array, or the array found under `results`,
 *   `items` or `data` of a parsed object; an empty array for blank output.
 * @throws {Error} When no JSON can be parsed, or the JSON is neither an array
 *   nor an object holding one of the expected array properties.
 */
function parseQmdOutput(raw) {
  const cleaned = stripQmdNoise(raw).trim();
  if (cleaned === "") {
    return [];
  }
  let parsed = tryParseJson(cleaned);
  if (parsed === null) {
    // Fall back to the bracketed payload when the text as a whole is not JSON.
    const payload = extractJsonPayload(cleaned);
    if (payload) {
      parsed = tryParseJson(payload);
    }
  }
  if (!parsed) {
    throw new Error("qmd returned non-JSON output. Ensure qmd supports --json.");
  }
  if (Array.isArray(parsed)) {
    return parsed;
  }
  if (typeof parsed === "object") {
    const candidate = parsed.results ?? parsed.items ?? parsed.data;
    if (Array.isArray(candidate)) {
      return candidate;
    }
  }
  throw new Error("qmd returned an unexpected JSON shape.");
}
|
|
607
|
+
/**
 * Guard that fails fast when the qmd CLI cannot be found.
 *
 * @throws {QmdUnavailableError} With reason `"NOT_INSTALLED"` when `hasQmd()`
 *   reports that qmd is unavailable.
 */
function ensureQmdAvailable() {
  if (hasQmd()) {
    return;
  }
  throw new QmdUnavailableError("NOT_INSTALLED");
}
|
|
612
|
+
/**
 * Map well-known qmd failure messages to a descriptive configuration error.
 *
 * Matching is case-insensitive substring matching, checked in this order:
 * bad arguments/options, missing collection, missing index, missing
 * embeddings. The first rule that matches wins.
 *
 * @param {string} output - Combined stdout/stderr text from a failed qmd run.
 * @param {string[]} args - Arguments qmd was invoked with; the value after
 *   `-c` is used to name the collection in the message.
 * @returns {QmdConfigurationError | null} A ready-to-throw error, or `null`
 *   when the output matches none of the known patterns.
 */
function detectQmdError(output, args) {
  const haystack = output.toLowerCase();
  const mentions = (...needles) => needles.some((needle) => haystack.includes(needle));

  if (mentions("missing required arguments", "unknown option")) {
    return new QmdConfigurationError(
      'qmd does not support the search command with the expected arguments. This may indicate an incompatible qmd version or a different tool named "qmd".',
      `Ensure you have the correct qmd installed: ${QMD_INSTALL_COMMAND}`
    );
  }

  if (mentions("collection not found", "no collection")) {
    // The collection name is whatever followed "-c" on the command line.
    const flagAt = args.indexOf("-c");
    const named = flagAt >= 0 ? args[flagAt + 1] : undefined;
    const collectionName = named ? named : "unknown";
    return new QmdConfigurationError(
      `qmd collection "${collectionName}" not found.`,
      'Run `qmd update -c <collection>` to create the collection, or check your vault\'s .clawvault.json "name" field.'
    );
  }

  if (mentions("no index", "index not found")) {
    return new QmdConfigurationError(
      "qmd index not found. The vault may not be indexed yet.",
      "Run `clawvault rebuild` or `qmd update` to build the search index."
    );
  }

  if (mentions("embedding") && mentions("not found", "missing")) {
    return new QmdConfigurationError(
      "qmd embeddings not found. Vector search requires embeddings to be generated.",
      "Run `clawvault embed` or `qmd embed` to generate embeddings for semantic search."
    );
  }

  return null;
}
|
|
642
|
+
/**
 * Run the qmd CLI synchronously and return its parsed result rows.
 *
 * `--json` is appended when missing and `--index <name>` is prepended when an
 * index name resolves. On failure the function tries, in order: salvaging
 * JSON from stdout of an exit-status-1 run, recognising a known
 * misconfiguration via `detectQmdError`, salvaging JSON from the combined
 * stdout/stderr, and finally reporting a generic execution failure.
 *
 * @param {string[]} args - qmd arguments (command first).
 * @param {string} [indexName] - Optional qmd index name.
 * @returns {Array} Result rows as produced by `parseQmdOutput`.
 * @throws {QmdUnavailableError} `NOT_INSTALLED`, `COLLECTION_NOT_FOUND`, or
 *   `EXECUTION_FAILED`.
 * @throws {QmdConfigurationError} When the output matches a known
 *   configuration problem.
 */
function execQmd(args, indexName) {
  ensureQmdAvailable();
  const finalArgs = withQmdIndexArgs(ensureJsonArgs(args), indexName);
  try {
    const result = execFileSync("qmd", finalArgs, {
      encoding: "utf-8",
      stdio: ["ignore", "pipe", "pipe"],
      maxBuffer: 10 * 1024 * 1024, // 10MB
      // NOTE(review): with the shell enabled on Windows, arguments (including
      // the user's query text) are interpreted by the shell. Node's docs warn
      // against passing unsanitized input in that mode — worth confirming.
      shell: process.platform === "win32"
    });
    return parseQmdOutput(result);
  } catch (err) {
    if (err?.code === "ENOENT") {
      throw new QmdUnavailableError("NOT_INSTALLED");
    }
    if (err?.status === 1 && err?.stdout) {
      try {
        return parseQmdOutput(err.stdout);
      } catch {
        // stdout was not usable JSON. Fall through so the failure is
        // diagnosed below instead of leaking a raw parse error that skips
        // detectQmdError and the typed QmdUnavailableError.
      }
    }
    const output = [err?.stdout, err?.stderr].filter(Boolean).join("\n");
    const detectedError = detectQmdError(output, finalArgs);
    if (detectedError) {
      throw detectedError;
    }
    if (output) {
      try {
        return parseQmdOutput(output);
      } catch {
        // Best effort only: unparseable output is reported by the checks below.
      }
      // Compare case-insensitively, consistent with detectQmdError.
      const loweredOutput = output.toLowerCase();
      if (loweredOutput.includes("collection not found") || loweredOutput.includes("no such collection")) {
        throw new QmdUnavailableError("COLLECTION_NOT_FOUND", output.trim());
      }
    }
    const errorDetail = err?.message || "unknown error";
    throw new QmdUnavailableError("EXECUTION_FAILED", errorDetail);
  }
}
|
|
679
|
+
/**
 * Probe for the qmd CLI by running `qmd --version`.
 *
 * @returns {boolean} `true` when the process could be spawned and exited with
 *   status 0 or 1; `false` on a spawn error or any other exit status.
 */
function hasQmd() {
  const useShell = process.platform === "win32";
  const { error, status } = spawnSync("qmd", ["--version"], { stdio: "ignore", shell: useShell });
  if (error) {
    return false;
  }
  return status === 0 || status === 1;
}
|
|
683
|
+
/**
 * Run `qmd update`, streaming qmd's own output to the current terminal.
 *
 * @param {string} [collection] - When truthy, passed as `-c <collection>`.
 * @param {string} [indexName] - Optional qmd index name (see `withQmdIndexArgs`).
 * @throws {QmdUnavailableError} When qmd is not installed.
 * @throws {Error} Whatever `execFileSync` raises if qmd exits unsuccessfully.
 */
function qmdUpdate(collection, indexName) {
  ensureQmdAvailable();
  const updateArgs = collection ? ["update", "-c", collection] : ["update"];
  const spawnOptions = { stdio: "inherit", shell: process.platform === "win32" };
  execFileSync("qmd", withQmdIndexArgs(updateArgs, indexName), spawnOptions);
}
|
|
691
|
+
/**
 * Run `qmd embed`, streaming qmd's own output to the current terminal.
 *
 * @param {string} [collection] - When truthy, passed as `-c <collection>`.
 * @param {string} [indexName] - Optional qmd index name (see `withQmdIndexArgs`).
 * @throws {QmdUnavailableError} When qmd is not installed.
 * @throws {Error} Whatever `execFileSync` raises if qmd exits unsuccessfully.
 */
function qmdEmbed(collection, indexName) {
  ensureQmdAvailable();
  const embedArgs = collection ? ["embed", "-c", collection] : ["embed"];
  const spawnOptions = { stdio: "inherit", shell: process.platform === "win32" };
  execFileSync("qmd", withQmdIndexArgs(embedArgs, indexName), spawnOptions);
}
|
|
699
|
+
/**
 * Search facade that routes queries to the in-process engine and/or the
 * external qmd CLI, and converts qmd rows into ClawVault search results.
 *
 * Document storage (add/remove/export/import/size/clear) is delegated to the
 * in-process engine.
 */
var SearchEngine = class {
  /** In-process engine; owns the document cache and the local search modes. */
  inProcess = new InProcessSearchEngine();
  /** qmd collection name; sent as `-c <name>` only when non-empty. */
  collection = "";
  /** Vault root used to derive vault-relative paths/ids from result paths. */
  vaultPath = "";
  /** Root for expanding qmd:// URIs; `vaultPath` is used when this is empty. */
  collectionRoot = "";
  /** Optional qmd index name forwarded to `execQmd`. */
  qmdIndexName;
  /** Search settings; this class reads `backend` and `qmdFallback`. */
  searchConfig = {};

  /**
   * Replace the search configuration and forward it to the in-process engine.
   * @param {object} [config] - Nullish values are treated as `{}`.
   */
  setSearchConfig(config) {
    this.searchConfig = config ?? {};
    this.inProcess.setConfig(this.searchConfig);
  }

  /**
   * Set the collection name (usually vault name)
   */
  setCollection(name) {
    this.collection = name;
  }

  /**
   * Get the current collection name
   */
  getCollection() {
    return this.collection;
  }

  /**
   * Set the vault path for file resolution (also forwarded to the in-process engine)
   */
  setVaultPath(vaultPath) {
    this.vaultPath = vaultPath;
    this.inProcess.setVaultPath(vaultPath);
  }

  /**
   * Set the collection root for qmd:// URI resolution (stored as an absolute path)
   */
  setCollectionRoot(root) {
    this.collectionRoot = path.resolve(root);
  }

  /**
   * Set qmd index name (defaults to qmd global default when omitted)
   */
  setIndexName(indexName) {
    this.qmdIndexName = indexName;
  }

  /**
   * Add or update a document in the local cache
   * Note: qmd indexing happens via qmd update command
   */
  addDocument(doc) {
    this.inProcess.addDocument(doc);
  }

  /**
   * Remove a document from the local cache
   */
  removeDocument(id) {
    this.inProcess.removeDocument(id);
  }

  /**
   * Intentionally empty: this class performs no IDF rebuild of its own.
   */
  rebuildIDF() {
  }

  /**
   * Keyword search (`search` command). Blank queries return `[]` immediately.
   * @param {string} query
   * @param {object} [options] - See `runQmdQuery` for the recognised fields.
   * @returns {Promise<Array>}
   */
  async search(query, options = {}) {
    if (!query.trim()) return [];
    return this.runSearchWithFallback("search", query, options);
  }

  /**
   * Vector/semantic search (`vsearch` command). Blank queries return `[]`.
   */
  async vsearch(query, options = {}) {
    if (!query.trim()) return [];
    return this.runSearchWithFallback("vsearch", query, options);
  }

  /**
   * Combined search (`query` command). Blank queries return `[]`.
   */
  async query(query, options = {}) {
    if (!query.trim()) return [];
    return this.runSearchWithFallback("query", query, options);
  }

  /**
   * Pick the backend for one search and apply the qmd fallback policy.
   *
   * - `searchConfig.backend === "qmd"`: use qmd when installed, otherwise the
   *   in-process engine.
   * - Otherwise the in-process engine runs first. qmd is used instead when
   *   fallback is enabled (`searchConfig.qmdFallback`, default `true`), qmd is
   *   installed, and either the in-process query threw or it returned no
   *   results for a command other than `"search"`.
   *
   * The qmd fallback runs outside the in-process `try`, so a failing qmd query
   * is executed exactly once and its error propagates unchanged.
   *
   * @param {"search" | "vsearch" | "query"} command
   * @param {string} query
   * @param {object} options
   * @returns {Promise<Array>}
   */
  async runSearchWithFallback(command, query, options) {
    const preferQmd = this.searchConfig.backend === "qmd";
    const qmdFallbackEnabled = this.searchConfig.qmdFallback ?? true;
    if (preferQmd) {
      if (hasQmd()) {
        return this.runQmdQuery(command, query, options);
      }
      return this.runInProcessQuery(command, query, options);
    }

    let inProcessResults;
    try {
      inProcessResults = await this.runInProcessQuery(command, query, options);
    } catch (error) {
      // Only in-process failures reach this handler.
      if (qmdFallbackEnabled && hasQmd()) {
        return this.runQmdQuery(command, query, options);
      }
      throw error;
    }

    if (inProcessResults.length > 0 || command === "search" || !qmdFallbackEnabled || !hasQmd()) {
      return inProcessResults;
    }
    return this.runQmdQuery(command, query, options);
  }

  /**
   * Dispatch a command to the matching in-process engine method; anything
   * other than `"vsearch"` or `"query"` goes to `search`.
   */
  async runInProcessQuery(command, query, options) {
    if (command === "vsearch") {
      return this.inProcess.vsearch(query, options);
    }
    if (command === "query") {
      return this.inProcess.query(query, options);
    }
    return this.inProcess.search(query, options);
  }

  /**
   * Execute a qmd command synchronously and convert its rows.
   *
   * @param {string} command - qmd sub-command.
   * @param {string} query - Query text.
   * @param {object} options - `limit` (10), `minScore` (0), `category`,
   *   `tags`, `fullContent` (false), `temporalBoost` (false).
   * @returns {Array} Converted results (see `convertResults`).
   */
  runQmdQuery(command, query, options) {
    const { limit = 10, minScore = 0, category, tags, fullContent = false, temporalBoost = false } = options;
    // Ask for twice the limit — presumably headroom for the filtering done in
    // convertResults.
    const args = [command, query, "-n", String(limit * 2), "--json"];
    if (this.collection) {
      args.push("-c", this.collection);
    }
    return this.convertResults(execQmd(args, this.qmdIndexName), {
      limit,
      minScore,
      category,
      tags,
      fullContent,
      temporalBoost
    });
  }

  /**
   * Convert qmd results to ClawVault SearchResult format.
   *
   * Rows without a string `file`, rows under `ledger/archive/`, and rows
   * failing the category/tag/minScore filters are dropped. Scores are divided
   * by the first row's score, optionally scaled by a recency factor, then the
   * list is sorted by score descending and cut to `limit`.
   *
   * @param {Array<object>} qmdResults - Rows with `file`, `score`, and
   *   optionally `title` / `snippet`.
   * @param {object} options - Same fields as `runQmdQuery`.
   * @returns {Array<{document: object, score: number, snippet: string, matchedTerms: string[]}>}
   */
  convertResults(qmdResults, options) {
    const { limit = 10, minScore = 0, category, tags, fullContent = false, temporalBoost = false } = options;
    const results = [];
    const docs = this.inProcess.getAllDocuments();
    const docsById = new Map(docs.map((doc) => [doc.id, doc]));
    const maxScore = qmdResults[0]?.score || 1;
    for (const qr of qmdResults) {
      // A row without a usable path cannot be mapped to a document; skip it
      // rather than letting one malformed row abort the whole search.
      if (typeof qr?.file !== "string") continue;
      const filePath = this.qmdUriToPath(qr.file);
      const relativePath = this.vaultPath ? path.relative(this.vaultPath, filePath) : filePath;
      const normalizedRelativePath = relativePath.replace(/\\/g, "/");
      if (normalizedRelativePath.startsWith("ledger/archive/") || normalizedRelativePath.includes("/ledger/archive/")) {
        continue;
      }
      const docId = normalizedRelativePath.replace(/\.md$/, "");
      // Cached ids may use the platform separator, so try both spellings.
      let doc = docsById.get(docId) ?? docsById.get(docId.split("/").join(path.sep));
      const modifiedAt = this.resolveModifiedAt(doc, filePath);
      const parts = normalizedRelativePath.split("/");
      const docCategory = parts.length > 1 ? parts[0] : "root";
      if (category && docCategory !== category) continue;
      // Tag filtering only applies to cached documents; uncached rows pass.
      if (tags && tags.length > 0 && doc) {
        const docTags = new Set(doc.tags);
        if (!tags.some((t) => docTags.has(t))) continue;
      }
      const normalizedScore = maxScore > 0 ? qr.score / maxScore : 0;
      const finalScore = temporalBoost ? normalizedScore * this.getRecencyFactor(modifiedAt) : normalizedScore;
      if (finalScore < minScore) continue;
      if (!doc) {
        doc = {
          id: docId,
          path: filePath,
          category: docCategory,
          title: qr.title || path.basename(relativePath, ".md"),
          content: "",
          // Content loaded separately if needed
          frontmatter: {},
          links: [],
          tags: [],
          modified: modifiedAt
        };
      }
      results.push({
        document: fullContent ? doc : { ...doc, content: "" },
        score: finalScore,
        snippet: this.cleanSnippet(qr.snippet),
        matchedTerms: []
        // qmd doesn't provide this
      });
    }
    return results.sort((a, b) => b.score - a.score).slice(0, limit);
  }

  /**
   * Modification time for a result: the cached document's `modified` when a
   * document is given, else the file's mtime, else the Unix epoch when the
   * file cannot be stat'ed.
   */
  resolveModifiedAt(doc, filePath) {
    if (doc) return doc.modified;
    try {
      return fs.statSync(filePath).mtime;
    } catch {
      return /* @__PURE__ */ new Date(0);
    }
  }

  /**
   * Recency multiplier: 1 for under a day old, 0.9 up to seven days, else 0.7.
   * Timestamps in the future count as age zero.
   * @param {Date} modifiedAt
   * @returns {number}
   */
  getRecencyFactor(modifiedAt) {
    const ageMs = Math.max(0, Date.now() - modifiedAt.getTime());
    const ageDays = ageMs / (24 * 60 * 60 * 1e3);
    if (ageDays < 1) return 1;
    if (ageDays <= 7) return 0.9;
    return 0.7;
  }

  /**
   * Convert qmd:// URI to file path.
   *
   * `qmd://<collection>/<relative>` becomes `<root>/<relative>`, where root is
   * `collectionRoot` or, failing that, `vaultPath`; with no root the relative
   * part is returned. Anything else (including a qmd:// URI with no slash
   * after the collection) is returned unchanged.
   */
  qmdUriToPath(uri) {
    if (uri.startsWith("qmd://")) {
      const withoutScheme = uri.slice(6);
      const slashIndex = withoutScheme.indexOf("/");
      if (slashIndex > -1) {
        const relativePath = withoutScheme.slice(slashIndex + 1);
        const root = this.collectionRoot || this.vaultPath;
        if (root) {
          return path.join(root, relativePath);
        }
        return relativePath;
      }
    }
    return uri;
  }

  /**
   * Clean up qmd snippet format: strip `@@ … @@ (…)` hunk markers, then keep
   * at most the first three lines and 300 characters. Falsy input yields "".
   */
  cleanSnippet(snippet) {
    if (!snippet) return "";
    return snippet.replace(/@@ [-+]?\d+,?\d* @@ \([^)]+\)/g, "").trim().split("\n").slice(0, 3).join("\n").slice(0, 300);
  }

  /**
   * Get all cached documents
   */
  getAllDocuments() {
    return this.inProcess.getAllDocuments();
  }

  /**
   * Get document count
   */
  get size() {
    return this.inProcess.size;
  }

  /**
   * Clear the local document cache
   */
  clear() {
    this.inProcess.clear();
  }

  /**
   * Export documents for persistence
   */
  export() {
    return this.inProcess.export();
  }

  /**
   * Import from persisted data
   */
  import(data) {
    this.inProcess.import(data);
  }
};
|
|
950
|
+
/**
 * Collect the targets of `[[wiki links]]` in a piece of text.
 *
 * @param {string} content - Text to scan.
 * @returns {string[]} Lower-cased link bodies in order of appearance;
 *   duplicates are kept.
 */
function extractWikiLinks(content) {
  const targets = [];
  for (const hit of content.matchAll(/\[\[([^\]]+)\]\]/g)) {
    // Group 1 is the text between the double brackets.
    targets.push(hit[1].toLowerCase());
  }
  return targets;
}
|
|
954
|
+
/**
 * Collect `#tag` tokens (word characters and hyphens) from a piece of text.
 *
 * @param {string} content - Text to scan.
 * @returns {string[]} Unique lower-cased tag names without the leading `#`,
 *   in order of first appearance.
 */
function extractTags(content) {
  const unique = new Set();
  for (const [token] of content.matchAll(/#[\w-]+/g)) {
    unique.add(token.slice(1).toLowerCase());
  }
  return Array.from(unique);
}
|
|
958
|
+
|
|
959
|
+
export {
|
|
960
|
+
QMD_INSTALL_URL,
|
|
961
|
+
QMD_INSTALL_COMMAND,
|
|
962
|
+
QmdUnavailableError,
|
|
963
|
+
getQmdErrorDetails,
|
|
964
|
+
QmdConfigurationError,
|
|
965
|
+
withQmdIndexArgs,
|
|
966
|
+
hasQmd,
|
|
967
|
+
qmdUpdate,
|
|
968
|
+
qmdEmbed,
|
|
969
|
+
SearchEngine,
|
|
970
|
+
extractWikiLinks,
|
|
971
|
+
extractTags
|
|
972
|
+
};
|