minimem 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +166 -133
- package/dist/cli/index.js.map +1 -1
- package/dist/index.cjs +3993 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +858 -0
- package/dist/index.js +10 -1
- package/dist/index.js.map +1 -1
- package/dist/internal.cjs +286 -0
- package/dist/internal.cjs.map +1 -0
- package/dist/internal.d.cts +65 -0
- package/dist/session.cjs +298 -0
- package/dist/session.cjs.map +1 -0
- package/dist/session.d.cts +96 -0
- package/package.json +9 -5
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,3993 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __create = Object.create;
|
|
3
|
+
var __defProp = Object.defineProperty;
|
|
4
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
5
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
7
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
+
/**
 * Bundler helper: exposes each entry of `all` on `target` as an enumerable
 * getter property, so the exported value is read lazily on access.
 *
 * @param {object} target - Object that receives the getter properties.
 * @param {object} all - Map of export name to a zero-argument getter function.
 */
var __export = (target, all) => {
  for (var exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
};
|
|
12
|
+
/**
 * Bundler helper: mirrors the own properties of `from` onto `to` as getters.
 * Keys that `to` already owns, and the key named by `except`, are skipped.
 *
 * @param {object} to - Destination object.
 * @param {*} from - Source; only objects and functions are copied from.
 * @param {string} [except] - Property name to leave out.
 * @param {*} [desc] - Scratch slot for the current property descriptor.
 * @returns {object} The `to` object.
 */
var __copyProps = (to, from, except, desc) => {
  const isCopyable = (from && typeof from === "object") || typeof from === "function";
  if (!isCopyable) return to;
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) continue;
    desc = __getOwnPropDesc(from, key);
    // Enumerability follows the source descriptor; it defaults to true when
    // no descriptor is available.
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};
|
|
20
|
+
/**
 * Bundler helper: wraps a CommonJS module so it can be consumed like an ES
 * module namespace.
 *
 * @param {*} mod - The value returned by `require`.
 * @param {*} [isNodeMode] - Truthy when the importer is in node compatibility mode.
 * @param {object} [target] - Scratch slot for the namespace object being built.
 * @returns {object} Namespace object exposing the module's properties.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};
|
|
28
|
+
/**
 * Bundler helper: builds a CommonJS exports object from an ES-module style
 * export map, marking it with a non-enumerable `__esModule: true`.
 *
 * @param {object} mod - Export map to mirror.
 * @returns {object} New object carrying `__esModule` and getters for `mod`'s properties.
 */
var __toCommonJS = (mod) => {
  const marked = __defProp({}, "__esModule", { value: true });
  return __copyProps(marked, mod);
};
|
|
29
|
+
|
|
30
|
+
// src/index.ts
|
|
31
|
+
var index_exports = {};
|
|
32
|
+
__export(index_exports, {
|
|
33
|
+
KNOWLEDGE_GRAPH_TOOL: () => KNOWLEDGE_GRAPH_TOOL,
|
|
34
|
+
KNOWLEDGE_PATH_TOOL: () => KNOWLEDGE_PATH_TOOL,
|
|
35
|
+
KNOWLEDGE_SEARCH_TOOL: () => KNOWLEDGE_SEARCH_TOOL,
|
|
36
|
+
MEMORY_GET_DETAILS_TOOL: () => MEMORY_GET_DETAILS_TOOL,
|
|
37
|
+
MEMORY_SEARCH_TOOL: () => MEMORY_SEARCH_TOOL,
|
|
38
|
+
MEMORY_TOOLS: () => MEMORY_TOOLS,
|
|
39
|
+
McpServer: () => McpServer,
|
|
40
|
+
MemoryIndexer: () => MemoryIndexer,
|
|
41
|
+
MemorySearcher: () => MemorySearcher,
|
|
42
|
+
MemoryToolExecutor: () => MemoryToolExecutor,
|
|
43
|
+
Minimem: () => Minimem,
|
|
44
|
+
addFrontmatter: () => addFrontmatter,
|
|
45
|
+
addSessionToContent: () => addSessionToContent,
|
|
46
|
+
buildFileEntry: () => buildFileEntry,
|
|
47
|
+
buildKnowledgeFilterSql: () => buildKnowledgeFilterSql,
|
|
48
|
+
chunkMarkdown: () => chunkMarkdown,
|
|
49
|
+
cosineSimilarity: () => cosineSimilarity,
|
|
50
|
+
createEmbeddingProvider: () => createEmbeddingProvider,
|
|
51
|
+
createGeminiEmbeddingProvider: () => createGeminiEmbeddingProvider,
|
|
52
|
+
createMcpServer: () => createMcpServer,
|
|
53
|
+
createOpenAiEmbeddingProvider: () => createOpenAiEmbeddingProvider,
|
|
54
|
+
createToolExecutor: () => createToolExecutor,
|
|
55
|
+
extractChunkMetadata: () => extractChunkMetadata,
|
|
56
|
+
extractSession: () => extractSession,
|
|
57
|
+
generateMcpConfig: () => generateMcpConfig,
|
|
58
|
+
getLinksFrom: () => getLinksFrom,
|
|
59
|
+
getLinksTo: () => getLinksTo,
|
|
60
|
+
getNeighbors: () => getNeighbors,
|
|
61
|
+
getPathBetween: () => getPathBetween,
|
|
62
|
+
getToolDefinitions: () => getToolDefinitions,
|
|
63
|
+
hashText: () => hashText,
|
|
64
|
+
isMemoryPath: () => isMemoryPath,
|
|
65
|
+
listMemoryFiles: () => listMemoryFiles,
|
|
66
|
+
parseFrontmatter: () => parseFrontmatter,
|
|
67
|
+
runGeminiEmbeddingBatches: () => runGeminiEmbeddingBatches,
|
|
68
|
+
runMcpServer: () => runMcpServer,
|
|
69
|
+
runOpenAiEmbeddingBatches: () => runOpenAiEmbeddingBatches,
|
|
70
|
+
serializeFrontmatter: () => serializeFrontmatter,
|
|
71
|
+
stripPrivateContent: () => stripPrivateContent
|
|
72
|
+
});
|
|
73
|
+
module.exports = __toCommonJS(index_exports);
|
|
74
|
+
|
|
75
|
+
// src/minimem.ts
|
|
76
|
+
var import_node_crypto2 = require("crypto");
|
|
77
|
+
var import_promises2 = __toESM(require("fs/promises"), 1);
|
|
78
|
+
var import_node_fs3 = __toESM(require("fs"), 1);
|
|
79
|
+
var import_node_path3 = __toESM(require("path"), 1);
|
|
80
|
+
var import_node_sqlite = require("node:sqlite");
|
|
81
|
+
var import_chokidar = __toESM(require("chokidar"), 1);
|
|
82
|
+
|
|
83
|
+
// src/internal.ts
|
|
84
|
+
var import_node_crypto = __toESM(require("crypto"), 1);
|
|
85
|
+
var import_node_fs = __toESM(require("fs"), 1);
|
|
86
|
+
var import_promises = __toESM(require("fs/promises"), 1);
|
|
87
|
+
var import_node_path = __toESM(require("path"), 1);
|
|
88
|
+
/**
 * Reports an error through an optional debug callback.
 *
 * @param {string} context - Label placed in square brackets at the start of the message.
 * @param {*} error - Thrown value; `Error` instances contribute their `message`,
 *   anything else is stringified.
 * @param {((message: string) => void)|undefined} debug - Sink for the message;
 *   nothing happens when it is falsy.
 */
function logError(context, error, debug) {
  if (debug) {
    const detail = error instanceof Error ? error.message : String(error);
    debug(`[${context}] Error: ${detail}`);
  }
}
|
|
93
|
+
/**
 * Creates `dir` (including missing parents) on a best-effort basis.
 * Failures never propagate: anything other than EEXIST is forwarded to
 * `logError`, and the path is returned either way.
 *
 * @param {string} dir - Directory path to create.
 * @param {((message: string) => void)|undefined} debug - Optional debug sink passed to `logError`.
 * @returns {string} The `dir` argument, unchanged.
 */
function ensureDir(dir, debug) {
  try {
    import_node_fs.default.mkdirSync(dir, { recursive: true });
  } catch (failure) {
    const alreadyExists = failure.code === "EEXIST";
    if (!alreadyExists) {
      logError("ensureDir", failure, debug);
    }
  }
  return dir;
}
|
|
104
|
+
/**
 * Normalizes a relative path for comparison: trims whitespace, converts
 * backslashes to forward slashes, then strips any leading run of "." and "/"
 * characters.
 *
 * Separators are converted BEFORE the leading run is stripped so that
 * Windows-style input such as ".\\memory\\a.md" ends up as "memory/a.md"
 * instead of keeping a stray leading slash.
 *
 * @param {string} value - Path to normalize.
 * @returns {string} Normalized path using "/" separators.
 */
function normalizeRelPath(value) {
  return value.trim().replace(/\\/g, "/").replace(/^[./]+/, "");
}
|
|
108
|
+
/**
 * Tells whether a relative path refers to a memory file: the root
 * "MEMORY.md" / "memory.md", or anything under "memory/".
 *
 * @param {string} relPath - Relative path; it is run through `normalizeRelPath` first.
 * @returns {boolean} True when the normalized path is a memory path.
 */
function isMemoryPath(relPath) {
  const candidate = normalizeRelPath(relPath);
  if (candidate === "") return false;
  return (
    candidate === "MEMORY.md" ||
    candidate === "memory.md" ||
    candidate.startsWith("memory/")
  );
}
|
|
114
|
+
/**
 * Checks whether a path is accessible.
 *
 * @param {string} filePath - Path to probe with `fs.promises.access`.
 * @returns {Promise<boolean>} True when the access check succeeds, false when it rejects.
 */
async function exists(filePath) {
  return import_promises.default.access(filePath).then(
    () => true,
    () => false
  );
}
|
|
122
|
+
/**
 * Recursively collects Markdown files below `dir`.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} files - Accumulator; the joined path of every regular file
 *   whose name ends in ".md" is pushed onto it.
 * @returns {Promise<void>}
 */
async function walkDir(dir, files) {
  const listing = await import_promises.default.readdir(dir, { withFileTypes: true });
  for (const dirent of listing) {
    const childPath = import_node_path.default.join(dir, dirent.name);
    if (dirent.isDirectory()) {
      await walkDir(childPath, files);
    } else if (dirent.isFile() && dirent.name.endsWith(".md")) {
      files.push(childPath);
    }
  }
}
|
|
135
|
+
/**
 * Lists the memory files of a memory directory: the root MEMORY.md or
 * memory.md (if present) followed by every ".md" file under "memory/".
 *
 * @param {string} memoryDir - Directory to inspect.
 * @returns {Promise<string[]>} File paths, de-duplicated by real path.
 * @throws {Error} When MEMORY.md and memory.md both exist and resolve to
 *   different real paths.
 */
async function listMemoryFiles(memoryDir) {
  // realpath that falls back to the given path when resolution fails.
  const canonical = async (target) => {
    try {
      return await import_promises.default.realpath(target);
    } catch {
      return target;
    }
  };

  const upperPath = import_node_path.default.join(memoryDir, "MEMORY.md");
  const lowerPath = import_node_path.default.join(memoryDir, "memory.md");
  const upperExists = await exists(upperPath);
  const lowerExists = await exists(lowerPath);

  const found = [];
  if (upperExists && lowerExists) {
    // Both names being visible is fine only if they resolve to the same real
    // path; then it is a single file and is listed once.
    const upperReal = await canonical(upperPath);
    const lowerReal = await canonical(lowerPath);
    if (upperReal !== lowerReal) {
      throw new Error(
        `Both MEMORY.md and memory.md exist in ${memoryDir}. Please remove one to avoid ambiguity.`
      );
    }
    found.push(upperPath);
  } else if (upperExists) {
    found.push(upperPath);
  } else if (lowerExists) {
    found.push(lowerPath);
  }

  const subDir = import_node_path.default.join(memoryDir, "memory");
  if (await exists(subDir)) {
    await walkDir(subDir, found);
  }
  if (found.length <= 1) return found;

  // Drop entries that resolve to a real path already seen.
  const seenReal = /* @__PURE__ */ new Set();
  const unique = [];
  for (const file of found) {
    const key = await canonical(file);
    if (!seenReal.has(key)) {
      seenReal.add(key);
      unique.push(file);
    }
  }
  return unique;
}
|
|
182
|
+
/**
 * Computes the SHA-256 digest of a value.
 *
 * @param {string} value - Data fed to the hash.
 * @returns {string} Digest as a hex string.
 */
function hashText(value) {
  const hasher = import_node_crypto.default.createHash("sha256");
  hasher.update(value);
  return hasher.digest("hex");
}
|
|
185
|
+
/**
 * Builds the index record for one file on disk.
 *
 * @param {string} absPath - Path of the file to stat and read (as UTF-8).
 * @param {string} memoryDir - Directory the stored `path` is made relative to.
 * @returns {Promise<{path: string, absPath: string, mtimeMs: number, size: number, hash: string}>}
 *   Relative path with "/" separators, the original path, stat mtime and size,
 *   and the `hashText` digest of the file content.
 */
async function buildFileEntry(absPath, memoryDir) {
  const fileStat = await import_promises.default.stat(absPath);
  const text = await import_promises.default.readFile(absPath, "utf-8");
  const relative = import_node_path.default.relative(memoryDir, absPath);
  return {
    path: relative.replace(/\\/g, "/"),
    absPath,
    mtimeMs: fileStat.mtimeMs,
    size: fileStat.size,
    hash: hashText(text)
  };
}
|
|
197
|
+
/**
 * Removes every `<private>...</private>` section (tag match is
 * case-insensitive) while keeping the line count intact: each removed section
 * is replaced by as many newlines as it contained.
 *
 * @param {string} content - Text to scrub.
 * @returns {string} Text with private sections blanked out.
 */
function stripPrivateContent(content) {
  const privateSection = /<private>[\s\S]*?<\/private>/gi;
  return content.replace(privateSection, (section) => {
    const newlineCount = section.split("\n").length - 1;
    return "\n".repeat(newlineCount);
  });
}
|
|
203
|
+
/**
 * Splits Markdown into line-based chunks of bounded size, optionally carrying
 * trailing lines of one chunk into the next as overlap.
 *
 * Private sections are blanked with `stripPrivateContent` first. Sizes are
 * measured in characters, derived from the settings as `tokens * 4` (minimum
 * 32) and `overlap * 4` (minimum 0); every line costs its length plus one.
 * Lines longer than the limit are cut into limit-sized segments that keep the
 * original line number.
 *
 * @param {string} content - Markdown text.
 * @param {{tokens: number, overlap: number}} chunking - Chunk size and overlap settings.
 * @returns {Array<{startLine: number, endLine: number, text: string, hash: string}>}
 *   Chunks with 1-based line numbers and the `hashText` digest of their text.
 */
function chunkMarkdown(content, chunking) {
  const lines = stripPrivateContent(content).split("\n");
  if (lines.length === 0) return [];
  const maxChars = Math.max(32, chunking.tokens * 4);
  const overlapChars = Math.max(0, chunking.overlap * 4);
  const chunks = [];
  let pending = [];
  let pendingChars = 0;

  // Emits the pending entries as one chunk; pending itself is left untouched.
  const emitPending = () => {
    const head = pending[0];
    const tail = pending[pending.length - 1];
    if (pending.length === 0 || !head || !tail) return;
    const text = pending.map((item) => item.line).join("\n");
    chunks.push({
      startLine: head.lineNo,
      endLine: tail.lineNo,
      text,
      hash: hashText(text)
    });
  };

  // Keeps only the trailing entries needed to reach the overlap budget.
  const keepOverlapTail = () => {
    if (overlapChars <= 0 || pending.length === 0) {
      pending = [];
      pendingChars = 0;
      return;
    }
    const tailEntries = [];
    let gathered = 0;
    for (let idx = pending.length - 1; idx >= 0; idx -= 1) {
      const item = pending[idx];
      if (!item) continue;
      gathered += item.line.length + 1;
      tailEntries.unshift(item);
      if (gathered >= overlapChars) break;
    }
    pending = tailEntries;
    pendingChars = tailEntries.reduce((total, item) => total + item.line.length + 1, 0);
  };

  lines.forEach((rawLine, index) => {
    const line = rawLine ?? "";
    const lineNo = index + 1;
    // An empty line still contributes one (empty) segment.
    const segments = line.length === 0 ? [""] : [];
    for (let offset = 0; offset < line.length; offset += maxChars) {
      segments.push(line.slice(offset, offset + maxChars));
    }
    for (const segment of segments) {
      const cost = segment.length + 1;
      if (pending.length > 0 && pendingChars + cost > maxChars) {
        emitPending();
        keepOverlapTail();
      }
      pending.push({ line: segment, lineNo });
      pendingChars += cost;
    }
  });

  emitPending();
  return chunks;
}
|
|
269
|
+
/**
 * Reads the first `<!-- type: NAME -->` HTML comment in a chunk.
 *
 * @param {string} text - Chunk text.
 * @returns {{type?: string}} `{ type }` with the name lower-cased, or `{}`
 *   when no such comment is present.
 */
function extractChunkMetadata(text) {
  const marker = text.match(/<!--\s*type:\s*([\w-]+)\s*-->/i);
  if (!marker) return {};
  return { type: marker[1].toLowerCase() };
}
|
|
273
|
+
/**
 * Decodes a JSON-serialized embedding.
 *
 * @param {string} raw - JSON text expected to hold an array.
 * @returns {Array} The parsed array, or an empty array when the text is not
 *   valid JSON or does not decode to an array.
 */
function parseEmbedding(raw) {
  let decoded;
  try {
    decoded = JSON.parse(raw);
  } catch {
    return [];
  }
  return Array.isArray(decoded) ? decoded : [];
}
|
|
281
|
+
/**
 * Cosine similarity of two numeric vectors, computed over their common
 * prefix (the shorter length). Missing components count as 0.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} The similarity, or 0 when either vector is empty or has
 *   zero magnitude over the compared prefix.
 */
function cosineSimilarity(a, b) {
  if (a.length === 0 || b.length === 0) return 0;
  const shared = Math.min(a.length, b.length);
  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  let idx = 0;
  while (idx < shared) {
    const x = a[idx] ?? 0;
    const y = b[idx] ?? 0;
    dot += x * y;
    sumSqA += x * x;
    sumSqB += y * y;
    idx += 1;
  }
  if (sumSqA === 0 || sumSqB === 0) return 0;
  return dot / (Math.sqrt(sumSqA) * Math.sqrt(sumSqB));
}
|
|
297
|
+
/**
 * Truncates a string to at most `maxChars` UTF-16 code units without leaving
 * half of a surrogate pair at the end.
 *
 * A plain `slice` can cut between the high and low surrogate of an astral
 * character (emoji, some CJK), producing an unpaired surrogate. When the cut
 * would land there, the orphaned high surrogate is dropped as well, so the
 * result may be one code unit shorter than `maxChars`.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxChars - Maximum length in UTF-16 code units.
 * @returns {string} `text` itself when it already fits, otherwise the truncated prefix.
 */
function truncateUtf16Safe(text, maxChars) {
  if (text.length <= maxChars) return text;
  let end = maxChars;
  if (end > 0) {
    const lastKept = text.charCodeAt(end - 1);
    const firstDropped = text.charCodeAt(end);
    const lastIsHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff;
    const nextIsLowSurrogate = firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    if (lastIsHighSurrogate && nextIsLowSurrogate) {
      end -= 1;
    }
  }
  return text.slice(0, end);
}
|
|
301
|
+
/**
 * Packs an embedding into a binary blob of 32-bit floats.
 *
 * @param {ArrayLike<number>} embedding - Vector components.
 * @returns {Buffer} Buffer over the bytes of a Float32Array built from `embedding`.
 */
function vectorToBlob(embedding) {
  const floats = new Float32Array(embedding);
  return Buffer.from(floats.buffer);
}
|
|
304
|
+
|
|
305
|
+
// src/search/hybrid.ts
|
|
306
|
+
/**
 * Turns free text into an FTS5 MATCH expression that requires every word:
 * each token is double-quoted and the tokens are joined with AND.
 *
 * Tokens are runs of Unicode letters, numbers, combining marks and "_", so
 * non-ASCII queries (accented Latin, Cyrillic, CJK, ...) are kept intact
 * instead of being dropped or split at the first non-ASCII character.
 * Because tokens can only contain those characters, they never include a
 * double quote and need no escaping.
 *
 * @param {string} raw - User-supplied search text.
 * @returns {string|null} The MATCH expression, or null when the text has no tokens.
 */
function buildFtsQuery(raw) {
  const tokens = raw.match(/[\p{L}\p{N}\p{M}_]+/gu) ?? [];
  if (tokens.length === 0) return null;
  return tokens.map((token) => `"${token}"`).join(" AND ");
}
|
|
312
|
+
/**
 * Converts an FTS5 `bm25()` rank into a score in [0, 1] where higher means a
 * better match.
 *
 * FTS5 reports better matches as numerically LOWER (more negative) ranks,
 * which is why the keyword query orders by rank ascending. Taking the
 * absolute value and using 1 / (1 + |rank|) therefore inverted the ordering:
 * the strongest matches received the lowest scores. Negative ranks are now
 * mapped through r / (1 + r) with r = -rank, which grows with relevance.
 * Non-negative ranks keep the previous 1 / (1 + rank) mapping.
 *
 * @param {number} rank - Value returned by `bm25()`.
 * @returns {number} Score in [0, 1]; 0 for NaN or infinite input.
 */
function bm25RankToScore(rank) {
  if (!Number.isFinite(rank)) {
    return 0;
  }
  if (rank < 0) {
    const relevance = -rank;
    return relevance / (1 + relevance);
  }
  return 1 / (1 + rank);
}
|
|
319
|
+
/**
 * Merges vector and keyword hits by id and ranks them by weighted score.
 *
 * For ids present in both lists the keyword hit supplies `textScore` and,
 * when it has a non-empty snippet, replaces the snippet. If exactly one list
 * is empty, the other list's weight is forced to 1 and the empty one to 0.
 *
 * @param {object} params
 * @param {Array<object>} params.vector - Hits carrying `vectorScore`.
 * @param {Array<object>} params.keyword - Hits carrying `textScore`.
 * @param {number} params.vectorWeight - Weight applied to `vectorScore`.
 * @param {number} params.textWeight - Weight applied to `textScore`.
 * @returns {Array<{path: *, startLine: *, endLine: *, score: number, snippet: *, source: *}>}
 *   Merged hits sorted by descending score.
 */
function mergeHybridResults(params) {
  const combined = /* @__PURE__ */ new Map();

  for (const hit of params.vector) {
    combined.set(hit.id, {
      id: hit.id,
      path: hit.path,
      startLine: hit.startLine,
      endLine: hit.endLine,
      source: hit.source,
      snippet: hit.snippet,
      vectorScore: hit.vectorScore,
      textScore: 0
    });
  }

  for (const hit of params.keyword) {
    const prior = combined.get(hit.id);
    if (!prior) {
      combined.set(hit.id, {
        id: hit.id,
        path: hit.path,
        startLine: hit.startLine,
        endLine: hit.endLine,
        source: hit.source,
        snippet: hit.snippet,
        vectorScore: 0,
        textScore: hit.textScore
      });
      continue;
    }
    prior.textScore = hit.textScore;
    if (hit.snippet && hit.snippet.length > 0) prior.snippet = hit.snippet;
  }

  // When only one side produced hits, score purely on that side.
  const hasVector = params.vector.length > 0;
  const hasKeyword = params.keyword.length > 0;
  let vectorWeight = params.vectorWeight;
  let textWeight = params.textWeight;
  if (params.vector.length === 0 && hasKeyword) {
    vectorWeight = 0;
    textWeight = 1;
  } else if (params.keyword.length === 0 && hasVector) {
    vectorWeight = 1;
    textWeight = 0;
  }

  const ranked = [];
  for (const entry of combined.values()) {
    ranked.push({
      path: entry.path,
      startLine: entry.startLine,
      endLine: entry.endLine,
      score: vectorWeight * entry.vectorScore + textWeight * entry.textScore,
      snippet: entry.snippet,
      source: entry.source
    });
  }
  return ranked.sort((left, right) => right.score - left.score);
}
|
|
373
|
+
|
|
374
|
+
// src/search/search.ts
|
|
375
|
+
/**
 * Builds the optional knowledge filters as a SQL fragment plus its bound
 * parameters. Every clause starts with " AND" so the fragment can be appended
 * to an existing WHERE condition on a `chunks` row aliased `c`.
 *
 * @param {object} opts
 * @param {string} [opts.knowledgeType] - Exact match on `c.knowledge_type`.
 * @param {number} [opts.minConfidence] - Lower bound for `c.confidence` (0 is honoured).
 * @param {string[]} [opts.domain] - Match when the JSON array `c.domains` contains any of these.
 * @param {string[]} [opts.entities] - Match when the JSON array `c.entities` contains any of these.
 * @returns {{sql: string, params: Array}} Fragment and parameters in placeholder order.
 */
function buildKnowledgeFilterSql(opts) {
  const sqlParts = [];
  const bound = [];

  // Adds an "any of these values is in the JSON array column" clause.
  const addJsonMembership = (column, alias, values) => {
    if (values && values.length > 0) {
      const marks = values.map(() => "?").join(", ");
      sqlParts.push(
        ` AND EXISTS (SELECT 1 FROM json_each(c.${column}) AS ${alias} WHERE ${alias}.value IN (${marks}))`
      );
      bound.push(...values);
    }
  };

  if (opts.knowledgeType) {
    sqlParts.push(` AND c.knowledge_type = ?`);
    bound.push(opts.knowledgeType);
  }
  if (opts.minConfidence !== void 0) {
    sqlParts.push(` AND c.confidence >= ?`);
    bound.push(opts.minConfidence);
  }
  addJsonMembership("domains", "d", opts.domain);
  addJsonMembership("entities", "e", opts.entities);

  return { sql: sqlParts.join(""), params: bound };
}
|
|
402
|
+
/**
 * Finds the chunks closest to a query embedding.
 *
 * When `params.ensureVectorReady(dimensions)` resolves truthy, the ranking is
 * done in SQL with `vec_distance_cosine` against `params.vectorTable`.
 * Otherwise every candidate chunk is loaded through `listChunks` and scored
 * in JavaScript with `cosineSimilarity`.
 *
 * @param {object} params
 * @param {number[]} params.queryVec - Query embedding.
 * @param {number} params.limit - Maximum number of hits.
 * @param {object} params.db - Database handle exposing `prepare(sql).all(...)`.
 * @param {string} params.vectorTable - Vector table name (interpolated into the SQL).
 * @param {string} params.providerModel - Value matched against the chunk `model` column.
 * @param {{sql: string, params: Array}} params.sourceFilterVec - Extra filter for the SQL path.
 * @param {{sql: string, params: Array}} params.sourceFilterChunks - Extra filter for the fallback path.
 * @param {number} params.snippetMaxChars - Snippet length cap.
 * @param {(dimensions: number) => Promise<*>} params.ensureVectorReady - Readiness probe.
 * @returns {Promise<Array<object>>} Hits ordered best first; empty for an
 *   empty query vector or a non-positive limit.
 */
async function searchVector(params) {
  if (params.queryVec.length === 0 || params.limit <= 0) return [];

  const nativeReady = await params.ensureVectorReady(params.queryVec.length);
  if (!nativeReady) {
    // Fallback: brute-force cosine similarity over all candidate chunks.
    const candidates = listChunks({
      db: params.db,
      providerModel: params.providerModel,
      sourceFilter: params.sourceFilterChunks
    });
    const scored = candidates
      .map((chunk) => ({
        chunk,
        score: cosineSimilarity(params.queryVec, chunk.embedding)
      }))
      .filter((entry) => Number.isFinite(entry.score));
    scored.sort((left, right) => right.score - left.score);
    return scored.slice(0, params.limit).map(({ chunk, score }) => ({
      id: chunk.id,
      path: chunk.path,
      startLine: chunk.startLine,
      endLine: chunk.endLine,
      score,
      snippet: truncateUtf16Safe(chunk.text, params.snippetMaxChars),
      source: chunk.source
    }));
  }

  const statement = params.db.prepare(
    `SELECT c.id, c.path, c.start_line, c.end_line, c.text,
       c.source,
       vec_distance_cosine(v.embedding, ?) AS dist
     FROM ${params.vectorTable} v
     JOIN chunks c ON c.id = v.id
     WHERE c.model = ?${params.sourceFilterVec.sql}
     ORDER BY dist ASC
     LIMIT ?`
  );
  const rows = statement.all(
    vectorToBlob(params.queryVec),
    params.providerModel,
    ...params.sourceFilterVec.params,
    params.limit
  );
  return rows.map((row) => ({
    id: row.id,
    path: row.path,
    startLine: row.start_line,
    endLine: row.end_line,
    // Distance is turned into a similarity-style score.
    score: 1 - row.dist,
    snippet: truncateUtf16Safe(row.text, params.snippetMaxChars),
    source: row.source
  }));
}
|
|
449
|
+
/**
 * Loads all chunks stored for a model, with their embeddings decoded.
 *
 * @param {object} params
 * @param {object} params.db - Database handle exposing `prepare(sql).all(...)`.
 * @param {string} params.providerModel - Value matched against the `model` column.
 * @param {{sql: string, params: Array}} params.sourceFilter - Extra SQL appended
 *   to the WHERE clause, with its bound parameters.
 * @returns {Array<object>} Chunks with camel-cased line fields and `embedding`
 *   decoded through `parseEmbedding`.
 */
function listChunks(params) {
  const statement = params.db.prepare(
    `SELECT id, path, start_line, end_line, text, embedding, source
     FROM chunks
     WHERE model = ?${params.sourceFilter.sql}`
  );
  const rows = statement.all(params.providerModel, ...params.sourceFilter.params);
  const chunks = [];
  for (const row of rows) {
    chunks.push({
      id: row.id,
      path: row.path,
      startLine: row.start_line,
      endLine: row.end_line,
      text: row.text,
      embedding: parseEmbedding(row.embedding),
      source: row.source
    });
  }
  return chunks;
}
|
|
465
|
+
/**
 * Runs a full-text search against the FTS table.
 *
 * @param {object} params
 * @param {string} params.query - Raw search text.
 * @param {number} params.limit - Maximum number of hits.
 * @param {object} params.db - Database handle exposing `prepare(sql).all(...)`.
 * @param {string} params.ftsTable - FTS table name (interpolated into the SQL).
 * @param {string} params.providerModel - Value matched against the `model` column.
 * @param {{sql: string, params: Array}} params.sourceFilter - Extra WHERE SQL and parameters.
 * @param {number} params.snippetMaxChars - Snippet length cap.
 * @param {(raw: string) => (string|null)} params.buildFtsQuery - Builds the MATCH expression.
 * @param {(rank: number) => number} params.bm25RankToScore - Converts a bm25 rank to a score.
 * @returns {Promise<Array<object>>} Hits in ascending rank order; empty for a
 *   non-positive limit or when no MATCH expression could be built.
 */
async function searchKeyword(params) {
  if (params.limit <= 0) return [];
  const matchExpr = params.buildFtsQuery(params.query);
  if (!matchExpr) return [];

  const statement = params.db.prepare(
    `SELECT id, path, source, start_line, end_line, text,
       bm25(${params.ftsTable}) AS rank
     FROM ${params.ftsTable}
     WHERE ${params.ftsTable} MATCH ? AND model = ?${params.sourceFilter.sql}
     ORDER BY rank ASC
     LIMIT ?`
  );
  const rows = statement.all(
    matchExpr,
    params.providerModel,
    ...params.sourceFilter.params,
    params.limit
  );

  const hits = [];
  for (const row of rows) {
    const textScore = params.bm25RankToScore(row.rank);
    hits.push({
      id: row.id,
      path: row.path,
      startLine: row.start_line,
      endLine: row.end_line,
      score: textScore,
      textScore,
      snippet: truncateUtf16Safe(row.text, params.snippetMaxChars),
      source: row.source
    });
  }
  return hits;
}
|
|
491
|
+
|
|
492
|
+
// src/db/schema.ts
|
|
493
|
+
var SCHEMA_VERSION = 4;
|
|
494
|
+
/**
 * Creates (or upgrades) the tables and indexes of the memory index database
 * and records the current schema version in `meta`.
 *
 * Order matters: `meta` must exist before `migrateIfNeeded` reads the stored
 * version, and the migration may drop tables that are re-created right after.
 *
 * @param {object} params
 * @param {object} params.db - Database handle exposing `exec` and `prepare`.
 * @param {string} params.ftsTable - Name of the FTS5 virtual table.
 * @param {string} params.embeddingCacheTable - Name of the embedding cache table.
 * @param {boolean} params.ftsEnabled - Whether to attempt creating the FTS5 table.
 * @returns {{ftsAvailable: boolean, ftsError?: string, migrated?: boolean}}
 *   `ftsError` is present when FTS creation failed, `migrated` when
 *   `migrateIfNeeded` returned true.
 */
function ensureMemoryIndexSchema(params) {
  const db = params.db;

  db.exec(`
    CREATE TABLE IF NOT EXISTS meta (
      key TEXT PRIMARY KEY,
      value TEXT NOT NULL
    );
  `);
  const migrated = migrateIfNeeded(db, params.ftsTable);

  db.exec(`
    CREATE TABLE IF NOT EXISTS files (
      path TEXT PRIMARY KEY,
      source TEXT NOT NULL DEFAULT 'memory',
      hash TEXT NOT NULL,
      mtime INTEGER NOT NULL,
      size INTEGER NOT NULL
    );
  `);
  db.exec(`
    CREATE TABLE IF NOT EXISTS chunks (
      id TEXT PRIMARY KEY,
      path TEXT NOT NULL,
      source TEXT NOT NULL DEFAULT 'memory',
      start_line INTEGER NOT NULL,
      end_line INTEGER NOT NULL,
      hash TEXT NOT NULL,
      model TEXT NOT NULL,
      text TEXT NOT NULL,
      embedding TEXT NOT NULL,
      updated_at INTEGER NOT NULL
    );
  `);
  db.exec(`
    CREATE TABLE IF NOT EXISTS ${params.embeddingCacheTable} (
      provider TEXT NOT NULL,
      model TEXT NOT NULL,
      provider_key TEXT NOT NULL,
      hash TEXT NOT NULL,
      embedding TEXT NOT NULL,
      dims INTEGER,
      updated_at INTEGER NOT NULL,
      PRIMARY KEY (provider, model, provider_key, hash)
    );
  `);
  db.exec(
    `CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at ON ${params.embeddingCacheTable}(updated_at);`
  );

  // FTS5 is optional: a failure here is reported in the result, not thrown.
  let ftsAvailable = false;
  let ftsError;
  if (params.ftsEnabled) {
    try {
      db.exec(
        `CREATE VIRTUAL TABLE IF NOT EXISTS ${params.ftsTable} USING fts5(
          text,
          id UNINDEXED,
          path UNINDEXED,
          source UNINDEXED,
          model UNINDEXED,
          start_line UNINDEXED,
          end_line UNINDEXED
        );`
      );
      ftsAvailable = true;
    } catch (err) {
      ftsError = err instanceof Error ? err.message : String(err);
    }
  }

  // Columns that may be missing from tables created by an earlier version.
  const requiredColumns = [
    ["files", "source", "TEXT NOT NULL DEFAULT 'memory'"],
    ["chunks", "source", "TEXT NOT NULL DEFAULT 'memory'"],
    ["chunks", "type", "TEXT"],
    ["chunks", "knowledge_type", "TEXT"],
    ["chunks", "knowledge_id", "TEXT"],
    ["chunks", "domains", "TEXT"],
    ["chunks", "entities", "TEXT"],
    ["chunks", "confidence", "REAL"]
  ];
  for (const [table, column, definition] of requiredColumns) {
    ensureColumn(db, table, column, definition);
  }

  const chunkIndexes = [
    `CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);`,
    `CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source);`,
    `CREATE INDEX IF NOT EXISTS idx_chunks_type ON chunks(type);`,
    `CREATE INDEX IF NOT EXISTS idx_chunks_knowledge_type ON chunks(knowledge_type);`,
    `CREATE INDEX IF NOT EXISTS idx_chunks_knowledge_id ON chunks(knowledge_id);`
  ];
  for (const statement of chunkIndexes) {
    db.exec(statement);
  }

  db.exec(`
    CREATE TABLE IF NOT EXISTS knowledge_links (
      from_id TEXT NOT NULL,
      to_id TEXT NOT NULL,
      relation TEXT NOT NULL,
      layer TEXT,
      weight REAL DEFAULT 0.5,
      source_path TEXT,
      created_at INTEGER,
      PRIMARY KEY (from_id, to_id, relation)
    );
  `);
  const linkIndexes = [
    `CREATE INDEX IF NOT EXISTS idx_kl_from ON knowledge_links(from_id);`,
    `CREATE INDEX IF NOT EXISTS idx_kl_to ON knowledge_links(to_id);`,
    `CREATE INDEX IF NOT EXISTS idx_kl_layer ON knowledge_links(layer);`
  ];
  for (const statement of linkIndexes) {
    db.exec(statement);
  }

  db.prepare(
    `INSERT OR REPLACE INTO meta (key, value) VALUES ('schema_version', ?)`
  ).run(String(SCHEMA_VERSION));

  const outcome = { ftsAvailable };
  if (ftsError) outcome.ftsError = ftsError;
  if (migrated) outcome.migrated = migrated;
  return outcome;
}
|
|
595
|
+
/**
 * Drops the index tables when the database was written by an older schema
 * version, so they can be re-created by the caller.
 *
 * The stored version is read from `meta`; a missing row, an unreadable table
 * or a non-numeric value all count as version 0 (nothing to migrate).
 *
 * @param {object} db - Database handle exposing `prepare(sql).get()` and `exec`.
 * @param {string} ftsTable - Name of the FTS table to drop with the others.
 * @returns {boolean} True when tables were dropped, false otherwise.
 */
function migrateIfNeeded(db, ftsTable) {
  let previous = 0;
  try {
    const row = db.prepare(
      `SELECT value FROM meta WHERE key = 'schema_version'`
    ).get();
    if (row) {
      previous = parseInt(row.value, 10) || 0;
    }
  } catch {
    previous = 0;
  }

  if (previous >= SCHEMA_VERSION) return false;
  // No positive stored version: nothing to drop.
  if (!(previous > 0)) return false;

  for (const table of ["files", "chunks", "knowledge_links", ftsTable]) {
    db.exec(`DROP TABLE IF EXISTS ${table}`);
  }
  try {
    db.exec(`DROP TABLE IF EXISTS chunks_vec`);
  } catch {
    // Failure to drop the vector table is deliberately ignored.
  }
  return true;
}
|
|
620
|
+
/**
 * Adds a column to a table unless `PRAGMA table_info` already lists it.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all()` and `exec`.
 * @param {string} table - Table name (interpolated into the SQL).
 * @param {string} column - Column name to look for / add.
 * @param {string} definition - Column definition appended after the name.
 */
function ensureColumn(db, table, column, definition) {
  const columns = db.prepare(`PRAGMA table_info(${table})`).all();
  const alreadyPresent = columns.some((info) => info.name === column);
  if (!alreadyPresent) {
    db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`);
  }
}
|
|
625
|
+
|
|
626
|
+
// src/session.ts
|
|
627
|
+
var os = __toESM(require("os"), 1);
|
|
628
|
+
/**
 * Splits a document into its leading `---` frontmatter and the remaining body.
 *
 * The fences may use LF or CRLF line endings, and the closing fence may be
 * the very end of the document (no trailing newline). CRLF inside the
 * frontmatter is normalized to LF before parsing, because the line-based
 * parser splits on "\n" only.
 *
 * @param {string} content - Full document text.
 * @returns {{frontmatter: (object|undefined), body: string}} Parsed
 *   frontmatter and the text after the closing fence. When there is no
 *   frontmatter, or parsing throws, `frontmatter` is undefined and `body` is
 *   the whole `content`.
 */
function parseFrontmatter(content) {
  const frontmatterRegex = /^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n|$)/;
  const match = content.match(frontmatterRegex);
  if (!match) {
    return { frontmatter: void 0, body: content };
  }
  const yamlContent = match[1].replace(/\r\n/g, "\n");
  const body = content.slice(match[0].length);
  try {
    const frontmatter = parseSimpleYaml(yamlContent);
    return { frontmatter, body };
  } catch {
    // Unparseable frontmatter is treated as if none were present.
    return { frontmatter: void 0, body: content };
  }
}
|
|
643
|
+
/**
 * Parses a small YAML document by handing all of its lines to
 * `parseYamlBlock` at indentation 0.
 *
 * @param {string} yaml - YAML text with "\n" line separators.
 * @returns {object} The parsed top-level mapping.
 */
function parseSimpleYaml(yaml) {
  const rows = yaml.split("\n");
  const { value } = parseYamlBlock(rows, 0, 0, rows.length);
  return value;
}
|
|
647
|
+
/**
 * Parses a run of `key: value` lines at one indentation level into an object.
 *
 * Blank lines, lines indented deeper than `indent`, and lines that do not
 * look like `key:` are skipped. Parsing stops at the first line indented less
 * than `indent`. A key with no inline value takes its value from the more
 * deeply indented lines that follow: a list when the first of them starts
 * with "- ", otherwise a nested mapping; with no such lines the value is null.
 *
 * @param {string[]} lines - All lines of the document.
 * @param {number} indent - Indentation (in characters) of this block's keys.
 * @param {number} startIdx - First line index to examine.
 * @param {number} endIdx - Exclusive upper bound of line indexes.
 * @returns {{value: object, nextIdx: number}} The mapping and the index of
 *   the first line not consumed.
 */
function parseYamlBlock(lines, indent, startIdx, endIdx) {
  const mapping = {};
  let cursor = startIdx;
  while (cursor < endIdx) {
    const line = lines[cursor];
    if (!line || !line.trim()) {
      cursor++;
      continue;
    }
    const depth = getIndent(line);
    if (depth < indent) break;

    // Only lines at exactly this block's indentation can define keys.
    const keyMatch = depth === indent ? line.match(/^(\s*)([\w-]+):\s*(.*)?$/) : null;
    if (!keyMatch) {
      cursor++;
      continue;
    }
    const key = keyMatch[2];
    const inlineValue = keyMatch[3]?.trim() ?? "";
    if (inlineValue !== "") {
      mapping[key] = parseYamlValue(inlineValue);
      cursor++;
      continue;
    }

    // No inline value: look at the next non-empty line for nested content.
    const childIdx = findNextNonEmptyLine(lines, cursor + 1, endIdx);
    if (childIdx < endIdx) {
      const childLine = lines[childIdx];
      const childIndent = getIndent(childLine);
      if (childIndent > indent) {
        const parseNested = childLine.trimStart().startsWith("- ") ? parseYamlList : parseYamlBlock;
        const nested = parseNested(lines, childIndent, cursor + 1, endIdx);
        mapping[key] = nested.value;
        cursor = nested.nextIdx;
        continue;
      }
    }
    mapping[key] = null;
    cursor++;
  }
  return { value: mapping, nextIdx: cursor };
}
|
|
696
|
+
/**
 * Parses a run of "- item" lines at one indentation level into an array.
 *
 * Blank lines and lines indented deeper than `indent` are skipped; parsing
 * stops at a line indented less than `indent` or at a line on this level
 * that does not start with "- ". Items can be:
 *  - empty ("- "): a nested mapping taken from the deeper-indented lines that
 *    follow, or null when there are none;
 *  - "- key: value": an object that also absorbs following `key: value` lines
 *    indented exactly two characters deeper than the dash;
 *  - anything else: a scalar parsed with `parseYamlValue`.
 *
 * @param {string[]} lines - All lines of the document.
 * @param {number} indent - Indentation (in characters) of the list dashes.
 * @param {number} startIdx - First line index to examine.
 * @param {number} endIdx - Exclusive upper bound of line indexes.
 * @returns {{value: Array, nextIdx: number}} The items and the index of the
 *   first line not consumed.
 */
function parseYamlList(lines, indent, startIdx, endIdx) {
  const items = [];
  let cursor = startIdx;
  while (cursor < endIdx) {
    const line = lines[cursor];
    if (!line || !line.trim()) {
      cursor++;
      continue;
    }
    const depth = getIndent(line);
    if (depth < indent) break;
    if (depth > indent) {
      cursor++;
      continue;
    }
    const stripped = line.trimStart();
    if (!stripped.startsWith("- ")) break;
    const itemContent = stripped.slice(2).trim();

    if (itemContent === "") {
      // Bare dash: the item is whatever nested block follows, if any.
      const childIdx = findNextNonEmptyLine(lines, cursor + 1, endIdx);
      if (childIdx < endIdx) {
        const childIndent = getIndent(lines[childIdx]);
        if (childIndent > indent) {
          const nested = parseYamlBlock(lines, childIndent, cursor + 1, endIdx);
          items.push(nested.value);
          cursor = nested.nextIdx;
          continue;
        }
      }
      items.push(null);
      cursor++;
      continue;
    }

    const inlinePair = itemContent.match(/^([\w-]+):\s*(.*)$/);
    if (!inlinePair) {
      items.push(parseYamlValue(itemContent));
      cursor++;
      continue;
    }

    // "- key: value" starts an object; gather its remaining keys.
    const record = {};
    record[inlinePair[1]] = parseYamlValue(inlinePair[2]?.trim() ?? "");
    const siblingIndent = indent + 2;
    let scan = cursor + 1;
    while (scan < endIdx) {
      const follower = lines[scan];
      if (!follower || !follower.trim()) {
        scan++;
        continue;
      }
      const followerIndent = getIndent(follower);
      if (followerIndent < siblingIndent) break;
      const pair = followerIndent === siblingIndent ? follower.match(/^\s*([\w-]+):\s*(.*)$/) : null;
      if (!pair) break;
      record[pair[1]] = parseYamlValue(pair[2]?.trim() ?? "");
      scan++;
    }
    items.push(record);
    cursor = scan;
  }
  return { value: items, nextIdx: cursor };
}
|
|
764
|
+
/**
 * Counts the leading whitespace characters of a line.
 *
 * @param {string} line - Line to measure.
 * @returns {number} Number of whitespace characters before the first
 *   non-whitespace character.
 */
function getIndent(line) {
  const leading = line.match(/^\s*/);
  if (!leading) return 0;
  return leading[0].length;
}
|
|
768
|
+
/**
 * Find the first line in `[from, end)` that has non-whitespace content.
 *
 * @param {Array<string|undefined>} lines - Lines being scanned.
 * @param {number} from - First index to inspect (inclusive).
 * @param {number} end - Index at which scanning stops (exclusive).
 * @returns {number} Index of the first non-blank line, or `end` when none exists.
 */
function findNextNonEmptyLine(lines, from, end) {
  let cursor = from;
  while (cursor < end) {
    const candidate = lines[cursor];
    if (candidate?.trim()) return cursor;
    cursor += 1;
  }
  return end;
}
|
|
774
|
+
/**
 * Split the inside of an inline `[a, b, c]` list into raw item strings.
 *
 * Commas inside a quoted item or inside a nested `[...]` do not split.
 * A quote only opens a quoted item when it is the first non-blank character
 * of that item, so apostrophes inside plain words (`it's`) stay literal.
 * If quotes or brackets are unbalanced, falls back to a plain comma split.
 *
 * @param {string} inner - List text without the outer brackets.
 * @returns {string[]} Untrimmed item strings.
 */
function splitFlowSequenceItems(inner) {
  const items = [];
  let current = "";
  let quote = null;
  let depth = 0;
  for (const ch of inner) {
    if (quote) {
      current += ch;
      if (ch === quote) quote = null;
      continue;
    }
    if ((ch === '"' || ch === "'") && current.trim() === "") {
      quote = ch;
      current += ch;
      continue;
    }
    if (ch === "[") depth += 1;
    else if (ch === "]" && depth > 0) depth -= 1;
    if (ch === "," && depth === 0) {
      items.push(current);
      current = "";
      continue;
    }
    current += ch;
  }
  items.push(current);
  // Malformed input: keep the historical naive behaviour rather than guessing.
  if (quote !== null || depth !== 0) return inner.split(",");
  return items;
}

/**
 * Convert a trimmed YAML scalar or inline list into a JavaScript value.
 *
 * - `""`, `null`, `~` become `null`
 * - text wrapped in matching single or double quotes becomes the inner string
 * - `true` / `false` become booleans
 * - anything `Number()` accepts becomes a number
 * - `[a, b]` becomes an array whose items are parsed recursively
 * - everything else is returned unchanged as a string
 *
 * @param {string} value - Raw value text (callers pass it already trimmed).
 * @returns {null|boolean|number|string|Array} The parsed value.
 */
function parseYamlValue(value) {
  if (value === "") return null;
  // Require two characters so a lone quote is not treated as an empty quoted string.
  const isQuoted =
    value.length >= 2 &&
    ((value.startsWith('"') && value.endsWith('"')) ||
      (value.startsWith("'") && value.endsWith("'")));
  if (isQuoted) return value.slice(1, -1);
  if (value === "null" || value === "~") return null;
  if (value === "true") return true;
  if (value === "false") return false;
  const num = Number(value);
  if (!Number.isNaN(num)) return num;
  if (value.startsWith("[") && value.endsWith("]")) {
    const inner = value.slice(1, -1);
    if (inner.trim() === "") return [];
    return splitFlowSequenceItems(inner).map((item) => parseYamlValue(item.trim()));
  }
  return value;
}
|
|
791
|
+
/**
 * Render a frontmatter object as a `---` delimited YAML block.
 *
 * Keys are written in a fixed order: id, type, session, created, updated,
 * tags, domain, entities, confidence, source, links, supersedes. Falsy
 * scalars and empty lists are omitted; `confidence` and `supersedes` are
 * written whenever they are not `undefined` (`supersedes: null` is written
 * as `~`).
 *
 * Nested sections use two-space indentation, and the continuation keys of
 * a link item sit two columns past its `- ` marker. A `session:` or
 * `source:` header is only written when at least one child line follows,
 * so an empty object does not leave a dangling key.
 *
 * @param {object} frontmatter - Frontmatter fields to serialize.
 * @returns {string} The YAML block, terminated by a newline.
 */
function serializeFrontmatter(frontmatter) {
  const lines = ["---"];
  const pushSection = (header, children) => {
    if (children.length > 0) lines.push(header, ...children);
  };
  const pushInlineList = (key, values) => {
    if (values && values.length > 0) lines.push(`${key}: [${values.join(", ")}]`);
  };

  if (frontmatter.id) lines.push(`id: ${frontmatter.id}`);
  if (frontmatter.type) lines.push(`type: ${frontmatter.type}`);

  if (frontmatter.session) {
    const session = frontmatter.session;
    const children = [];
    if (session.id) children.push(`  id: ${session.id}`);
    if (session.source) children.push(`  source: ${session.source}`);
    if (session.project) children.push(`  project: ${formatPath(session.project)}`);
    if (session.transcript) children.push(`  transcript: ${formatPath(session.transcript)}`);
    pushSection("session:", children);
  }

  if (frontmatter.created) lines.push(`created: ${frontmatter.created}`);
  if (frontmatter.updated) lines.push(`updated: ${frontmatter.updated}`);

  pushInlineList("tags", frontmatter.tags);
  pushInlineList("domain", frontmatter.domain);
  pushInlineList("entities", frontmatter.entities);

  if (frontmatter.confidence !== void 0) {
    lines.push(`confidence: ${frontmatter.confidence}`);
  }

  if (frontmatter.source) {
    const source = frontmatter.source;
    const children = [];
    if (source.origin) children.push(`  origin: ${source.origin}`);
    if (source.trajectories && source.trajectories.length > 0) {
      children.push(`  trajectories: [${source.trajectories.join(", ")}]`);
    }
    if (source.agentId) children.push(`  agentId: ${source.agentId}`);
    pushSection("source:", children);
  }

  if (frontmatter.links && frontmatter.links.length > 0) {
    lines.push("links:");
    for (const link of frontmatter.links) {
      lines.push(`  - target: ${link.target}`);
      lines.push(`    relation: ${link.relation}`);
      if (link.layer) lines.push(`    layer: ${link.layer}`);
    }
  }

  if (frontmatter.supersedes !== void 0) {
    lines.push(`supersedes: ${frontmatter.supersedes === null ? "~" : frontmatter.supersedes}`);
  }

  lines.push("---");
  return lines.join("\n") + "\n";
}
|
|
847
|
+
/**
 * Merge new frontmatter into a document and re-serialize it.
 *
 * Fields in `frontmatter` override fields already present in `content`;
 * the two `session` objects are merged key by key. `created` is filled in
 * with the current time only when absent; `updated` is always set to the
 * current time.
 *
 * @param {string} content - Document text, with or without frontmatter.
 * @param {object} frontmatter - Fields to add or override.
 * @returns {string} Serialized frontmatter followed by the original body.
 */
function addFrontmatter(content, frontmatter) {
  const parsed = parseFrontmatter(content);
  const previous = parsed.frontmatter;
  const merged = {
    ...previous,
    ...frontmatter,
    session: { ...previous?.session, ...frontmatter.session },
  };
  if (!merged.created) {
    merged.created = new Date().toISOString();
  }
  merged.updated = new Date().toISOString();
  const header = serializeFrontmatter(merged);
  return header + parsed.body;
}
|
|
863
|
+
/**
 * Attach session metadata to a document's frontmatter.
 *
 * @param {string} content - Document text.
 * @param {object} session - Session fields to merge in.
 * @returns {string} The value returned by `addFrontmatter`.
 */
function addSessionToContent(content, session) {
  const patch = { session };
  return addFrontmatter(content, patch);
}
|
|
866
|
+
/**
 * Abbreviate a path under the user's home directory to `~`-relative form.
 *
 * The home directory is only replaced when it matches a whole path prefix:
 * the path is exactly the home directory, or the next character is a path
 * separator. A sibling such as `/home/alice2` is therefore left untouched
 * when home is `/home/alice`. Paths outside the home directory, and every
 * path when the home directory is empty or a bare root, are returned as is.
 *
 * @param {string} filePath - Path to abbreviate.
 * @returns {string} `~`-prefixed path, or `filePath` unchanged.
 */
function formatPath(filePath) {
  // Drop trailing separators so "/home/alice/" and "/home/alice" behave alike.
  const home = os.homedir().replace(/[\\/]+$/, "");
  if (home === "" || !filePath.startsWith(home)) {
    return filePath;
  }
  const rest = filePath.slice(home.length);
  const onBoundary = rest === "" || rest.startsWith("/") || rest.startsWith("\\");
  return onBoundary ? "~" + rest : filePath;
}
|
|
873
|
+
/**
 * Read the `session` entry from a document's frontmatter.
 *
 * @param {string} content - Document text.
 * @returns {object|undefined} The session object, or `undefined` when the
 *   document has no frontmatter or no session entry.
 */
function extractSession(content) {
  const parsed = parseFrontmatter(content);
  return parsed.frontmatter?.session;
}
|
|
877
|
+
|
|
878
|
+
// src/search/graph.ts
|
|
879
|
+
/**
 * Fetch the links in `knowledge_links` whose `from_id` is `fromId`.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all(...params)`.
 * @param {string} fromId - Id of the node the links start from.
 * @param {{relation?: string, layer?: string}} [opts] - Optional equality
 *   filters; a filter is applied only when its value is truthy.
 * @returns {Array<object>} Matching rows converted with `toGraphLink`.
 */
function getLinksFrom(db, fromId, opts) {
  const conditions = ["from_id = ?"];
  const bindings = [fromId];
  if (opts?.relation) {
    conditions.push("relation = ?");
    bindings.push(opts.relation);
  }
  if (opts?.layer) {
    conditions.push("layer = ?");
    bindings.push(opts.layer);
  }
  const sql = `SELECT from_id, to_id, relation, layer, weight, source_path FROM knowledge_links WHERE ${conditions.join(" AND ")}`;
  return db.prepare(sql).all(...bindings).map(toGraphLink);
}
|
|
893
|
+
/**
 * Fetch the links in `knowledge_links` whose `to_id` is `toId`.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all(...params)`.
 * @param {string} toId - Id of the node the links point at.
 * @param {{relation?: string, layer?: string}} [opts] - Optional equality
 *   filters; a filter is applied only when its value is truthy.
 * @returns {Array<object>} Matching rows converted with `toGraphLink`.
 */
function getLinksTo(db, toId, opts) {
  const conditions = ["to_id = ?"];
  const bindings = [toId];
  if (opts?.relation) {
    conditions.push("relation = ?");
    bindings.push(opts.relation);
  }
  if (opts?.layer) {
    conditions.push("layer = ?");
    bindings.push(opts.layer);
  }
  const sql = `SELECT from_id, to_id, relation, layer, weight, source_path FROM knowledge_links WHERE ${conditions.join(" AND ")}`;
  return db.prepare(sql).all(...bindings).map(toGraphLink);
}
|
|
907
|
+
/**
 * Collect nodes reachable from `startId` within `depth` hops, following
 * links in both directions.
 *
 * The walk proceeds one hop at a time. For every node of the current level
 * its outgoing links are examined before its incoming links, and a node is
 * reported only the first time it is seen. The start node is never reported.
 *
 * @param {object} db - Database handle passed through to the link queries.
 * @param {string} startId - Node to start from.
 * @param {number} [depth=1] - Maximum number of hops.
 * @param {{relation?: string, layer?: string}} [opts] - Filters forwarded to
 *   `getLinksFrom` / `getLinksTo`.
 * @returns {Array<{id: string, depth: number, link: object}>} Discovered
 *   nodes with the hop count and the link through which each was first reached.
 */
function getNeighbors(db, startId, depth = 1, opts) {
  const seen = new Set([startId]);
  const found = [];
  let level = [startId];
  let hops = 1;
  while (hops <= depth && level.length > 0) {
    const upcoming = [];
    const record = (id, link) => {
      if (seen.has(id)) return;
      seen.add(id);
      upcoming.push(id);
      found.push({ id, depth: hops, link });
    };
    for (const nodeId of level) {
      for (const link of getLinksFrom(db, nodeId, opts)) {
        record(link.toId, link);
      }
      for (const link of getLinksTo(db, nodeId, opts)) {
        record(link.fromId, link);
      }
    }
    level = upcoming;
    hops += 1;
  }
  return found;
}
|
|
936
|
+
/**
 * Search level by level for a chain of links connecting two nodes,
 * treating links as traversable in both directions.
 *
 * For each node of the current level, outgoing links are examined before
 * incoming links. The search stops as soon as `toId` is reached, after
 * `maxDepth` hops, or when no unvisited nodes remain.
 *
 * @param {object} db - Database handle passed through to the link queries.
 * @param {string} fromId - Start node.
 * @param {string} toId - Target node.
 * @param {number} [maxDepth=3] - Maximum number of hops to explore.
 * @returns {Array<object>} The result of `reconstructPath` when the target
 *   is reached; an empty array when the ids are equal or no path is found.
 */
function getPathBetween(db, fromId, toId, maxDepth = 3) {
  if (fromId === toId) return [];
  const seen = new Set([fromId]);
  const cameVia = new Map();
  let level = [fromId];
  for (let hop = 0; hop < maxDepth && level.length > 0; hop += 1) {
    const upcoming = [];
    // Marks a neighbour as visited; returns true when it is the target.
    const reach = (neighborId, link) => {
      if (seen.has(neighborId)) return false;
      seen.add(neighborId);
      cameVia.set(neighborId, link);
      if (neighborId === toId) return true;
      upcoming.push(neighborId);
      return false;
    };
    for (const nodeId of level) {
      for (const link of getLinksFrom(db, nodeId)) {
        if (reach(link.toId, link)) return reconstructPath(cameVia, fromId, toId);
      }
      for (const link of getLinksTo(db, nodeId)) {
        if (reach(link.fromId, link)) return reconstructPath(cameVia, fromId, toId);
      }
    }
    level = upcoming;
  }
  return [];
}
|
|
972
|
+
/**
 * Rebuild the ordered list of links from `fromId` to `toId` using the
 * per-node "reached via" links recorded during a search.
 *
 * Walks backwards from `toId`; at each step the next node is whichever end
 * of the link is not the current node. Stops early if a node has no
 * recorded link.
 *
 * @param {Map<string, object>} parentLink - Node id to the link through
 *   which that node was first reached.
 * @param {string} fromId - Start node of the path.
 * @param {string} toId - End node of the path.
 * @returns {Array<object>} Links ordered from the start side to the end side.
 */
function reconstructPath(parentLink, fromId, toId) {
  const backwards = [];
  let node = toId;
  while (node !== fromId) {
    const via = parentLink.get(node);
    if (!via) break;
    backwards.push(via);
    node = via.toId === node ? via.fromId : via.toId;
  }
  return backwards.reverse();
}
|
|
983
|
+
/**
 * Map a `knowledge_links` row (snake_case columns) to a camelCase link object.
 *
 * @param {object} row - Row with `from_id`, `to_id`, `relation`, `layer`,
 *   `weight` and `source_path`.
 * @returns {{fromId: *, toId: *, relation: *, layer: *, weight: *, sourcePath: *}}
 */
function toGraphLink(row) {
  const {
    from_id: fromId,
    to_id: toId,
    relation,
    layer,
    weight,
    source_path: sourcePath,
  } = row;
  return { fromId, toId, relation, layer, weight, sourcePath };
}
|
|
993
|
+
|
|
994
|
+
// src/db/sqlite-vec.ts
|
|
995
|
+
/**
 * Load the sqlite-vec extension into a database handle.
 *
 * When `params.extensionPath` is a non-blank string, that file is loaded
 * via `db.loadExtension`; otherwise the `sqlite-vec` package loads its own
 * bundled binary. Any failure, including the package being absent, is
 * reported in the return value instead of being thrown.
 *
 * @param {{db: object, extensionPath?: string}} params
 * @returns {Promise<{ok: true, extensionPath: string} | {ok: false, error: string}>}
 */
async function loadSqliteVecExtension(params) {
  try {
    const sqliteVec = await import("sqlite-vec");
    const customPath = params.extensionPath?.trim() || void 0;
    const extensionPath = customPath ?? sqliteVec.getLoadablePath();
    const { db } = params;
    db.enableLoadExtension(true);
    if (customPath) {
      db.loadExtension(extensionPath);
    } else {
      sqliteVec.load(db);
    }
    return { ok: true, extensionPath };
  } catch (err) {
    return { ok: false, error: err instanceof Error ? err.message : String(err) };
  }
}
|
|
1012
|
+
|
|
1013
|
+
// src/embeddings/embeddings.ts
|
|
1014
|
+
var import_node_fs2 = __toESM(require("fs"), 1);
|
|
1015
|
+
var import_node_path2 = __toESM(require("path"), 1);
|
|
1016
|
+
var import_node_os = __toESM(require("os"), 1);
|
|
1017
|
+
var DEFAULT_LOCAL_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
|
|
1018
|
+
var DEFAULT_OPENAI_EMBEDDING_MODEL = "text-embedding-3-small";
|
|
1019
|
+
var DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1";
|
|
1020
|
+
var DEFAULT_GEMINI_EMBEDDING_MODEL = "gemini-embedding-001";
|
|
1021
|
+
var DEFAULT_GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta";
|
|
1022
|
+
/**
 * Build a placeholder embedding provider that never produces vectors.
 *
 * @returns {{id: string, model: string, embedQuery: Function, embedBatch: Function}}
 *   Provider with id `"none"`; `embedQuery` resolves to an empty array and
 *   `embedBatch` resolves to one empty array per input text.
 */
function createNoOpEmbeddingProvider() {
  const embedQuery = async () => [];
  const embedBatch = async (texts) => texts.map(() => []);
  return { id: "none", model: "bm25-only", embedQuery, embedBatch };
}
|
|
1030
|
+
/**
 * Expand a leading `~/` to the current user's home directory.
 *
 * @param {string} filePath - Path that may start with `~/`.
 * @returns {string} The expanded path, or `filePath` unchanged when it does
 *   not start with `~/`.
 */
function resolveUserPath(filePath) {
  if (!filePath.startsWith("~/")) {
    return filePath;
  }
  const homeDir = import_node_os.default.homedir();
  return import_node_path2.default.join(homeDir, filePath.slice(2));
}
|
|
1036
|
+
/**
 * Decide whether the local provider may be picked automatically.
 *
 * True only when `options.local.modelPath` is set, is not an `hf:` or
 * `http(s):` reference, and names an existing regular file after `~/`
 * expansion.
 *
 * @param {{local?: {modelPath?: string}}} options
 * @returns {boolean}
 */
function canAutoSelectLocal(options) {
  const configured = options.local?.modelPath?.trim();
  if (!configured || /^(hf:|https?:)/i.test(configured)) {
    return false;
  }
  const localPath = resolveUserPath(configured);
  try {
    const stats = import_node_fs2.default.statSync(localPath);
    return stats.isFile();
  } catch {
    // Missing or unreadable path: not usable as a local model.
    return false;
  }
}
|
|
1047
|
+
/**
 * Report whether an error's message mentions an API key.
 *
 * @param {unknown} err - Error or other thrown value.
 * @returns {boolean} True when the formatted message contains `"API key"`
 *   or `"apiKey"`.
 */
function isMissingApiKeyError(err) {
  const text = formatError(err);
  return ["API key", "apiKey"].some((needle) => text.includes(needle));
}
|
|
1051
|
+
/**
 * Dynamically import the `node-llama-cpp` package.
 *
 * @returns {Promise<object>} The imported module; rejects when the package
 *   cannot be loaded.
 */
async function importNodeLlamaCpp() {
  return import("node-llama-cpp");
}
|
|
1055
|
+
/**
 * Create an embedding provider backed by a local model through node-llama-cpp.
 *
 * The llama runtime, model and embedding context are created on first use.
 * Initialisation is single-flight: calls that arrive while it is in
 * progress share the same pending promise, so the model is loaded once
 * even when `embedQuery` / `embedBatch` are invoked concurrently. If
 * initialisation fails, the next call retries and reuses whatever stages
 * had already succeeded.
 *
 * @param {{local?: {modelPath?: string, modelCacheDir?: string}}} options
 * @returns {Promise<{id: string, model: string, embedQuery: Function, embedBatch: Function}>}
 * @throws When `node-llama-cpp` cannot be imported.
 */
async function createLocalEmbeddingProvider(options) {
  const modelPath = options.local?.modelPath?.trim() || DEFAULT_LOCAL_MODEL;
  const modelCacheDir = options.local?.modelCacheDir?.trim();
  const { getLlama, resolveModelFile, LlamaLogLevel } = await importNodeLlamaCpp();
  let llama = null;
  let embeddingModel = null;
  let embeddingContext = null;
  let pendingInit = null;

  // Runs the staged setup; only ever executed by one caller at a time.
  const initContext = async () => {
    if (!llama) {
      llama = await getLlama({ logLevel: LlamaLogLevel.error });
    }
    if (!embeddingModel) {
      const resolved = await resolveModelFile(modelPath, modelCacheDir || void 0);
      embeddingModel = await llama.loadModel({ modelPath: resolved });
    }
    if (!embeddingContext) {
      embeddingContext = await embeddingModel.createEmbeddingContext();
    }
    return embeddingContext;
  };

  const ensureContext = () => {
    if (embeddingContext) return Promise.resolve(embeddingContext);
    if (!pendingInit) {
      // Clear the slot when done so a failed attempt can be retried later.
      pendingInit = initContext().finally(() => {
        pendingInit = null;
      });
    }
    return pendingInit;
  };

  const embedOne = async (ctx, text) => {
    const embedding = await ctx.getEmbeddingFor(text);
    return Array.from(embedding.vector);
  };

  return {
    id: "local",
    model: modelPath,
    embedQuery: async (text) => embedOne(await ensureContext(), text),
    embedBatch: async (texts) => {
      const ctx = await ensureContext();
      return Promise.all(texts.map((text) => embedOne(ctx, text)));
    },
  };
}
|
|
1095
|
+
/**
 * Normalise an OpenAI embedding model name.
 *
 * @param {string} model - Model name, optionally prefixed with `openai/`.
 * @returns {string} The trimmed name without the `openai/` prefix, or the
 *   default embedding model when the input is blank.
 */
function normalizeOpenAiModel(model) {
  const vendorPrefix = "openai/";
  const name = model.trim();
  if (name === "") {
    return DEFAULT_OPENAI_EMBEDDING_MODEL;
  }
  return name.startsWith(vendorPrefix) ? name.slice(vendorPrefix.length) : name;
}
|
|
1101
|
+
/**
 * Find the OpenAI API key to use.
 *
 * `options.openai.apiKey` takes precedence over the `OPENAI_API_KEY`
 * environment variable; blank values are ignored.
 *
 * @param {{openai?: {apiKey?: string}}} options
 * @returns {string} The trimmed API key.
 * @throws {Error} When neither source provides a non-blank key.
 */
function resolveOpenAiApiKey(options) {
  const candidates = [options.openai?.apiKey, process.env.OPENAI_API_KEY];
  for (const candidate of candidates) {
    const key = candidate?.trim();
    if (key) return key;
  }
  throw new Error("OpenAI API key not found. Set OPENAI_API_KEY env var or pass openai.apiKey option.");
}
|
|
1108
|
+
/**
 * Create an embedding provider that calls an OpenAI-compatible
 * `/embeddings` endpoint.
 *
 * Caller-supplied `options.openai.headers` are applied last, so they can
 * override the default `Content-Type` and `Authorization` headers.
 *
 * @param {{model?: string, openai?: {apiKey?: string, baseUrl?: string, headers?: object}}} options
 * @returns {Promise<{provider: object, client: {baseUrl: string, headers: object, model: string}}>}
 *   The provider plus the resolved client settings.
 * @throws {Error} When no API key can be resolved; the provider's embed
 *   functions throw when the endpoint responds with a non-OK status.
 */
async function createOpenAiEmbeddingProvider(options) {
  const apiKey = resolveOpenAiApiKey(options);
  const settings = options.openai;
  const baseUrl = settings?.baseUrl?.trim() || DEFAULT_OPENAI_BASE_URL;
  const headers = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${apiKey}`,
    ...(settings?.headers ?? {}),
  };
  const client = { baseUrl, headers, model: normalizeOpenAiModel(options.model || "") };
  const endpoint = `${baseUrl.replace(/\/$/, "")}/embeddings`;

  // Embeds a list of texts in one request; an empty list skips the request.
  async function requestEmbeddings(input) {
    if (input.length === 0) return [];
    const response = await fetch(endpoint, {
      method: "POST",
      headers: client.headers,
      body: JSON.stringify({ model: client.model, input }),
    });
    if (!response.ok) {
      const detail = await response.text();
      throw new Error(`openai embeddings failed: ${response.status} ${detail}`);
    }
    const payload = await response.json();
    const entries = payload.data ?? [];
    return entries.map((entry) => entry.embedding ?? []);
  }

  const provider = {
    id: "openai",
    model: client.model,
    embedQuery: async (text) => {
      const vectors = await requestEmbeddings([text]);
      return vectors[0] ?? [];
    },
    embedBatch: requestEmbeddings,
  };
  return { provider, client };
}
|
|
1148
|
+
/**
 * Normalise a Gemini embedding model name.
 *
 * Strips a leading `models/`, then a leading `gemini/` or `google/` vendor
 * prefix.
 *
 * @param {string} model - Raw model name.
 * @returns {string} The bare model name, or the default Gemini embedding
 *   model when the input is blank.
 */
function normalizeGeminiModel(model) {
  const name = model.trim();
  if (name === "") {
    return DEFAULT_GEMINI_EMBEDDING_MODEL;
  }
  const bare = name.replace(/^models\//, "");
  for (const vendor of ["gemini/", "google/"]) {
    if (bare.startsWith(vendor)) return bare.slice(vendor.length);
  }
  return bare;
}
|
|
1156
|
+
/**
 * Normalise a Gemini base URL.
 *
 * Removes trailing slashes and truncates the URL at the first `/openai`
 * segment.
 *
 * @param {string} raw - Base URL as configured.
 * @returns {string} The normalised base URL.
 */
function normalizeGeminiBaseUrl(raw) {
  const base = raw.replace(/\/+$/, "");
  const cut = base.indexOf("/openai");
  return cut > -1 ? base.slice(0, cut) : base;
}
|
|
1162
|
+
/**
 * Ensure a Gemini model name carries the `models/` path prefix.
 *
 * @param {string} model - Model name with or without the prefix.
 * @returns {string} The name prefixed with `models/`.
 */
function buildGeminiModelPath(model) {
  if (model.startsWith("models/")) {
    return model;
  }
  return `models/${model}`;
}
|
|
1165
|
+
/**
 * Find the Gemini API key to use.
 *
 * Sources are tried in order: `options.gemini.apiKey`, the
 * `GOOGLE_API_KEY` environment variable, then `GEMINI_API_KEY`. Blank
 * values are ignored.
 *
 * @param {{gemini?: {apiKey?: string}}} options
 * @returns {string} The trimmed API key.
 * @throws {Error} When no source provides a non-blank key.
 */
function resolveGeminiApiKey(options) {
  const candidates = [
    options.gemini?.apiKey,
    process.env.GOOGLE_API_KEY,
    process.env.GEMINI_API_KEY,
  ];
  for (const candidate of candidates) {
    const key = candidate?.trim();
    if (key) return key;
  }
  throw new Error("Gemini API key not found. Set GOOGLE_API_KEY or GEMINI_API_KEY env var or pass gemini.apiKey option.");
}
|
|
1174
|
+
/**
 * Create an embedding provider that calls the Gemini `embedContent` and
 * `batchEmbedContents` endpoints.
 *
 * Queries are sent with task type `RETRIEVAL_QUERY`, batch items with
 * `RETRIEVAL_DOCUMENT`. Caller-supplied `options.gemini.headers` are
 * applied last, so they can override the defaults.
 *
 * @param {{model?: string, gemini?: {apiKey?: string, baseUrl?: string, headers?: object}}} options
 * @returns {Promise<{provider: object, client: {baseUrl: string, headers: object, model: string, modelPath: string}}>}
 * @throws {Error} When no API key can be resolved; the provider's embed
 *   functions throw when the endpoint responds with a non-OK status.
 */
async function createGeminiEmbeddingProvider(options) {
  const apiKey = resolveGeminiApiKey(options);
  const settings = options.gemini;
  const baseUrl = normalizeGeminiBaseUrl(settings?.baseUrl?.trim() || DEFAULT_GEMINI_BASE_URL);
  const headers = {
    "Content-Type": "application/json",
    "x-goog-api-key": apiKey,
    ...(settings?.headers ?? {}),
  };
  const model = normalizeGeminiModel(options.model || "");
  const modelPath = buildGeminiModelPath(model);
  const client = { baseUrl, headers, model, modelPath };
  const embedUrl = `${baseUrl}/${modelPath}:embedContent`;
  const batchUrl = `${baseUrl}/${modelPath}:batchEmbedContents`;

  // POSTs a JSON body and returns the decoded response, throwing on non-OK status.
  const postJson = async (url, body) => {
    const response = await fetch(url, {
      method: "POST",
      headers: client.headers,
      body: JSON.stringify(body),
    });
    if (!response.ok) {
      const detail = await response.text();
      throw new Error(`gemini embeddings failed: ${response.status} ${detail}`);
    }
    return response.json();
  };

  const embedQuery = async (text) => {
    if (!text.trim()) return [];
    const payload = await postJson(embedUrl, {
      content: { parts: [{ text }] },
      taskType: "RETRIEVAL_QUERY",
    });
    return payload.embedding?.values ?? [];
  };

  const embedBatch = async (texts) => {
    if (texts.length === 0) return [];
    const requests = texts.map((text) => ({
      model: modelPath,
      content: { parts: [{ text }] },
      taskType: "RETRIEVAL_DOCUMENT",
    }));
    const payload = await postJson(batchUrl, { requests });
    const embeddings = Array.isArray(payload.embeddings) ? payload.embeddings : [];
    // Always one vector per input, even if the response is short.
    return texts.map((_, index) => embeddings[index]?.values ?? []);
  };

  return {
    provider: { id: "gemini", model: client.model, embedQuery, embedBatch },
    client,
  };
}
|
|
1236
|
+
/**
 * Create the embedding provider selected by `options.provider`.
 *
 * - `"none"`: returns the no-op provider.
 * - `"auto"`: tries the local provider when `canAutoSelectLocal` allows it,
 *   then OpenAI, then Gemini. A local failure or a missing API key moves on
 *   to the next candidate; any other remote failure is thrown. When no
 *   candidate works, the no-op provider is returned with `fallbackFrom`
 *   and `fallbackReason` set.
 * - `"local"` / `"gemini"` / anything else (treated as OpenAI): creates that
 *   provider; on failure tries `options.fallback` when it is set, is not
 *   `"none"`, and differs from the requested provider.
 *
 * Errors thrown from here carry the underlying error as `cause`.
 *
 * @param {{provider: string, fallback?: string}} options - Provider
 *   selection plus the provider-specific settings read by the factories.
 * @returns {Promise<object>} `{ provider, requestedProvider }` plus, where
 *   applicable, `openAi` or `gemini` client settings and
 *   `fallbackFrom` / `fallbackReason`.
 * @throws {Error} When the requested provider, and the fallback if one was
 *   attempted, cannot be created.
 */
async function createEmbeddingProvider(options) {
  const requestedProvider = options.provider;
  const fallback = options.fallback ?? "none";
  if (requestedProvider === "none") {
    return {
      provider: createNoOpEmbeddingProvider(),
      requestedProvider: "none",
    };
  }

  const createProvider = async (id) => {
    if (id === "local") {
      const provider2 = await createLocalEmbeddingProvider(options);
      return { provider: provider2 };
    }
    if (id === "gemini") {
      const { provider: provider2, client: client2 } = await createGeminiEmbeddingProvider(options);
      return { provider: provider2, gemini: client2 };
    }
    const { provider, client } = await createOpenAiEmbeddingProvider(options);
    return { provider, openAi: client };
  };
  const formatPrimaryError = (err, provider) =>
    provider === "local" ? formatLocalSetupError(err) : formatError(err);

  if (requestedProvider === "auto") {
    if (canAutoSelectLocal(options)) {
      try {
        const local = await createProvider("local");
        return { ...local, requestedProvider };
      } catch {
        // Local setup failed; fall through to the remote providers.
      }
    }
    for (const provider of ["openai", "gemini"]) {
      try {
        const result = await createProvider(provider);
        return { ...result, requestedProvider };
      } catch (err) {
        // A missing key just means this provider is not configured.
        if (isMissingApiKeyError(err)) continue;
        throw new Error(formatPrimaryError(err, provider), { cause: err });
      }
    }
    return {
      provider: createNoOpEmbeddingProvider(),
      requestedProvider,
      fallbackFrom: "auto",
      fallbackReason: "No embedding API available. Using BM25 full-text search only.",
    };
  }

  try {
    const primary = await createProvider(requestedProvider);
    return { ...primary, requestedProvider };
  } catch (primaryErr) {
    const reason = formatPrimaryError(primaryErr, requestedProvider);
    if (fallback && fallback !== "none" && fallback !== requestedProvider) {
      try {
        const fallbackResult = await createProvider(fallback);
        return {
          ...fallbackResult,
          requestedProvider,
          fallbackFrom: requestedProvider,
          fallbackReason: reason,
        };
      } catch (fallbackErr) {
        throw new Error(
          `${reason}\n\nFallback to ${fallback} failed: ${formatError(fallbackErr)}`,
          { cause: fallbackErr }
        );
      }
    }
    throw new Error(reason, { cause: primaryErr });
  }
}
|
|
1312
|
+
/**
 * Turn any thrown value into a message string.
 *
 * @param {unknown} err - Error or other thrown value.
 * @returns {string} `err.message` for `Error` instances, otherwise `String(err)`.
 */
function formatError(err) {
  return err instanceof Error ? err.message : String(err);
}
|
|
1316
|
+
/**
 * Detect a failed import of the `node-llama-cpp` package.
 *
 * @param {unknown} err - Thrown value to inspect.
 * @returns {boolean} True when `err` is an `Error` whose `code` is
 *   `"ERR_MODULE_NOT_FOUND"` and whose message mentions `node-llama-cpp`.
 */
function isNodeLlamaCppMissing(err) {
  return (
    err instanceof Error &&
    err.code === "ERR_MODULE_NOT_FOUND" &&
    err.message.includes("node-llama-cpp")
  );
}
|
|
1324
|
+
/**
 * Build a multi-line, user-facing explanation of why local embeddings
 * could not be set up, followed by setup instructions.
 *
 * When the failure is a missing `node-llama-cpp` package, the message names
 * that as the reason, adds the raw error text as a detail line, and
 * includes the install step. Otherwise the raw error text is the reason.
 *
 * @param {unknown} err - The setup failure.
 * @returns {string} Lines joined with `\n`.
 */
function formatLocalSetupError(err) {
  const detail = formatError(err);
  const missing = isNodeLlamaCppMissing(err);
  const parts = ["Local embeddings unavailable."];
  if (missing) {
    parts.push("Reason: optional dependency node-llama-cpp is missing (or failed to install).");
    if (detail) parts.push(`Detail: ${detail}`);
  } else if (detail) {
    parts.push(`Reason: ${detail}`);
  }
  parts.push("To enable local embeddings:");
  parts.push("1) Use Node 22 LTS (recommended for installs/updates)");
  if (missing) {
    parts.push("2) Install node-llama-cpp: npm install node-llama-cpp");
  }
  parts.push("3) If you use pnpm: pnpm approve-builds (select node-llama-cpp), then pnpm rebuild node-llama-cpp");
  parts.push('Or set provider = "openai" or "gemini" (remote).');
  return parts.join("\n");
}
|
|
1338
|
+
|
|
1339
|
+
// src/embeddings/batch-openai.ts
|
|
1340
|
+
var OPENAI_BATCH_ENDPOINT = "/v1/embeddings";
|
|
1341
|
+
var OPENAI_BATCH_COMPLETION_WINDOW = "24h";
|
|
1342
|
+
var OPENAI_BATCH_MAX_REQUESTS = 5e4;
|
|
1343
|
+
/**
 * Return the client's base URL without a single trailing slash.
 *
 * @param {{baseUrl?: string}} openAi - OpenAI client settings.
 * @returns {string} The base URL, or `""` when none is set.
 */
function getOpenAiBaseUrl(openAi) {
  const { baseUrl } = openAi;
  if (baseUrl == null) {
    return "";
  }
  return baseUrl.replace(/\/$/, "");
}
|
|
1346
|
+
/**
 * Build request headers from the client's configured headers.
 *
 * Works on a copy; the client's header object is never modified. For JSON
 * requests a `Content-Type: application/json` header is added unless one
 * is already present. For non-JSON requests (for example multipart
 * uploads) every `Content-Type` header is removed.
 *
 * Header names are matched case-insensitively, so spellings such as
 * `Content-type` or `CONTENT-TYPE` are recognised as well.
 *
 * @param {{headers?: Record<string, string>}} openAi - OpenAI client settings.
 * @param {{json: boolean}} params - Whether the request body is JSON.
 * @returns {Record<string, string>} A new header object.
 */
function getOpenAiHeaders(openAi, params) {
  const headers = openAi.headers ? { ...openAi.headers } : {};
  const contentTypeKeys = Object.keys(headers).filter(
    (key) => key.toLowerCase() === "content-type"
  );
  if (params.json) {
    const hasContentType = contentTypeKeys.some((key) => Boolean(headers[key]));
    if (!hasContentType) {
      headers["Content-Type"] = "application/json";
    }
  } else {
    for (const key of contentTypeKeys) {
      delete headers[key];
    }
  }
  return headers;
}
|
|
1358
|
+
/**
 * Split a request list into groups no larger than the per-batch maximum.
 *
 * @param {Array} requests - Batch requests to submit.
 * @returns {Array<Array>} A single-element array holding the original list
 *   when it already fits, otherwise consecutive slices of at most
 *   `OPENAI_BATCH_MAX_REQUESTS` items.
 */
function splitOpenAiBatchRequests(requests) {
  const total = requests.length;
  if (total <= OPENAI_BATCH_MAX_REQUESTS) {
    return [requests];
  }
  const groupCount = Math.ceil(total / OPENAI_BATCH_MAX_REQUESTS);
  return Array.from({ length: groupCount }, (_, groupIndex) => {
    const begin = groupIndex * OPENAI_BATCH_MAX_REQUESTS;
    return requests.slice(begin, begin + OPENAI_BATCH_MAX_REQUESTS);
  });
}
|
|
1366
|
+
/**
 * Run an async operation, retrying with exponential backoff and jitter.
 *
 * After a failure, `opts.shouldRetry(err)` is consulted; the error is
 * rethrown immediately when it returns false or when the attempt was the
 * last one. Otherwise the function waits
 * `min(maxDelayMs, minDelayMs * 2^attempt * (1 + random * jitter))`
 * milliseconds before trying again.
 *
 * @param {() => Promise<*>} fn - Operation to run.
 * @param {{attempts: number, minDelayMs: number, maxDelayMs: number, jitter: number, shouldRetry: (err: unknown) => boolean}} opts
 * @returns {Promise<*>} The first successful result of `fn`.
 * @throws The error of the final attempt, or of the first non-retryable failure.
 */
async function retryAsync(fn, opts) {
  let lastError;
  let attempt = 0;
  while (attempt < opts.attempts) {
    try {
      return await fn();
    } catch (err) {
      lastError = err;
      const retryable = opts.shouldRetry(err);
      const exhausted = attempt === opts.attempts - 1;
      if (!retryable || exhausted) {
        throw err;
      }
    }
    const backoff = opts.minDelayMs * 2 ** attempt * (1 + Math.random() * opts.jitter);
    const waitMs = Math.min(opts.maxDelayMs, backoff);
    await new Promise((resolve) => setTimeout(resolve, waitMs));
    attempt += 1;
  }
  throw lastError;
}
|
|
1385
|
+
/**
 * Upload a set of embedding requests as a JSONL file and create an OpenAI
 * batch job for it.
 *
 * The upload is attempted once. Batch creation is retried up to three
 * times when the response status is 429 or 5xx.
 *
 * @param {{openAi: object, requests: Array<object>, source: string}} params -
 *   Client settings, the requests (one JSON line each), and a label stored
 *   in the batch metadata.
 * @returns {Promise<object>} The decoded batch-creation response.
 * @throws {Error} When the upload fails, the upload response has no file
 *   id, or batch creation ultimately fails.
 */
async function submitOpenAiBatch(params) {
  const baseUrl = getOpenAiBaseUrl(params.openAi);
  const jsonl = params.requests.map((request) => JSON.stringify(request)).join("\n");
  const upload = new FormData();
  upload.append("purpose", "batch");
  upload.append(
    "file",
    new Blob([jsonl], { type: "application/jsonl" }),
    `memory-embeddings.${hashText(String(Date.now()))}.jsonl`
  );

  const uploadResponse = await fetch(`${baseUrl}/files`, {
    method: "POST",
    // Content-Type is left to fetch so the multipart boundary is set.
    headers: getOpenAiHeaders(params.openAi, { json: false }),
    body: upload,
  });
  if (!uploadResponse.ok) {
    const detail = await uploadResponse.text();
    throw new Error(`openai batch file upload failed: ${uploadResponse.status} ${detail}`);
  }
  const uploaded = await uploadResponse.json();
  if (!uploaded.id) {
    throw new Error("openai batch file upload failed: missing file id");
  }

  const createBatch = async () => {
    const response = await fetch(`${baseUrl}/batches`, {
      method: "POST",
      headers: getOpenAiHeaders(params.openAi, { json: true }),
      body: JSON.stringify({
        input_file_id: uploaded.id,
        endpoint: OPENAI_BATCH_ENDPOINT,
        completion_window: OPENAI_BATCH_COMPLETION_WINDOW,
        metadata: { source: params.source },
      }),
    });
    if (response.ok) {
      return response;
    }
    const detail = await response.text();
    const failure = new Error(`openai batch create failed: ${response.status} ${detail}`);
    // Expose the status so the retry policy can inspect it.
    failure.status = response.status;
    throw failure;
  };
  const retryPolicy = {
    attempts: 3,
    minDelayMs: 300,
    maxDelayMs: 2e3,
    jitter: 0.2,
    shouldRetry: (err) => {
      const { status } = err;
      return status === 429 || (typeof status === "number" && status >= 500);
    },
  };

  const batchResponse = await retryAsync(createBatch, retryPolicy);
  return await batchResponse.json();
}
|
|
1443
|
+
/**
 * Fetch the current state of an OpenAI batch job.
 *
 * @param {{openAi: object, batchId: string}} params
 * @returns {Promise<object>} The decoded batch status response.
 * @throws {Error} When the endpoint responds with a non-OK status.
 */
async function fetchOpenAiBatchStatus(params) {
  const endpoint = `${getOpenAiBaseUrl(params.openAi)}/batches/${params.batchId}`;
  const response = await fetch(endpoint, {
    headers: getOpenAiHeaders(params.openAi, { json: true }),
  });
  if (response.ok) {
    return response.json();
  }
  const detail = await response.text();
  throw new Error(`openai batch status failed: ${response.status} ${detail}`);
}
|
|
1454
|
+
/**
 * Download the text content of an OpenAI file.
 *
 * @param {{openAi: object, fileId: string}} params
 * @returns {Promise<string>} The response body as text.
 * @throws {Error} When the endpoint responds with a non-OK status.
 */
async function fetchOpenAiFileContent(params) {
  const endpoint = `${getOpenAiBaseUrl(params.openAi)}/files/${params.fileId}/content`;
  const response = await fetch(endpoint, {
    headers: getOpenAiHeaders(params.openAi, { json: true }),
  });
  const body = await response.text();
  if (!response.ok) {
    throw new Error(`openai batch file content failed: ${response.status} ${body}`);
  }
  return body;
}
|
|
1465
|
+
/**
 * Parse JSONL batch output into an array of objects.
 *
 * Blank lines are skipped and surrounding whitespace on each line is ignored.
 *
 * @param {string} text - Newline-delimited JSON.
 * @returns {Array<*>} One parsed value per non-blank line.
 * @throws {SyntaxError} When a non-blank line is not valid JSON.
 */
function parseOpenAiBatchOutput(text) {
  if (!text.trim()) {
    return [];
  }
  const records = [];
  for (const rawLine of text.split("\n")) {
    const line = rawLine.trim();
    if (line) {
      records.push(JSON.parse(line));
    }
  }
  return records;
}
|
|
1469
|
+
/**
 * Extract a human-readable message from a batch's error file.
 *
 * Uses the first entry that has either a top-level `error.message` or a
 * `response.body.error`. The top-level message wins; the nested message is
 * used only when it is a string.
 *
 * @param {{openAi: object, errorFileId: string}} params
 * @returns {Promise<string|undefined>} The message, `undefined` when none is
 *   found, or an `error file unavailable: ...` note when the file cannot be
 *   read or parsed.
 */
async function readOpenAiBatchError(params) {
  try {
    const content = await fetchOpenAiFileContent({
      openAi: params.openAi,
      fileId: params.errorFileId,
    });
    const failing = parseOpenAiBatchOutput(content).find(
      (entry) => entry.error?.message || entry.response?.body?.error
    );
    const direct = failing?.error?.message;
    if (direct != null) {
      return direct;
    }
    const nested = failing?.response?.body?.error?.message;
    return typeof nested === "string" ? nested : void 0;
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return reason ? `error file unavailable: ${reason}` : void 0;
  }
}
|
|
1484
|
+
/**
 * Wait for an OpenAI batch job to finish.
 *
 * Uses `params.initial` as the first status snapshot when provided,
 * otherwise fetches it. While the batch is neither completed nor in a
 * failure state, it sleeps `pollIntervalMs` between status requests.
 *
 * @param {{openAi: object, batchId: string, initial?: object, wait: boolean, timeoutMs: number, pollIntervalMs: number, debug?: (message: string) => void}} params
 * @returns {Promise<{outputFileId: string, errorFileId: string|undefined}>}
 * @throws {Error} When the batch completes without an output file; ends as
 *   failed, expired or cancelled; is still pending while `wait` is false;
 *   or the elapsed time exceeds `timeoutMs`.
 */
async function waitForOpenAiBatch(params) {
  const startedAt = Date.now();
  const failureStates = ["failed", "expired", "cancelled", "canceled"];
  let snapshot = params.initial;
  for (;;) {
    const status =
      snapshot ??
      (await fetchOpenAiBatchStatus({
        openAi: params.openAi,
        batchId: params.batchId,
      }));
    const state = status.status ?? "unknown";

    if (state === "completed") {
      if (!status.output_file_id) {
        throw new Error(`openai batch ${params.batchId} completed without output file`);
      }
      return {
        outputFileId: status.output_file_id,
        errorFileId: status.error_file_id ?? void 0,
      };
    }

    if (failureStates.includes(state)) {
      let detail;
      if (status.error_file_id) {
        detail = await readOpenAiBatchError({
          openAi: params.openAi,
          errorFileId: status.error_file_id,
        });
      }
      const suffix = detail ? `: ${detail}` : "";
      throw new Error(`openai batch ${params.batchId} ${state}${suffix}`);
    }

    if (!params.wait) {
      throw new Error(`openai batch ${params.batchId} still ${state}; wait disabled`);
    }
    if (Date.now() - startedAt > params.timeoutMs) {
      throw new Error(`openai batch ${params.batchId} timed out after ${params.timeoutMs}ms`);
    }

    params.debug?.(`openai batch ${params.batchId} ${state}; waiting ${params.pollIntervalMs}ms`);
    await new Promise((resolve) => setTimeout(resolve, params.pollIntervalMs));
    // Force a fresh status request on the next pass.
    snapshot = void 0;
  }
}
|
|
1518
|
+
/**
 * Run async task thunks with a bounded number running at the same time.
 *
 * Results are returned in task order. After the first failure no further
 * tasks are started; tasks already running are allowed to settle, and the
 * first failure is then rethrown.
 *
 * @param {Array<() => Promise<unknown>>} tasks - Thunks to execute.
 * @param {number} limit - Maximum tasks in flight. Values below 1, and values
 *   that are not a number (`undefined`, `NaN`), run the tasks one at a time.
 * @returns {Promise<unknown[]>} Task results, indexed like `tasks`.
 * @throws The rejection value of the first task that failed.
 */
async function runWithConcurrency(tasks, limit) {
  if (tasks.length === 0) return [];
  // A NaN limit would otherwise produce a zero-length worker pool and the
  // function would return without running anything.
  const requested = Number(limit);
  const usable = Number.isNaN(requested) ? 1 : requested;
  const workerCount = Math.floor(Math.max(1, Math.min(usable, tasks.length)));
  const results = Array.from({ length: tasks.length });
  let next = 0;
  // Track failure with a flag so falsy rejection values (0, "", undefined)
  // are still reported, and keep only the first failure.
  let failed = false;
  let firstError;
  const worker = async () => {
    while (!failed) {
      const index = next;
      next += 1;
      if (index >= tasks.length) return;
      try {
        results[index] = await tasks[index]();
      } catch (err) {
        if (!failed) {
          failed = true;
          firstError = err;
        }
        return;
      }
    }
  };
  // Workers never reject (they catch everything), so this simply waits for all.
  await Promise.allSettled(Array.from({ length: workerCount }, worker));
  if (failed) throw firstError;
  return results;
}
|
|
1542
|
+
/**
 * Embed a list of requests through the OpenAI batch API.
 *
 * Requests are split into groups, each group is submitted as one batch, and
 * the batches are processed with bounded concurrency. Every request must
 * produce a non-empty embedding, otherwise the whole call rejects.
 *
 * @param {object} params
 * @param {object} params.openAi - Client settings forwarded to the batch helpers.
 * @param {Array<{ custom_id: string }>} params.requests - Requests to embed.
 * @param {string} params.source - Label forwarded to batch submission.
 * @param {boolean} params.wait - Whether to poll unfinished batches.
 * @param {number} params.concurrency - Maximum batches in flight.
 * @param {number} params.pollIntervalMs - Delay between status polls.
 * @param {number} params.timeoutMs - Maximum polling time per batch.
 * @param {Function} [params.debug] - Optional logger.
 * @returns {Promise<Map<string, number[]>>} Embeddings keyed by `custom_id`.
 */
async function runOpenAiEmbeddingBatches(params) {
  if (params.requests.length === 0) return /* @__PURE__ */ new Map();
  const groups = splitOpenAiBatchRequests(params.requests);
  const byCustomId = /* @__PURE__ */ new Map();

  // Classify one output line as either a failure message or an embedding vector.
  const readLine = (line) => {
    if (line.error?.message) {
      return { failure: line.error.message };
    }
    const response = line.response;
    if ((response?.status_code ?? 0) >= 400) {
      const body = response?.body;
      const failure = body?.error?.message ?? (typeof body === "string" ? body : void 0) ?? "unknown error";
      return { failure };
    }
    const vector = response?.body?.data?.[0]?.embedding ?? [];
    return vector.length === 0 ? { failure: "empty embedding" } : { vector };
  };

  const runGroup = async (group, groupIndex) => {
    const batchInfo = await submitOpenAiBatch({
      openAi: params.openAi,
      requests: group,
      source: params.source
    });
    if (!batchInfo.id) {
      throw new Error("openai batch create failed: missing batch id");
    }
    params.debug?.("memory embeddings: openai batch created", {
      batchId: batchInfo.id,
      status: batchInfo.status,
      group: groupIndex + 1,
      groups: groups.length,
      requests: group.length
    });
    const alreadyCompleted = batchInfo.status === "completed";
    if (!params.wait && !alreadyCompleted) {
      throw new Error(
        `openai batch ${batchInfo.id} submitted; enable batch.wait to await completion`
      );
    }
    let completed;
    if (alreadyCompleted) {
      completed = {
        outputFileId: batchInfo.output_file_id ?? "",
        errorFileId: batchInfo.error_file_id ?? void 0
      };
    } else {
      completed = await waitForOpenAiBatch({
        openAi: params.openAi,
        batchId: batchInfo.id,
        wait: params.wait,
        pollIntervalMs: params.pollIntervalMs,
        timeoutMs: params.timeoutMs,
        debug: params.debug,
        initial: batchInfo
      });
    }
    if (!completed.outputFileId) {
      throw new Error(`openai batch ${batchInfo.id} completed without output file`);
    }
    const content = await fetchOpenAiFileContent({
      openAi: params.openAi,
      fileId: completed.outputFileId
    });
    const errors = [];
    // Ids still in this set after the loop had no line in the output file.
    const remaining = new Set(group.map((request) => request.custom_id));
    for (const line of parseOpenAiBatchOutput(content)) {
      const customId = line.custom_id;
      if (!customId) continue;
      remaining.delete(customId);
      const outcome = readLine(line);
      if ("failure" in outcome) {
        errors.push(`${customId}: ${outcome.failure}`);
      } else {
        byCustomId.set(customId, outcome.vector);
      }
    }
    if (errors.length > 0) {
      throw new Error(`openai batch ${batchInfo.id} failed: ${errors.join("; ")}`);
    }
    if (remaining.size > 0) {
      throw new Error(`openai batch ${batchInfo.id} missing ${remaining.size} embedding responses`);
    }
  };

  const tasks = groups.map((group, groupIndex) => () => runGroup(group, groupIndex));
  params.debug?.("memory embeddings: openai batch submit", {
    requests: params.requests.length,
    groups: groups.length,
    wait: params.wait,
    concurrency: params.concurrency,
    pollIntervalMs: params.pollIntervalMs,
    timeoutMs: params.timeoutMs
  });
  await runWithConcurrency(tasks, params.concurrency);
  return byCustomId;
}
|
|
1630
|
+
|
|
1631
|
+
// src/embeddings/batch-gemini.ts
|
|
1632
|
+
// Maximum number of requests placed in a single Gemini batch; longer request
// lists are split into groups of at most this size (50,000).
var GEMINI_BATCH_MAX_REQUESTS = 5e4;
|
|
1633
|
+
/**
 * Return the configured Gemini base URL without a single trailing slash.
 *
 * @param {{ baseUrl?: string | null }} gemini - Gemini client settings.
 * @returns {string} The normalised base URL, or "" when none is configured.
 */
function getGeminiBaseUrl(gemini) {
  const configured = gemini.baseUrl;
  if (configured === null || configured === void 0) {
    return "";
  }
  return configured.replace(/\/$/, "");
}
|
|
1636
|
+
/**
 * Build request headers from the Gemini settings without mutating them.
 *
 * For JSON requests a `Content-Type: application/json` header is added unless
 * one is already present; for non-JSON requests any configured content type
 * is removed so the caller can supply its own.
 *
 * @param {{ headers?: Record<string, string> }} gemini - Gemini client settings.
 * @param {{ json: boolean }} params - Whether the request body is JSON.
 * @returns {Record<string, string>} A fresh headers object.
 */
function getGeminiHeaders(gemini, params) {
  const headers = { ...(gemini.headers ? gemini.headers : {}) };
  if (!params.json) {
    delete headers["Content-Type"];
    delete headers["content-type"];
    return headers;
  }
  const alreadyTyped = headers["Content-Type"] || headers["content-type"];
  if (!alreadyTyped) {
    headers["Content-Type"] = "application/json";
  }
  return headers;
}
|
|
1648
|
+
/**
 * Derive the upload endpoint root from a Gemini base URL.
 *
 * A base URL ending in `/v1beta` becomes `.../upload/v1beta`; a base URL
 * without `/v1beta` gets `/upload` appended. A URL that contains `/v1beta`
 * somewhere other than the end is returned unchanged.
 *
 * @param {string} baseUrl - Gemini API base URL.
 * @returns {string} Upload endpoint root.
 */
function getGeminiUploadUrl(baseUrl) {
  return baseUrl.includes("/v1beta")
    ? baseUrl.replace(/\/v1beta\/?$/, "/upload/v1beta")
    : `${baseUrl.replace(/\/$/, "")}/upload`;
}
|
|
1654
|
+
/**
 * Split a request list into groups small enough for one Gemini batch each.
 *
 * When the list already fits, the original array is returned as the only
 * group (no copy is made).
 *
 * @param {Array<unknown>} requests - Requests to partition.
 * @param {number} [maxPerBatch=GEMINI_BATCH_MAX_REQUESTS] - Maximum requests per group.
 * @returns {Array<Array<unknown>>} Groups in original order.
 * @throws {RangeError} When `maxPerBatch` is not a positive integer (a
 *   non-positive step would never advance the loop).
 */
function splitGeminiBatchRequests(requests, maxPerBatch = GEMINI_BATCH_MAX_REQUESTS) {
  if (!Number.isInteger(maxPerBatch) || maxPerBatch < 1) {
    throw new RangeError(`maxPerBatch must be a positive integer, got ${maxPerBatch}`);
  }
  if (requests.length <= maxPerBatch) return [requests];
  const groups = [];
  for (let start = 0; start < requests.length; start += maxPerBatch) {
    groups.push(requests.slice(start, start + maxPerBatch));
  }
  return groups;
}
|
|
1662
|
+
/**
 * Assemble the `multipart/related` body used to upload a JSONL file.
 *
 * The body has two parts: a JSON metadata part naming the file, followed by
 * the JSONL payload itself. The boundary is derived from the display name.
 *
 * @param {{ displayName: string, jsonl: string }} params
 * @returns {{ body: Blob, contentType: string }} The body and the matching
 *   `Content-Type` header value (including the boundary).
 */
function buildGeminiUploadBody(params) {
  const crlf = "\r\n";
  const boundary = `minimem-${hashText(params.displayName)}`;
  const metadata = JSON.stringify({
    file: {
      displayName: params.displayName,
      mimeType: "application/jsonl"
    }
  });
  const opener = `--${boundary}${crlf}`;
  // Each part is: boundary line, part headers, blank line, content, CRLF.
  const metadataPart = `${opener}Content-Type: application/json; charset=UTF-8${crlf}${crlf}${metadata}${crlf}`;
  const payloadPart = `${opener}Content-Type: application/jsonl; charset=UTF-8${crlf}${crlf}${params.jsonl}${crlf}`;
  const closer = `--${boundary}--${crlf}`;
  return {
    body: new Blob([metadataPart + payloadPart + closer], { type: "multipart/related" }),
    contentType: `multipart/related; boundary=${boundary}`
  };
}
|
|
1691
|
+
/**
 * Upload a group of embedding requests as a JSONL file and start a Gemini
 * batch embedding job over it.
 *
 * @param {object} params
 * @param {object} params.gemini - Gemini client settings (base URL, headers, model path).
 * @param {Array<{ custom_id: string, content: unknown, taskType: unknown }>} params.requests
 * @param {string} params.source - Label appended to the batch display name.
 * @returns {Promise<object>} The parsed JSON response of the batch-create call.
 * @throws {Error} When the file upload fails, the upload response has no file
 *   name, or the batch-create request is rejected.
 */
async function submitGeminiBatch(params) {
  const baseUrl = getGeminiBaseUrl(params.gemini);

  // One JSON document per request; `key` is echoed back in the batch output.
  const toJsonlLine = (request) => JSON.stringify({
    key: request.custom_id,
    request: {
      content: request.content,
      task_type: request.taskType
    }
  });
  const jsonl = params.requests.map((request) => toJsonlLine(request)).join("\n");

  const upload = buildGeminiUploadBody({
    jsonl,
    displayName: `memory-embeddings-${hashText(String(Date.now()))}`
  });
  const fileRes = await fetch(`${getGeminiUploadUrl(baseUrl)}/files?uploadType=multipart`, {
    method: "POST",
    headers: {
      ...getGeminiHeaders(params.gemini, { json: false }),
      "Content-Type": upload.contentType
    },
    body: upload.body
  });
  if (!fileRes.ok) {
    const detail = await fileRes.text();
    throw new Error(`gemini batch file upload failed: ${fileRes.status} ${detail}`);
  }
  const uploaded = await fileRes.json();
  const fileId = uploaded.name ?? uploaded.file?.name;
  if (!fileId) {
    throw new Error("gemini batch file upload failed: missing file id");
  }

  const batchRes = await fetch(`${baseUrl}/${params.gemini.modelPath}:asyncBatchEmbedContent`, {
    method: "POST",
    headers: getGeminiHeaders(params.gemini, { json: true }),
    body: JSON.stringify({
      batch: {
        displayName: `memory-embeddings-${params.source}`,
        inputConfig: {
          file_name: fileId
        }
      }
    })
  });
  if (batchRes.ok) {
    return await batchRes.json();
  }
  const failureText = await batchRes.text();
  if (batchRes.status === 404) {
    throw new Error(
      "gemini batch create failed: 404 (asyncBatchEmbedContent not available for this model/baseUrl). Disable batch.enabled or switch providers."
    );
  }
  throw new Error(`gemini batch create failed: ${batchRes.status} ${failureText}`);
}
|
|
1747
|
+
/**
 * Fetch the current status document of a Gemini batch.
 *
 * @param {{ gemini: object, batchName: string }} params - `batchName` may be
 *   given with or without the `batches/` prefix.
 * @returns {Promise<object>} Parsed JSON status.
 * @throws {Error} When the HTTP response is not OK.
 */
async function fetchGeminiBatchStatus(params) {
  const baseUrl = getGeminiBaseUrl(params.gemini);
  const { batchName } = params;
  const resource = batchName.startsWith("batches/") ? batchName : `batches/${batchName}`;
  const res = await fetch(`${baseUrl}/${resource}`, {
    headers: getGeminiHeaders(params.gemini, { json: true })
  });
  if (res.ok) {
    return await res.json();
  }
  const detail = await res.text();
  throw new Error(`gemini batch status failed: ${res.status} ${detail}`);
}
|
|
1760
|
+
/**
 * Download the text content of a Gemini file.
 *
 * @param {{ gemini: object, fileId: string }} params - `fileId` may be given
 *   with or without the `files/` prefix.
 * @returns {Promise<string>} The response body as text.
 * @throws {Error} When the HTTP response is not OK.
 */
async function fetchGeminiFileContent(params) {
  const baseUrl = getGeminiBaseUrl(params.gemini);
  const { fileId } = params;
  const resource = fileId.startsWith("files/") ? fileId : `files/${fileId}`;
  const res = await fetch(`${baseUrl}/${resource}:download`, {
    headers: getGeminiHeaders(params.gemini, { json: true })
  });
  if (res.ok) {
    return await res.text();
  }
  const detail = await res.text();
  throw new Error(`gemini batch file content failed: ${res.status} ${detail}`);
}
|
|
1773
|
+
/**
 * Parse the JSONL content of a Gemini batch output file.
 *
 * Blank lines are skipped. A nullish or whitespace-only payload yields an
 * empty array instead of throwing.
 *
 * @param {string | null | undefined} text - Raw file content, one JSON document per line.
 * @returns {object[]} Parsed documents, in file order.
 * @throws {SyntaxError} When a non-blank line is not valid JSON. The message
 *   names the 1-based line number and the original error is attached as `cause`.
 */
function parseGeminiBatchOutput(text) {
  if (text == null || !text.trim()) return [];
  const parsed = [];
  const rawLines = text.split("\n");
  for (let i = 0; i < rawLines.length; i += 1) {
    // trim() also strips the "\r" left behind by CRLF line endings.
    const line = rawLines[i].trim();
    if (!line) continue;
    try {
      parsed.push(JSON.parse(line));
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new SyntaxError(
        `gemini batch output line ${i + 1} is not valid JSON: ${reason}`,
        { cause: err }
      );
    }
  }
  return parsed;
}
|
|
1777
|
+
/**
 * Poll a Gemini batch until it reaches a terminal state.
 *
 * `params.initial`, when provided, is used as the first status so no request
 * is made for a batch whose state is already known.
 *
 * @param {object} params
 * @param {object} params.gemini - Gemini client settings.
 * @param {string} params.batchName - Batch to poll.
 * @param {boolean} params.wait - When false, a non-terminal state is an error.
 * @param {number} params.pollIntervalMs - Delay between status requests.
 * @param {number} params.timeoutMs - Maximum total time to keep polling.
 * @param {(message: string) => void} [params.debug] - Optional progress logger.
 * @param {object} [params.initial] - Already-fetched batch status.
 * @returns {Promise<{ outputFileId: string }>}
 * @throws {Error} When the batch fails, finishes without an output file, is
 *   still running with `wait` disabled, or times out.
 */
async function waitForGeminiBatch(params) {
  const successStates = ["SUCCEEDED", "COMPLETED", "DONE"];
  const failureStates = ["FAILED", "CANCELLED", "CANCELED", "EXPIRED"];
  const startedAt = Date.now();
  let status = params.initial;
  for (; ; ) {
    if (status === null || status === void 0) {
      status = await fetchGeminiBatchStatus({
        gemini: params.gemini,
        batchName: params.batchName
      });
    }
    const state = status.state ?? "UNKNOWN";

    if (successStates.includes(state)) {
      // The output file reference can appear under several response shapes.
      const outputFileId = status.outputConfig?.file ?? status.outputConfig?.fileId ?? status.metadata?.output?.responsesFile;
      if (outputFileId) {
        return { outputFileId };
      }
      throw new Error(`gemini batch ${params.batchName} completed without output file`);
    }

    if (failureStates.includes(state)) {
      const message = status.error?.message ?? "unknown error";
      throw new Error(`gemini batch ${params.batchName} ${state}: ${message}`);
    }

    if (!params.wait) {
      throw new Error(`gemini batch ${params.batchName} still ${state}; wait disabled`);
    }

    const elapsed = Date.now() - startedAt;
    if (elapsed > params.timeoutMs) {
      throw new Error(`gemini batch ${params.batchName} timed out after ${params.timeoutMs}ms`);
    }

    params.debug?.(`gemini batch ${params.batchName} ${state}; waiting ${params.pollIntervalMs}ms`);
    await new Promise((resolve) => setTimeout(resolve, params.pollIntervalMs));
    // Drop the cached status so the next iteration fetches a fresh one.
    status = void 0;
  }
}
|
|
1808
|
+
/**
 * Run async task thunks with a bounded number running at the same time.
 *
 * Results are returned in task order. After the first failure no further
 * tasks are started; tasks already running are allowed to settle, and the
 * first failure is then rethrown.
 *
 * @param {Array<() => Promise<unknown>>} tasks - Thunks to execute.
 * @param {number} limit - Maximum tasks in flight. Values below 1, and values
 *   that are not a number (`undefined`, `NaN`), run the tasks one at a time.
 * @returns {Promise<unknown[]>} Task results, indexed like `tasks`.
 * @throws The rejection value of the first task that failed.
 */
async function runWithConcurrency2(tasks, limit) {
  if (tasks.length === 0) return [];
  // A NaN limit would otherwise produce a zero-length worker pool and the
  // function would return without running anything.
  const requested = Number(limit);
  const usable = Number.isNaN(requested) ? 1 : requested;
  const workerCount = Math.floor(Math.max(1, Math.min(usable, tasks.length)));
  const results = Array.from({ length: tasks.length });
  let next = 0;
  // Track failure with a flag so falsy rejection values (0, "", undefined)
  // are still reported, and keep only the first failure.
  let failed = false;
  let firstError;
  const worker = async () => {
    while (!failed) {
      const index = next;
      next += 1;
      if (index >= tasks.length) return;
      try {
        results[index] = await tasks[index]();
      } catch (err) {
        if (!failed) {
          failed = true;
          firstError = err;
        }
        return;
      }
    }
  };
  // Workers never reject (they catch everything), so this simply waits for all.
  await Promise.allSettled(Array.from({ length: workerCount }, worker));
  if (failed) throw firstError;
  return results;
}
|
|
1832
|
+
/**
 * Embed a list of requests through the Gemini batch API.
 *
 * Requests are split into groups, each group is submitted as one batch, and
 * the batches are processed with bounded concurrency. Every request must
 * produce a non-empty embedding, otherwise the whole call rejects.
 *
 * @param {object} params
 * @param {object} params.gemini - Gemini client settings.
 * @param {Array<{ custom_id: string }>} params.requests - Requests to embed.
 * @param {string} params.source - Label forwarded to batch submission.
 * @param {boolean} params.wait - Whether to poll unfinished batches.
 * @param {number} params.concurrency - Maximum batches in flight.
 * @param {number} params.pollIntervalMs - Delay between status polls.
 * @param {number} params.timeoutMs - Maximum polling time per batch.
 * @param {Function} [params.debug] - Optional logger.
 * @returns {Promise<Map<string, number[]>>} Embeddings keyed by request id.
 */
async function runGeminiEmbeddingBatches(params) {
  if (params.requests.length === 0) return /* @__PURE__ */ new Map();
  const finishedStates = ["SUCCEEDED", "COMPLETED", "DONE"];
  const groups = splitGeminiBatchRequests(params.requests);
  const byCustomId = /* @__PURE__ */ new Map();

  // Classify one output line as either a failure message or an embedding vector.
  const readLine = (line) => {
    const reported = line.error?.message || line.response?.error?.message;
    if (reported) {
      return { failure: reported };
    }
    const vector = line.embedding?.values ?? line.response?.embedding?.values ?? [];
    return vector.length === 0 ? { failure: "empty embedding" } : { vector };
  };

  const runGroup = async (group, groupIndex) => {
    const batchInfo = await submitGeminiBatch({
      gemini: params.gemini,
      requests: group,
      source: params.source
    });
    const batchName = batchInfo.name ?? "";
    if (!batchName) {
      throw new Error("gemini batch create failed: missing batch name");
    }
    params.debug?.("memory embeddings: gemini batch created", {
      batchName,
      state: batchInfo.state,
      group: groupIndex + 1,
      groups: groups.length,
      requests: group.length
    });
    const alreadyFinished = Boolean(batchInfo.state) && finishedStates.includes(batchInfo.state);
    // A missing state is not treated as "unfinished" here; it falls through to polling.
    if (!params.wait && batchInfo.state && !alreadyFinished) {
      throw new Error(
        `gemini batch ${batchName} submitted; enable batch.wait to await completion`
      );
    }
    let completed;
    if (alreadyFinished) {
      completed = {
        outputFileId: batchInfo.outputConfig?.file ?? batchInfo.outputConfig?.fileId ?? batchInfo.metadata?.output?.responsesFile ?? ""
      };
    } else {
      completed = await waitForGeminiBatch({
        gemini: params.gemini,
        batchName,
        wait: params.wait,
        pollIntervalMs: params.pollIntervalMs,
        timeoutMs: params.timeoutMs,
        debug: params.debug,
        initial: batchInfo
      });
    }
    if (!completed.outputFileId) {
      throw new Error(`gemini batch ${batchName} completed without output file`);
    }
    const content = await fetchGeminiFileContent({
      gemini: params.gemini,
      fileId: completed.outputFileId
    });
    const errors = [];
    // Ids still in this set after the loop had no line in the output file.
    const remaining = new Set(group.map((request) => request.custom_id));
    for (const line of parseGeminiBatchOutput(content)) {
      const customId = line.key ?? line.custom_id ?? line.request_id;
      if (!customId) continue;
      remaining.delete(customId);
      const outcome = readLine(line);
      if ("failure" in outcome) {
        errors.push(`${customId}: ${outcome.failure}`);
      } else {
        byCustomId.set(customId, outcome.vector);
      }
    }
    if (errors.length > 0) {
      throw new Error(`gemini batch ${batchName} failed: ${errors.join("; ")}`);
    }
    if (remaining.size > 0) {
      throw new Error(`gemini batch ${batchName} missing ${remaining.size} embedding responses`);
    }
  };

  const tasks = groups.map((group, groupIndex) => () => runGroup(group, groupIndex));
  params.debug?.("memory embeddings: gemini batch submit", {
    requests: params.requests.length,
    groups: groups.length,
    wait: params.wait,
    concurrency: params.concurrency,
    pollIntervalMs: params.pollIntervalMs,
    timeoutMs: params.timeoutMs
  });
  await runWithConcurrency2(tasks, params.concurrency);
  return byCustomId;
}
|
|
1916
|
+
|
|
1917
|
+
// src/minimem.ts
|
|
1918
|
+
/**
 * Decide which sub-directory of the memory directory holds minimem's files.
 *
 * Resolution order: the `MINIMEM_CONFIG_DIR` environment variable, then "."
 * when `config.json` sits directly in the memory directory, then
 * `.swarm/minimem` when that directory contains `config.json`, and finally
 * the default `.minimem`.
 *
 * @param {string} memoryDir - Root memory directory.
 * @returns {string} The sub-directory (or the environment override as given).
 */
function resolveMinimemSubdir(memoryDir) {
  const override = process.env.MINIMEM_CONFIG_DIR;
  if (override) {
    return override;
  }
  const hasConfig = (dir) => import_node_fs3.default.existsSync(import_node_path3.default.join(dir, "config.json"));
  if (hasConfig(memoryDir)) {
    return ".";
  }
  if (hasConfig(import_node_path3.default.join(memoryDir, ".swarm", "minimem"))) {
    return import_node_path3.default.join(".swarm", "minimem");
  }
  return ".minimem";
}
|
|
1926
|
+
// Key of the row in the `meta` table under which index metadata is stored as JSON.
var META_KEY = "memory_index_meta_v1";
|
|
1927
|
+
// Passed to the keyword and vector searches as `snippetMaxChars`.
var SNIPPET_MAX_CHARS = 700;
|
|
1928
|
+
// Name of the table queried for vector search and cleaned of stale rows during sync.
var VECTOR_TABLE = "chunks_vec";
|
|
1929
|
+
// Name of the full-text-search table used for keyword search.
var FTS_TABLE = "chunks_fts";
|
|
1930
|
+
// Name of the embedding cache table, handed to the schema setup helper.
var EMBEDDING_CACHE_TABLE = "embedding_cache";
|
|
1931
|
+
// NOTE(review): the three retry settings below are not used in the visible part
// of the file; presumably attempts / base delay / delay cap for embedding retries — confirm.
var EMBEDDING_RETRY_MAX_ATTEMPTS = 3;
|
|
1932
|
+
var EMBEDDING_RETRY_BASE_DELAY_MS = 500;
|
|
1933
|
+
var EMBEDDING_RETRY_MAX_DELAY_MS = 8e3;
|
|
1934
|
+
// 60,000 ms. NOTE(review): presumably the query-embedding timeout for remote providers — confirm at the use site.
var EMBEDDING_QUERY_TIMEOUT_REMOTE_MS = 6e4;
|
|
1935
|
+
// 300,000 ms. NOTE(review): presumably the query-embedding timeout for local providers — confirm at the use site.
var EMBEDDING_QUERY_TIMEOUT_LOCAL_MS = 5 * 6e4;
|
|
1936
|
+
var Minimem = class _Minimem {
|
|
1937
|
+
memoryDir;
|
|
1938
|
+
dbPath;
|
|
1939
|
+
chunking;
|
|
1940
|
+
cache;
|
|
1941
|
+
hybrid;
|
|
1942
|
+
queryConfig;
|
|
1943
|
+
watchConfig;
|
|
1944
|
+
batchConfig;
|
|
1945
|
+
vectorExtensionPath;
|
|
1946
|
+
debug;
|
|
1947
|
+
provider;
|
|
1948
|
+
openAi;
|
|
1949
|
+
gemini;
|
|
1950
|
+
providerKey = "";
|
|
1951
|
+
providerFallbackReason;
|
|
1952
|
+
db;
|
|
1953
|
+
vector;
|
|
1954
|
+
fts;
|
|
1955
|
+
vectorReady = null;
|
|
1956
|
+
watcher = null;
|
|
1957
|
+
watchTimer = null;
|
|
1958
|
+
closed = false;
|
|
1959
|
+
dirty = true;
|
|
1960
|
+
syncing = null;
|
|
1961
|
+
syncLock = false;
|
|
1962
|
+
embeddingOptions;
|
|
1963
|
+
constructor(config) {
|
|
1964
|
+
this.memoryDir = import_node_path3.default.resolve(config.memoryDir);
|
|
1965
|
+
this.dbPath = config.dbPath ?? import_node_path3.default.join(this.memoryDir, resolveMinimemSubdir(this.memoryDir), "index.db");
|
|
1966
|
+
this.chunking = {
|
|
1967
|
+
tokens: config.chunking?.tokens ?? 256,
|
|
1968
|
+
overlap: config.chunking?.overlap ?? 32
|
|
1969
|
+
};
|
|
1970
|
+
this.cache = {
|
|
1971
|
+
enabled: config.cache?.enabled ?? true,
|
|
1972
|
+
maxEntries: config.cache?.maxEntries ?? 1e4
|
|
1973
|
+
};
|
|
1974
|
+
this.hybrid = {
|
|
1975
|
+
enabled: config.hybrid?.enabled ?? true,
|
|
1976
|
+
vectorWeight: config.hybrid?.vectorWeight ?? 0.7,
|
|
1977
|
+
textWeight: config.hybrid?.textWeight ?? 0.3,
|
|
1978
|
+
candidateMultiplier: config.hybrid?.candidateMultiplier ?? 2
|
|
1979
|
+
};
|
|
1980
|
+
this.queryConfig = {
|
|
1981
|
+
maxResults: config.query?.maxResults ?? 10,
|
|
1982
|
+
minScore: config.query?.minScore ?? 0.3
|
|
1983
|
+
};
|
|
1984
|
+
this.watchConfig = {
|
|
1985
|
+
enabled: config.watch?.enabled ?? true,
|
|
1986
|
+
debounceMs: config.watch?.debounceMs ?? 1e3
|
|
1987
|
+
};
|
|
1988
|
+
this.batchConfig = {
|
|
1989
|
+
enabled: config.batch?.enabled ?? false,
|
|
1990
|
+
wait: config.batch?.wait ?? true,
|
|
1991
|
+
concurrency: config.batch?.concurrency ?? 2,
|
|
1992
|
+
pollIntervalMs: config.batch?.pollIntervalMs ?? 2e3,
|
|
1993
|
+
timeoutMs: config.batch?.timeoutMs ?? 60 * 60 * 1e3
|
|
1994
|
+
};
|
|
1995
|
+
this.vectorExtensionPath = config.vectorExtensionPath;
|
|
1996
|
+
this.debug = config.debug;
|
|
1997
|
+
this.embeddingOptions = config.embedding;
|
|
1998
|
+
this.vector = {
|
|
1999
|
+
enabled: true,
|
|
2000
|
+
available: null,
|
|
2001
|
+
extensionPath: this.vectorExtensionPath
|
|
2002
|
+
};
|
|
2003
|
+
this.fts = { enabled: this.hybrid.enabled, available: false };
|
|
2004
|
+
}
|
|
2005
|
+
static async create(config) {
|
|
2006
|
+
const instance = new _Minimem(config);
|
|
2007
|
+
await instance.initialize();
|
|
2008
|
+
return instance;
|
|
2009
|
+
}
|
|
2010
|
+
async initialize() {
|
|
2011
|
+
const providerResult = await createEmbeddingProvider(this.embeddingOptions);
|
|
2012
|
+
this.provider = providerResult.provider;
|
|
2013
|
+
this.openAi = providerResult.openAi;
|
|
2014
|
+
this.gemini = providerResult.gemini;
|
|
2015
|
+
this.providerKey = this.computeProviderKey();
|
|
2016
|
+
this.providerFallbackReason = providerResult.fallbackReason;
|
|
2017
|
+
if (this.provider.id === "none") {
|
|
2018
|
+
this.debug?.("Running in BM25-only mode (no embedding API available)");
|
|
2019
|
+
}
|
|
2020
|
+
this.db = this.openDatabase();
|
|
2021
|
+
this.ensureSchema();
|
|
2022
|
+
const meta = this.readMeta();
|
|
2023
|
+
if (meta?.vectorDims) {
|
|
2024
|
+
this.vector.dims = meta.vectorDims;
|
|
2025
|
+
}
|
|
2026
|
+
if (this.watchConfig.enabled) {
|
|
2027
|
+
this.ensureWatcher();
|
|
2028
|
+
}
|
|
2029
|
+
}
|
|
2030
|
+
openDatabase() {
|
|
2031
|
+
const dbDir = import_node_path3.default.dirname(this.dbPath);
|
|
2032
|
+
ensureDir(dbDir);
|
|
2033
|
+
return new import_node_sqlite.DatabaseSync(this.dbPath);
|
|
2034
|
+
}
|
|
2035
|
+
ensureSchema() {
|
|
2036
|
+
const result = ensureMemoryIndexSchema({
|
|
2037
|
+
db: this.db,
|
|
2038
|
+
embeddingCacheTable: EMBEDDING_CACHE_TABLE,
|
|
2039
|
+
ftsTable: FTS_TABLE,
|
|
2040
|
+
ftsEnabled: this.fts.enabled
|
|
2041
|
+
});
|
|
2042
|
+
this.fts.available = result.ftsAvailable;
|
|
2043
|
+
if (result.ftsError) {
|
|
2044
|
+
this.fts.loadError = result.ftsError;
|
|
2045
|
+
}
|
|
2046
|
+
}
|
|
2047
|
+
computeProviderKey() {
|
|
2048
|
+
const parts = [this.provider.id, this.provider.model];
|
|
2049
|
+
if (this.openAi) {
|
|
2050
|
+
parts.push(this.openAi.baseUrl);
|
|
2051
|
+
}
|
|
2052
|
+
if (this.gemini) {
|
|
2053
|
+
parts.push(this.gemini.baseUrl);
|
|
2054
|
+
}
|
|
2055
|
+
return hashText(parts.join(":"));
|
|
2056
|
+
}
|
|
2057
|
+
readMeta() {
|
|
2058
|
+
try {
|
|
2059
|
+
const row = this.db.prepare(`SELECT value FROM meta WHERE key = ?`).get(META_KEY);
|
|
2060
|
+
if (!row?.value) return null;
|
|
2061
|
+
return JSON.parse(row.value);
|
|
2062
|
+
} catch {
|
|
2063
|
+
return null;
|
|
2064
|
+
}
|
|
2065
|
+
}
|
|
2066
|
+
writeMeta(meta) {
|
|
2067
|
+
this.db.prepare(`INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)`).run(META_KEY, JSON.stringify(meta));
|
|
2068
|
+
}
|
|
2069
|
+
ensureWatcher() {
|
|
2070
|
+
if (this.watcher) return;
|
|
2071
|
+
const memorySubDir = import_node_path3.default.join(this.memoryDir, "memory");
|
|
2072
|
+
const memoryFile = import_node_path3.default.join(this.memoryDir, "MEMORY.md");
|
|
2073
|
+
this.watcher = import_chokidar.default.watch([memoryFile, memorySubDir], {
|
|
2074
|
+
ignoreInitial: true,
|
|
2075
|
+
persistent: true,
|
|
2076
|
+
awaitWriteFinish: { stabilityThreshold: 200, pollInterval: 50 }
|
|
2077
|
+
});
|
|
2078
|
+
const scheduleSync = () => {
|
|
2079
|
+
this.dirty = true;
|
|
2080
|
+
if (this.watchTimer) clearTimeout(this.watchTimer);
|
|
2081
|
+
this.watchTimer = setTimeout(() => {
|
|
2082
|
+
void this.sync({ reason: "watch" }).catch((err) => {
|
|
2083
|
+
this.debug?.(`memory sync failed (watch): ${String(err)}`);
|
|
2084
|
+
});
|
|
2085
|
+
}, this.watchConfig.debounceMs);
|
|
2086
|
+
};
|
|
2087
|
+
this.watcher.on("add", scheduleSync);
|
|
2088
|
+
this.watcher.on("change", scheduleSync);
|
|
2089
|
+
this.watcher.on("unlink", scheduleSync);
|
|
2090
|
+
}
|
|
2091
|
+
/**
|
|
2092
|
+
* Check if the index is stale by comparing file mtimes against stored values.
|
|
2093
|
+
* This is a lightweight check (stat calls only, no file reads).
|
|
2094
|
+
*/
|
|
2095
|
+
async isStale() {
|
|
2096
|
+
try {
|
|
2097
|
+
const files = await listMemoryFiles(this.memoryDir);
|
|
2098
|
+
const stored = this.db.prepare(`SELECT path, mtime FROM files WHERE source = ?`).all("memory");
|
|
2099
|
+
if (files.length !== stored.length) {
|
|
2100
|
+
this.debug?.(`Stale: file count changed (${stored.length} -> ${files.length})`);
|
|
2101
|
+
return true;
|
|
2102
|
+
}
|
|
2103
|
+
const storedMap = new Map(stored.map((f) => [f.path, f.mtime]));
|
|
2104
|
+
for (const absPath of files) {
|
|
2105
|
+
const relPath = import_node_path3.default.relative(this.memoryDir, absPath).replace(/\\/g, "/");
|
|
2106
|
+
const storedMtime = storedMap.get(relPath);
|
|
2107
|
+
if (storedMtime === void 0) {
|
|
2108
|
+
this.debug?.(`Stale: new file ${relPath}`);
|
|
2109
|
+
return true;
|
|
2110
|
+
}
|
|
2111
|
+
const stat = await import_promises2.default.stat(absPath);
|
|
2112
|
+
const currentMtime = Math.floor(stat.mtimeMs);
|
|
2113
|
+
if (currentMtime !== storedMtime) {
|
|
2114
|
+
this.debug?.(`Stale: mtime changed for ${relPath}`);
|
|
2115
|
+
return true;
|
|
2116
|
+
}
|
|
2117
|
+
}
|
|
2118
|
+
return false;
|
|
2119
|
+
} catch (err) {
|
|
2120
|
+
this.debug?.(`Stale check failed: ${String(err)}`);
|
|
2121
|
+
return true;
|
|
2122
|
+
}
|
|
2123
|
+
}
|
|
2124
|
+
async search(query, opts) {
|
|
2125
|
+
if (this.dirty || !this.watchConfig.enabled && await this.isStale()) {
|
|
2126
|
+
await this.sync({ reason: "search" });
|
|
2127
|
+
}
|
|
2128
|
+
const cleaned = query.trim();
|
|
2129
|
+
if (!cleaned) return [];
|
|
2130
|
+
const minScore = opts?.minScore ?? this.queryConfig.minScore;
|
|
2131
|
+
const maxResults = opts?.maxResults ?? this.queryConfig.maxResults;
|
|
2132
|
+
const candidates = Math.min(
|
|
2133
|
+
200,
|
|
2134
|
+
Math.max(1, Math.floor(maxResults * this.hybrid.candidateMultiplier))
|
|
2135
|
+
);
|
|
2136
|
+
const sourceFilter = { sql: "", params: [] };
|
|
2137
|
+
const keywordResults = this.hybrid.enabled && this.fts.available ? await searchKeyword({
|
|
2138
|
+
db: this.db,
|
|
2139
|
+
ftsTable: FTS_TABLE,
|
|
2140
|
+
providerModel: this.provider.model,
|
|
2141
|
+
query: cleaned,
|
|
2142
|
+
limit: candidates,
|
|
2143
|
+
snippetMaxChars: SNIPPET_MAX_CHARS,
|
|
2144
|
+
sourceFilter,
|
|
2145
|
+
buildFtsQuery,
|
|
2146
|
+
bm25RankToScore
|
|
2147
|
+
}).catch(() => []) : [];
|
|
2148
|
+
const queryVec = await this.embedQueryWithTimeout(cleaned);
|
|
2149
|
+
const hasVector = queryVec.some((v) => v !== 0);
|
|
2150
|
+
const vectorResults = hasVector ? await searchVector({
|
|
2151
|
+
db: this.db,
|
|
2152
|
+
vectorTable: VECTOR_TABLE,
|
|
2153
|
+
providerModel: this.provider.model,
|
|
2154
|
+
queryVec,
|
|
2155
|
+
limit: candidates,
|
|
2156
|
+
snippetMaxChars: SNIPPET_MAX_CHARS,
|
|
2157
|
+
ensureVectorReady: (dims) => this.ensureVectorReady(dims),
|
|
2158
|
+
sourceFilterVec: sourceFilter,
|
|
2159
|
+
sourceFilterChunks: sourceFilter
|
|
2160
|
+
}).catch(() => []) : [];
|
|
2161
|
+
const typeFilterFn = opts?.type ? (id) => {
|
|
2162
|
+
const row = this.db.prepare(`SELECT type FROM chunks WHERE id = ?`).get(id);
|
|
2163
|
+
return row?.type === opts.type;
|
|
2164
|
+
} : void 0;
|
|
2165
|
+
if (!this.hybrid.enabled) {
|
|
2166
|
+
let results = vectorResults;
|
|
2167
|
+
if (typeFilterFn) results = results.filter((r) => typeFilterFn(r.id));
|
|
2168
|
+
return results.filter((entry) => entry.score >= minScore).slice(0, maxResults).map((r) => ({
|
|
2169
|
+
path: r.path,
|
|
2170
|
+
startLine: r.startLine,
|
|
2171
|
+
endLine: r.endLine,
|
|
2172
|
+
score: r.score,
|
|
2173
|
+
snippet: r.snippet
|
|
2174
|
+
}));
|
|
2175
|
+
}
|
|
2176
|
+
let filteredVector = vectorResults;
|
|
2177
|
+
let filteredKeyword = keywordResults;
|
|
2178
|
+
if (typeFilterFn) {
|
|
2179
|
+
filteredVector = vectorResults.filter((r) => typeFilterFn(r.id));
|
|
2180
|
+
filteredKeyword = keywordResults.filter((r) => typeFilterFn(r.id));
|
|
2181
|
+
}
|
|
2182
|
+
const merged = mergeHybridResults({
|
|
2183
|
+
vector: filteredVector.map((r) => ({
|
|
2184
|
+
id: r.id,
|
|
2185
|
+
path: r.path,
|
|
2186
|
+
startLine: r.startLine,
|
|
2187
|
+
endLine: r.endLine,
|
|
2188
|
+
source: r.source,
|
|
2189
|
+
snippet: r.snippet,
|
|
2190
|
+
vectorScore: r.score
|
|
2191
|
+
})),
|
|
2192
|
+
keyword: filteredKeyword.map((r) => ({
|
|
2193
|
+
id: r.id,
|
|
2194
|
+
path: r.path,
|
|
2195
|
+
startLine: r.startLine,
|
|
2196
|
+
endLine: r.endLine,
|
|
2197
|
+
source: r.source,
|
|
2198
|
+
snippet: r.snippet,
|
|
2199
|
+
textScore: r.textScore
|
|
2200
|
+
})),
|
|
2201
|
+
vectorWeight: this.hybrid.vectorWeight,
|
|
2202
|
+
textWeight: this.hybrid.textWeight
|
|
2203
|
+
});
|
|
2204
|
+
return merged.filter((entry) => entry.score >= minScore).slice(0, maxResults).map((r) => ({
|
|
2205
|
+
path: r.path,
|
|
2206
|
+
startLine: r.startLine,
|
|
2207
|
+
endLine: r.endLine,
|
|
2208
|
+
score: r.score,
|
|
2209
|
+
snippet: r.snippet
|
|
2210
|
+
}));
|
|
2211
|
+
}
|
|
2212
|
+
async sync(opts) {
|
|
2213
|
+
if (this.syncing) {
|
|
2214
|
+
await this.syncing;
|
|
2215
|
+
return;
|
|
2216
|
+
}
|
|
2217
|
+
if (this.syncLock) {
|
|
2218
|
+
return;
|
|
2219
|
+
}
|
|
2220
|
+
this.syncLock = true;
|
|
2221
|
+
this.syncing = this.runSync(opts);
|
|
2222
|
+
try {
|
|
2223
|
+
await this.syncing;
|
|
2224
|
+
} finally {
|
|
2225
|
+
this.syncing = null;
|
|
2226
|
+
this.syncLock = false;
|
|
2227
|
+
}
|
|
2228
|
+
}
|
|
2229
|
+
async runSync(opts) {
|
|
2230
|
+
this.debug?.(`memory sync starting`, { reason: opts?.reason });
|
|
2231
|
+
await this.ensureVectorReady();
|
|
2232
|
+
const meta = this.readMeta();
|
|
2233
|
+
const needsFullReindex = opts?.force || !meta || meta.model !== this.provider.model || meta.provider !== this.provider.id || meta.providerKey !== this.providerKey || meta.chunkTokens !== this.chunking.tokens || meta.chunkOverlap !== this.chunking.overlap || this.vector.available && !meta?.vectorDims;
|
|
2234
|
+
const files = await listMemoryFiles(this.memoryDir);
|
|
2235
|
+
const activePaths = /* @__PURE__ */ new Set();
|
|
2236
|
+
for (const absPath of files) {
|
|
2237
|
+
const entry = await buildFileEntry(absPath, this.memoryDir);
|
|
2238
|
+
activePaths.add(entry.path);
|
|
2239
|
+
const record = this.db.prepare(`SELECT hash FROM files WHERE path = ? AND source = ?`).get(entry.path, "memory");
|
|
2240
|
+
if (!needsFullReindex && record?.hash === entry.hash) {
|
|
2241
|
+
continue;
|
|
2242
|
+
}
|
|
2243
|
+
await this.indexFile(entry);
|
|
2244
|
+
}
|
|
2245
|
+
const staleRows = this.db.prepare(`SELECT path FROM files WHERE source = ?`).all("memory");
|
|
2246
|
+
for (const stale of staleRows) {
|
|
2247
|
+
if (activePaths.has(stale.path)) continue;
|
|
2248
|
+
this.db.prepare(`DELETE FROM files WHERE path = ? AND source = ?`).run(stale.path, "memory");
|
|
2249
|
+
try {
|
|
2250
|
+
this.db.prepare(
|
|
2251
|
+
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`
|
|
2252
|
+
).run(stale.path, "memory");
|
|
2253
|
+
} catch (err) {
|
|
2254
|
+
logError("deleteStaleVectorEntries", err, this.debug);
|
|
2255
|
+
}
|
|
2256
|
+
this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(stale.path, "memory");
|
|
2257
|
+
this.db.prepare(`DELETE FROM knowledge_links WHERE source_path = ?`).run(stale.path);
|
|
2258
|
+
if (this.fts.enabled && this.fts.available) {
|
|
2259
|
+
try {
|
|
2260
|
+
this.db.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`).run(stale.path, "memory", this.provider.model);
|
|
2261
|
+
} catch (err) {
|
|
2262
|
+
logError("deleteStaleFtsEntries", err, this.debug);
|
|
2263
|
+
}
|
|
2264
|
+
}
|
|
2265
|
+
}
|
|
2266
|
+
this.writeMeta({
|
|
2267
|
+
model: this.provider.model,
|
|
2268
|
+
provider: this.provider.id,
|
|
2269
|
+
providerKey: this.providerKey,
|
|
2270
|
+
chunkTokens: this.chunking.tokens,
|
|
2271
|
+
chunkOverlap: this.chunking.overlap,
|
|
2272
|
+
vectorDims: this.vector.dims
|
|
2273
|
+
});
|
|
2274
|
+
this.pruneEmbeddingCacheIfNeeded();
|
|
2275
|
+
this.dirty = false;
|
|
2276
|
+
this.debug?.(`memory sync complete`, { files: files.length });
|
|
2277
|
+
}
|
|
2278
|
+
async indexFile(entry) {
|
|
2279
|
+
const content = await import_promises2.default.readFile(entry.absPath, "utf-8");
|
|
2280
|
+
const chunks = chunkMarkdown(content, this.chunking);
|
|
2281
|
+
const { frontmatter } = parseFrontmatter(content);
|
|
2282
|
+
const knowledgeType = frontmatter?.type ?? null;
|
|
2283
|
+
const knowledgeId = frontmatter?.id ?? null;
|
|
2284
|
+
const domains = frontmatter?.domain ?? null;
|
|
2285
|
+
const entities = frontmatter?.entities ?? null;
|
|
2286
|
+
const confidence = frontmatter?.confidence ?? null;
|
|
2287
|
+
const links = frontmatter?.links ?? null;
|
|
2288
|
+
const embeddings = await this.embedChunks(chunks);
|
|
2289
|
+
this.db.prepare(
|
|
2290
|
+
`INSERT OR REPLACE INTO files (path, source, hash, mtime, size) VALUES (?, ?, ?, ?, ?)`
|
|
2291
|
+
).run(entry.path, "memory", entry.hash, Math.floor(entry.mtimeMs), entry.size);
|
|
2292
|
+
try {
|
|
2293
|
+
this.db.prepare(
|
|
2294
|
+
`DELETE FROM ${VECTOR_TABLE} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`
|
|
2295
|
+
).run(entry.path, "memory");
|
|
2296
|
+
} catch (err) {
|
|
2297
|
+
logError("deleteOldVectorChunks", err, this.debug);
|
|
2298
|
+
}
|
|
2299
|
+
this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(entry.path, "memory");
|
|
2300
|
+
if (this.fts.enabled && this.fts.available) {
|
|
2301
|
+
try {
|
|
2302
|
+
this.db.prepare(`DELETE FROM ${FTS_TABLE} WHERE path = ? AND source = ? AND model = ?`).run(entry.path, "memory", this.provider.model);
|
|
2303
|
+
} catch (err) {
|
|
2304
|
+
logError("deleteOldFtsChunks", err, this.debug);
|
|
2305
|
+
}
|
|
2306
|
+
}
|
|
2307
|
+
this.db.prepare(`DELETE FROM knowledge_links WHERE source_path = ?`).run(entry.path);
|
|
2308
|
+
const now = Date.now();
|
|
2309
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
2310
|
+
const chunk = chunks[i];
|
|
2311
|
+
const embedding = embeddings[i] ?? [];
|
|
2312
|
+
const chunkId = (0, import_node_crypto2.randomUUID)();
|
|
2313
|
+
const meta = extractChunkMetadata(chunk.text);
|
|
2314
|
+
this.db.prepare(
|
|
2315
|
+
`INSERT INTO chunks (id, path, source, start_line, end_line, hash, model, text, embedding, updated_at, type, knowledge_type, knowledge_id, domains, entities, confidence)
|
|
2316
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
2317
|
+
).run(
|
|
2318
|
+
chunkId,
|
|
2319
|
+
entry.path,
|
|
2320
|
+
"memory",
|
|
2321
|
+
chunk.startLine,
|
|
2322
|
+
chunk.endLine,
|
|
2323
|
+
chunk.hash,
|
|
2324
|
+
this.provider.model,
|
|
2325
|
+
chunk.text,
|
|
2326
|
+
JSON.stringify(embedding),
|
|
2327
|
+
now,
|
|
2328
|
+
meta.type ?? null,
|
|
2329
|
+
knowledgeType,
|
|
2330
|
+
knowledgeId,
|
|
2331
|
+
domains ? JSON.stringify(domains) : null,
|
|
2332
|
+
entities ? JSON.stringify(entities) : null,
|
|
2333
|
+
confidence
|
|
2334
|
+
);
|
|
2335
|
+
if (this.vector.available && embedding.length > 0) {
|
|
2336
|
+
if (!this.vector.dims) {
|
|
2337
|
+
this.vector.dims = embedding.length;
|
|
2338
|
+
this.ensureVectorTable(embedding.length);
|
|
2339
|
+
}
|
|
2340
|
+
try {
|
|
2341
|
+
this.db.prepare(`INSERT INTO ${VECTOR_TABLE} (id, embedding) VALUES (?, ?)`).run(chunkId, vectorToBlob(embedding));
|
|
2342
|
+
} catch (err) {
|
|
2343
|
+
logError("insertVectorChunk", err, this.debug);
|
|
2344
|
+
}
|
|
2345
|
+
}
|
|
2346
|
+
if (this.fts.enabled && this.fts.available) {
|
|
2347
|
+
try {
|
|
2348
|
+
this.db.prepare(
|
|
2349
|
+
`INSERT INTO ${FTS_TABLE} (text, id, path, source, model, start_line, end_line)
|
|
2350
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`
|
|
2351
|
+
).run(
|
|
2352
|
+
chunk.text,
|
|
2353
|
+
chunkId,
|
|
2354
|
+
entry.path,
|
|
2355
|
+
"memory",
|
|
2356
|
+
this.provider.model,
|
|
2357
|
+
chunk.startLine,
|
|
2358
|
+
chunk.endLine
|
|
2359
|
+
);
|
|
2360
|
+
} catch (err) {
|
|
2361
|
+
logError("insertFtsChunk", err, this.debug);
|
|
2362
|
+
}
|
|
2363
|
+
}
|
|
2364
|
+
}
|
|
2365
|
+
if (links && knowledgeId) {
|
|
2366
|
+
const upsertLink = this.db.prepare(
|
|
2367
|
+
`INSERT OR REPLACE INTO knowledge_links (from_id, to_id, relation, layer, weight, source_path, created_at)
|
|
2368
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`
|
|
2369
|
+
);
|
|
2370
|
+
for (const link of links) {
|
|
2371
|
+
upsertLink.run(
|
|
2372
|
+
knowledgeId,
|
|
2373
|
+
link.target,
|
|
2374
|
+
link.relation,
|
|
2375
|
+
link.layer ?? null,
|
|
2376
|
+
0.5,
|
|
2377
|
+
entry.path,
|
|
2378
|
+
now
|
|
2379
|
+
);
|
|
2380
|
+
}
|
|
2381
|
+
}
|
|
2382
|
+
}
|
|
2383
|
+
async embedChunks(chunks) {
|
|
2384
|
+
if (chunks.length === 0) return [];
|
|
2385
|
+
const hashes = chunks.map((c) => c.hash);
|
|
2386
|
+
const cached = this.loadEmbeddingCache(hashes);
|
|
2387
|
+
const missing = [];
|
|
2388
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
2389
|
+
if (!cached.has(hashes[i])) {
|
|
2390
|
+
missing.push({ index: i, chunk: chunks[i] });
|
|
2391
|
+
}
|
|
2392
|
+
}
|
|
2393
|
+
if (missing.length > 0) {
|
|
2394
|
+
const texts = missing.map((m) => m.chunk.text);
|
|
2395
|
+
const newEmbeddings = await this.embedBatchWithRetry(texts);
|
|
2396
|
+
for (let i = 0; i < missing.length; i++) {
|
|
2397
|
+
const hash = missing[i].chunk.hash;
|
|
2398
|
+
const embedding = newEmbeddings[i] ?? [];
|
|
2399
|
+
cached.set(hash, embedding);
|
|
2400
|
+
this.upsertEmbeddingCache(hash, embedding);
|
|
2401
|
+
}
|
|
2402
|
+
}
|
|
2403
|
+
return hashes.map((h) => cached.get(h) ?? []);
|
|
2404
|
+
}
|
|
2405
|
+
async embedBatchWithRetry(texts) {
|
|
2406
|
+
if (texts.length === 0) return [];
|
|
2407
|
+
if (this.batchConfig.enabled) {
|
|
2408
|
+
try {
|
|
2409
|
+
return await this.embedWithBatchApi(texts);
|
|
2410
|
+
} catch (err) {
|
|
2411
|
+
this.debug?.(`batch embedding failed, falling back to direct: ${String(err)}`);
|
|
2412
|
+
}
|
|
2413
|
+
}
|
|
2414
|
+
let lastError = null;
|
|
2415
|
+
for (let attempt = 0; attempt < EMBEDDING_RETRY_MAX_ATTEMPTS; attempt++) {
|
|
2416
|
+
try {
|
|
2417
|
+
return await this.provider.embedBatch(texts);
|
|
2418
|
+
} catch (err) {
|
|
2419
|
+
lastError = err instanceof Error ? err : new Error(String(err));
|
|
2420
|
+
if (attempt < EMBEDDING_RETRY_MAX_ATTEMPTS - 1) {
|
|
2421
|
+
const delay = Math.min(
|
|
2422
|
+
EMBEDDING_RETRY_MAX_DELAY_MS,
|
|
2423
|
+
EMBEDDING_RETRY_BASE_DELAY_MS * Math.pow(2, attempt)
|
|
2424
|
+
);
|
|
2425
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
2426
|
+
}
|
|
2427
|
+
}
|
|
2428
|
+
}
|
|
2429
|
+
throw lastError;
|
|
2430
|
+
}
|
|
2431
|
+
async embedWithBatchApi(texts) {
|
|
2432
|
+
if (this.openAi) {
|
|
2433
|
+
const requests = texts.map((text, i) => ({
|
|
2434
|
+
custom_id: `chunk-${i}`,
|
|
2435
|
+
method: "POST",
|
|
2436
|
+
url: OPENAI_BATCH_ENDPOINT,
|
|
2437
|
+
body: { model: this.openAi.model, input: text }
|
|
2438
|
+
}));
|
|
2439
|
+
const results = await runOpenAiEmbeddingBatches({
|
|
2440
|
+
openAi: this.openAi,
|
|
2441
|
+
source: "minimem",
|
|
2442
|
+
requests,
|
|
2443
|
+
wait: this.batchConfig.wait,
|
|
2444
|
+
pollIntervalMs: this.batchConfig.pollIntervalMs,
|
|
2445
|
+
timeoutMs: this.batchConfig.timeoutMs,
|
|
2446
|
+
concurrency: this.batchConfig.concurrency,
|
|
2447
|
+
debug: this.debug
|
|
2448
|
+
});
|
|
2449
|
+
return texts.map((_, i) => results.get(`chunk-${i}`) ?? []);
|
|
2450
|
+
}
|
|
2451
|
+
if (this.gemini) {
|
|
2452
|
+
const requests = texts.map((text, i) => ({
|
|
2453
|
+
custom_id: `chunk-${i}`,
|
|
2454
|
+
content: { parts: [{ text }] },
|
|
2455
|
+
taskType: "RETRIEVAL_DOCUMENT"
|
|
2456
|
+
}));
|
|
2457
|
+
const results = await runGeminiEmbeddingBatches({
|
|
2458
|
+
gemini: this.gemini,
|
|
2459
|
+
source: "minimem",
|
|
2460
|
+
requests,
|
|
2461
|
+
wait: this.batchConfig.wait,
|
|
2462
|
+
pollIntervalMs: this.batchConfig.pollIntervalMs,
|
|
2463
|
+
timeoutMs: this.batchConfig.timeoutMs,
|
|
2464
|
+
concurrency: this.batchConfig.concurrency,
|
|
2465
|
+
debug: this.debug
|
|
2466
|
+
});
|
|
2467
|
+
return texts.map((_, i) => results.get(`chunk-${i}`) ?? []);
|
|
2468
|
+
}
|
|
2469
|
+
throw new Error("Batch API not available for local embeddings");
|
|
2470
|
+
}
|
|
2471
|
+
async embedQueryWithTimeout(text) {
|
|
2472
|
+
const timeout = this.provider.id === "local" ? EMBEDDING_QUERY_TIMEOUT_LOCAL_MS : EMBEDDING_QUERY_TIMEOUT_REMOTE_MS;
|
|
2473
|
+
const ac = new AbortController();
|
|
2474
|
+
const timer = setTimeout(() => ac.abort(), timeout);
|
|
2475
|
+
try {
|
|
2476
|
+
const result = await Promise.race([
|
|
2477
|
+
this.provider.embedQuery(text),
|
|
2478
|
+
new Promise((_, reject) => {
|
|
2479
|
+
ac.signal.addEventListener(
|
|
2480
|
+
"abort",
|
|
2481
|
+
() => reject(new Error("embedding query timeout"))
|
|
2482
|
+
);
|
|
2483
|
+
})
|
|
2484
|
+
]);
|
|
2485
|
+
return result;
|
|
2486
|
+
} finally {
|
|
2487
|
+
clearTimeout(timer);
|
|
2488
|
+
}
|
|
2489
|
+
}
|
|
2490
|
+
loadEmbeddingCache(hashes) {
|
|
2491
|
+
const result = /* @__PURE__ */ new Map();
|
|
2492
|
+
if (!this.cache.enabled || hashes.length === 0) return result;
|
|
2493
|
+
const placeholders = hashes.map(() => "?").join(",");
|
|
2494
|
+
const rows = this.db.prepare(
|
|
2495
|
+
`SELECT hash, embedding FROM ${EMBEDDING_CACHE_TABLE}
|
|
2496
|
+
WHERE provider = ? AND model = ? AND provider_key = ? AND hash IN (${placeholders})`
|
|
2497
|
+
).all(this.provider.id, this.provider.model, this.providerKey, ...hashes);
|
|
2498
|
+
const now = Date.now();
|
|
2499
|
+
for (const row of rows) {
|
|
2500
|
+
result.set(row.hash, parseEmbedding(row.embedding));
|
|
2501
|
+
this.db.prepare(
|
|
2502
|
+
`UPDATE ${EMBEDDING_CACHE_TABLE} SET updated_at = ?
|
|
2503
|
+
WHERE provider = ? AND model = ? AND provider_key = ? AND hash = ?`
|
|
2504
|
+
).run(now, this.provider.id, this.provider.model, this.providerKey, row.hash);
|
|
2505
|
+
}
|
|
2506
|
+
return result;
|
|
2507
|
+
}
|
|
2508
|
+
upsertEmbeddingCache(hash, embedding) {
|
|
2509
|
+
if (!this.cache.enabled) return;
|
|
2510
|
+
const now = Date.now();
|
|
2511
|
+
this.db.prepare(
|
|
2512
|
+
`INSERT OR REPLACE INTO ${EMBEDDING_CACHE_TABLE}
|
|
2513
|
+
(provider, model, provider_key, hash, embedding, dims, updated_at)
|
|
2514
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`
|
|
2515
|
+
).run(
|
|
2516
|
+
this.provider.id,
|
|
2517
|
+
this.provider.model,
|
|
2518
|
+
this.providerKey,
|
|
2519
|
+
hash,
|
|
2520
|
+
JSON.stringify(embedding),
|
|
2521
|
+
embedding.length,
|
|
2522
|
+
now
|
|
2523
|
+
);
|
|
2524
|
+
}
|
|
2525
|
+
pruneEmbeddingCacheIfNeeded() {
|
|
2526
|
+
if (!this.cache.enabled) return;
|
|
2527
|
+
const row = this.db.prepare(`SELECT COUNT(*) as count FROM ${EMBEDDING_CACHE_TABLE}`).get();
|
|
2528
|
+
if (row.count <= this.cache.maxEntries) return;
|
|
2529
|
+
const excess = row.count - this.cache.maxEntries;
|
|
2530
|
+
this.db.prepare(
|
|
2531
|
+
`DELETE FROM ${EMBEDDING_CACHE_TABLE}
|
|
2532
|
+
WHERE rowid IN (
|
|
2533
|
+
SELECT rowid FROM ${EMBEDDING_CACHE_TABLE}
|
|
2534
|
+
ORDER BY updated_at ASC
|
|
2535
|
+
LIMIT ?
|
|
2536
|
+
)`
|
|
2537
|
+
).run(excess);
|
|
2538
|
+
}
|
|
2539
|
+
async ensureVectorReady(dimensions) {
|
|
2540
|
+
if (this.vector.available === true) return true;
|
|
2541
|
+
if (this.vector.available === false) return false;
|
|
2542
|
+
if (!this.vectorReady) {
|
|
2543
|
+
this.vectorReady = this.loadVectorExtension();
|
|
2544
|
+
}
|
|
2545
|
+
const ready = await this.vectorReady;
|
|
2546
|
+
if (ready && dimensions && !this.vector.dims) {
|
|
2547
|
+
this.vector.dims = dimensions;
|
|
2548
|
+
this.ensureVectorTable(dimensions);
|
|
2549
|
+
}
|
|
2550
|
+
return ready;
|
|
2551
|
+
}
|
|
2552
|
+
async loadVectorExtension() {
|
|
2553
|
+
const result = await loadSqliteVecExtension({
|
|
2554
|
+
db: this.db,
|
|
2555
|
+
extensionPath: this.vectorExtensionPath
|
|
2556
|
+
});
|
|
2557
|
+
this.vector.available = result.ok;
|
|
2558
|
+
if (result.error) {
|
|
2559
|
+
this.vector.loadError = result.error;
|
|
2560
|
+
this.debug?.(`sqlite-vec load failed: ${result.error}`);
|
|
2561
|
+
}
|
|
2562
|
+
if (result.extensionPath) {
|
|
2563
|
+
this.vector.extensionPath = result.extensionPath;
|
|
2564
|
+
}
|
|
2565
|
+
return result.ok;
|
|
2566
|
+
}
|
|
2567
|
+
ensureVectorTable(dimensions) {
|
|
2568
|
+
if (!this.vector.available) return;
|
|
2569
|
+
try {
|
|
2570
|
+
this.db.exec(
|
|
2571
|
+
`CREATE VIRTUAL TABLE IF NOT EXISTS ${VECTOR_TABLE} USING vec0(
|
|
2572
|
+
id TEXT PRIMARY KEY,
|
|
2573
|
+
embedding FLOAT[${dimensions}]
|
|
2574
|
+
)`
|
|
2575
|
+
);
|
|
2576
|
+
} catch (err) {
|
|
2577
|
+
this.debug?.(`vector table creation failed: ${String(err)}`);
|
|
2578
|
+
}
|
|
2579
|
+
}
|
|
2580
|
+
async readFile(relativePath) {
|
|
2581
|
+
const absPath = import_node_path3.default.join(this.memoryDir, relativePath);
|
|
2582
|
+
try {
|
|
2583
|
+
return await import_promises2.default.readFile(absPath, "utf-8");
|
|
2584
|
+
} catch {
|
|
2585
|
+
return null;
|
|
2586
|
+
}
|
|
2587
|
+
}
|
|
2588
|
+
/**
|
|
2589
|
+
* Read specific lines from a memory file
|
|
2590
|
+
*/
|
|
2591
|
+
async readLines(relativePath, opts) {
|
|
2592
|
+
const content = await this.readFile(relativePath);
|
|
2593
|
+
if (content === null) return null;
|
|
2594
|
+
const allLines = content.split("\n");
|
|
2595
|
+
const from = Math.max(1, opts?.from ?? 1);
|
|
2596
|
+
const lines = opts?.lines ?? allLines.length;
|
|
2597
|
+
const startIdx = from - 1;
|
|
2598
|
+
const endIdx = Math.min(startIdx + lines, allLines.length);
|
|
2599
|
+
const selectedLines = allLines.slice(startIdx, endIdx);
|
|
2600
|
+
return {
|
|
2601
|
+
content: selectedLines.join("\n"),
|
|
2602
|
+
startLine: from,
|
|
2603
|
+
endLine: startIdx + selectedLines.length
|
|
2604
|
+
};
|
|
2605
|
+
}
|
|
2606
|
+
/**
|
|
2607
|
+
* Write content to a memory file (creates or overwrites)
|
|
2608
|
+
*/
|
|
2609
|
+
async writeFile(relativePath, content) {
|
|
2610
|
+
this.validateMemoryPath(relativePath);
|
|
2611
|
+
const absPath = import_node_path3.default.join(this.memoryDir, relativePath);
|
|
2612
|
+
const dir = import_node_path3.default.dirname(absPath);
|
|
2613
|
+
await import_promises2.default.mkdir(dir, { recursive: true });
|
|
2614
|
+
await import_promises2.default.writeFile(absPath, content, "utf-8");
|
|
2615
|
+
this.dirty = true;
|
|
2616
|
+
this.debug?.(`memory write: ${relativePath}`);
|
|
2617
|
+
}
|
|
2618
|
+
/**
|
|
2619
|
+
* Append content to a memory file (creates if doesn't exist)
|
|
2620
|
+
*/
|
|
2621
|
+
async appendFile(relativePath, content) {
|
|
2622
|
+
this.validateMemoryPath(relativePath);
|
|
2623
|
+
const absPath = import_node_path3.default.join(this.memoryDir, relativePath);
|
|
2624
|
+
const dir = import_node_path3.default.dirname(absPath);
|
|
2625
|
+
await import_promises2.default.mkdir(dir, { recursive: true });
|
|
2626
|
+
let toAppend = content;
|
|
2627
|
+
try {
|
|
2628
|
+
const existing = await import_promises2.default.readFile(absPath, "utf-8");
|
|
2629
|
+
if (existing.length > 0 && !existing.endsWith("\n")) {
|
|
2630
|
+
toAppend = "\n" + content;
|
|
2631
|
+
}
|
|
2632
|
+
} catch {
|
|
2633
|
+
}
|
|
2634
|
+
await import_promises2.default.appendFile(absPath, toAppend, "utf-8");
|
|
2635
|
+
this.dirty = true;
|
|
2636
|
+
this.debug?.(`memory append: ${relativePath}`);
|
|
2637
|
+
}
|
|
2638
|
+
/**
|
|
2639
|
+
* Append content to today's daily log (memory/YYYY-MM-DD.md)
|
|
2640
|
+
*/
|
|
2641
|
+
async appendToday(content) {
|
|
2642
|
+
const today = (/* @__PURE__ */ new Date()).toISOString().split("T")[0];
|
|
2643
|
+
const relativePath = `memory/${today}.md`;
|
|
2644
|
+
await this.appendFile(relativePath, content);
|
|
2645
|
+
return relativePath;
|
|
2646
|
+
}
|
|
2647
|
+
/**
|
|
2648
|
+
* List all memory files
|
|
2649
|
+
*/
|
|
2650
|
+
async listFiles() {
|
|
2651
|
+
const files = await listMemoryFiles(this.memoryDir);
|
|
2652
|
+
return files.map((f) => import_node_path3.default.relative(this.memoryDir, f).replace(/\\/g, "/"));
|
|
2653
|
+
}
|
|
2654
|
+
/**
|
|
2655
|
+
* Validate that a path is within allowed memory locations
|
|
2656
|
+
*/
|
|
2657
|
+
validateMemoryPath(relativePath) {
|
|
2658
|
+
const normalized = relativePath.replace(/\\/g, "/").replace(/^\.\//, "");
|
|
2659
|
+
if (normalized === "MEMORY.md" || normalized === "memory.md") {
|
|
2660
|
+
return;
|
|
2661
|
+
}
|
|
2662
|
+
if (normalized.startsWith("memory/") && normalized.endsWith(".md")) {
|
|
2663
|
+
if (normalized.includes("..")) {
|
|
2664
|
+
throw new Error(`Invalid memory path: ${relativePath} (path traversal not allowed)`);
|
|
2665
|
+
}
|
|
2666
|
+
return;
|
|
2667
|
+
}
|
|
2668
|
+
throw new Error(
|
|
2669
|
+
`Invalid memory path: ${relativePath}. Must be MEMORY.md or memory/*.md`
|
|
2670
|
+
);
|
|
2671
|
+
}
|
|
2672
|
+
async status() {
|
|
2673
|
+
const fileRow = this.db.prepare(`SELECT COUNT(*) as count FROM files`).get();
|
|
2674
|
+
const chunkRow = this.db.prepare(`SELECT COUNT(*) as count FROM chunks`).get();
|
|
2675
|
+
const cacheRow = this.db.prepare(`SELECT COUNT(*) as count FROM ${EMBEDDING_CACHE_TABLE}`).get();
|
|
2676
|
+
return {
|
|
2677
|
+
memoryDir: this.memoryDir,
|
|
2678
|
+
dbPath: this.dbPath,
|
|
2679
|
+
provider: this.provider.id,
|
|
2680
|
+
model: this.provider.model,
|
|
2681
|
+
vectorAvailable: this.vector.available === true,
|
|
2682
|
+
ftsAvailable: this.fts.available,
|
|
2683
|
+
bm25Only: this.provider.id === "none",
|
|
2684
|
+
fallbackReason: this.providerFallbackReason,
|
|
2685
|
+
fileCount: fileRow.count,
|
|
2686
|
+
chunkCount: chunkRow.count,
|
|
2687
|
+
cacheCount: cacheRow.count
|
|
2688
|
+
};
|
|
2689
|
+
}
|
|
2690
|
+
/**
|
|
2691
|
+
* Search with knowledge metadata filters (domain, entities, confidence, type).
|
|
2692
|
+
* Runs a standard search then post-filters by knowledge columns.
|
|
2693
|
+
*/
|
|
2694
|
+
async knowledgeSearch(query, opts) {
|
|
2695
|
+
if (this.dirty || !this.watchConfig.enabled && await this.isStale()) {
|
|
2696
|
+
await this.sync({ reason: "knowledgeSearch" });
|
|
2697
|
+
}
|
|
2698
|
+
const cleaned = query.trim();
|
|
2699
|
+
if (!cleaned) return [];
|
|
2700
|
+
const minScore = opts?.minScore ?? this.queryConfig.minScore;
|
|
2701
|
+
const maxResults = opts?.maxResults ?? this.queryConfig.maxResults;
|
|
2702
|
+
const { sql: knowledgeWhere, params: knowledgeParams } = buildKnowledgeFilterSql({
|
|
2703
|
+
domain: opts?.domain,
|
|
2704
|
+
entities: opts?.entities,
|
|
2705
|
+
minConfidence: opts?.minConfidence,
|
|
2706
|
+
knowledgeType: opts?.knowledgeType
|
|
2707
|
+
});
|
|
2708
|
+
if (!knowledgeWhere) {
|
|
2709
|
+
return this.search(query, { maxResults, minScore });
|
|
2710
|
+
}
|
|
2711
|
+
const matchingRows = this.db.prepare(
|
|
2712
|
+
`SELECT id FROM chunks c WHERE c.model = ? AND c.source = 'memory'${knowledgeWhere}`
|
|
2713
|
+
).all(this.provider.model, ...knowledgeParams);
|
|
2714
|
+
const matchingIds = new Set(matchingRows.map((r) => r.id));
|
|
2715
|
+
if (matchingIds.size === 0) return [];
|
|
2716
|
+
const overFetch = Math.max(maxResults * 3, 30);
|
|
2717
|
+
const results = await this.search(query, {
|
|
2718
|
+
maxResults: overFetch,
|
|
2719
|
+
minScore
|
|
2720
|
+
});
|
|
2721
|
+
const filtered = [];
|
|
2722
|
+
for (const r of results) {
|
|
2723
|
+
const row = this.db.prepare(
|
|
2724
|
+
`SELECT id FROM chunks WHERE path = ? AND start_line = ? AND end_line = ? AND model = ?`
|
|
2725
|
+
).get(r.path, r.startLine, r.endLine, this.provider.model);
|
|
2726
|
+
if (row && matchingIds.has(row.id)) {
|
|
2727
|
+
filtered.push(r);
|
|
2728
|
+
if (filtered.length >= maxResults) break;
|
|
2729
|
+
}
|
|
2730
|
+
}
|
|
2731
|
+
return filtered;
|
|
2732
|
+
}
|
|
2733
|
+
/**
|
|
2734
|
+
* Get knowledge graph links from or to a node.
|
|
2735
|
+
*/
|
|
2736
|
+
getLinks(nodeId, direction = "from", opts) {
|
|
2737
|
+
if (direction === "from") {
|
|
2738
|
+
return getLinksFrom(this.db, nodeId, opts);
|
|
2739
|
+
}
|
|
2740
|
+
return getLinksTo(this.db, nodeId, opts);
|
|
2741
|
+
}
|
|
2742
|
+
/**
|
|
2743
|
+
* Get neighbor nodes via BFS traversal.
|
|
2744
|
+
*/
|
|
2745
|
+
getGraphNeighbors(nodeId, depth = 1, opts) {
|
|
2746
|
+
return getNeighbors(this.db, nodeId, depth, opts);
|
|
2747
|
+
}
|
|
2748
|
+
/**
|
|
2749
|
+
* Find shortest path between two knowledge nodes.
|
|
2750
|
+
*/
|
|
2751
|
+
getGraphPath(fromId, toId, maxDepth = 3) {
|
|
2752
|
+
return getPathBetween(this.db, fromId, toId, maxDepth);
|
|
2753
|
+
}
|
|
2754
|
+
close() {
|
|
2755
|
+
if (this.closed) return;
|
|
2756
|
+
this.closed = true;
|
|
2757
|
+
if (this.watchTimer) {
|
|
2758
|
+
clearTimeout(this.watchTimer);
|
|
2759
|
+
this.watchTimer = null;
|
|
2760
|
+
}
|
|
2761
|
+
if (this.watcher) {
|
|
2762
|
+
void this.watcher.close();
|
|
2763
|
+
this.watcher = null;
|
|
2764
|
+
}
|
|
2765
|
+
try {
|
|
2766
|
+
this.db.close();
|
|
2767
|
+
} catch (err) {
|
|
2768
|
+
logError("dbClose", err, this.debug);
|
|
2769
|
+
}
|
|
2770
|
+
}
|
|
2771
|
+
};
|
|
2772
|
+
|
|
2773
|
+
// src/server/tools.ts
|
|
2774
|
+
/**
 * Tool definition for `memory_search`.
 *
 * Defaults and ranges that the descriptions state in prose are also given
 * as JSON Schema keywords (`default`, `minimum`, `maximum`), matching how
 * the knowledge graph tools declare theirs. `minScore` deliberately has no
 * `default`: when omitted, the executor passes it through unset.
 */
var MEMORY_SEARCH_TOOL = {
  name: "memory_search",
  description: "Semantically search through memory files (MEMORY.md and memory/*.md). Use this to recall prior decisions, facts, preferences, people, dates, or context. Returns ranked snippets with file paths and line numbers. When multiple memory directories are configured, searches all by default.",
  inputSchema: {
    type: "object",
    properties: {
      query: {
        type: "string",
        description: "Natural language search query"
      },
      maxResults: {
        type: "number",
        description: "Maximum number of results to return (default: 10)",
        default: 10
      },
      minScore: {
        type: "number",
        description: "Minimum relevance score threshold 0-1 (default: 0.3)",
        minimum: 0,
        maximum: 1
      },
      directories: {
        type: "array",
        items: { type: "string" },
        description: "Optional: filter to specific memory directories by name/path. If omitted, searches all configured directories."
      },
      detail: {
        type: "string",
        enum: ["compact", "full"],
        description: "Result detail level. 'compact' returns a lightweight index with short previews (~80 chars). 'full' returns complete snippets. Use 'compact' first, then memory_get_details for selected results. (default: 'compact')",
        default: "compact"
      },
      type: {
        type: "string",
        description: "Filter by observation type. Matches <!-- type: X --> comments in memory entries. Common types: decision, bugfix, feature, discovery, context, note."
      }
    },
    required: ["query"]
  }
};
|
|
2810
|
+
/**
 * Tool definition for `memory_get_details`.
 *
 * The item schema spells out the `{ path, startLine, endLine }` shape that
 * the description refers to, so callers get a machine-readable contract.
 * Only `path` is marked required; the line bounds are described but left
 * optional here.
 */
var MEMORY_GET_DETAILS_TOOL = {
  name: "memory_get_details",
  description: "Fetch full text for specific memory chunks identified by path and line range. Use after memory_search with compact results to get details for selected items only. This two-step approach significantly reduces token usage.",
  inputSchema: {
    type: "object",
    properties: {
      results: {
        type: "array",
        items: {
          type: "object",
          properties: {
            path: {
              type: "string",
              description: "File path as returned by memory_search"
            },
            startLine: {
              type: "number",
              description: "First line of the chunk as returned by memory_search"
            },
            endLine: {
              type: "number",
              description: "Last line of the chunk as returned by memory_search"
            }
          },
          required: ["path"]
        },
        description: "Array of { path, startLine, endLine } objects from compact search results."
      },
      directories: {
        type: "array",
        items: { type: "string" },
        description: "Optional: filter to specific memory directories."
      }
    },
    required: ["results"]
  }
};
|
|
2830
|
+
/**
 * Tool definition for `knowledge_search`.
 *
 * The 0-1 ranges that the descriptions state for `minConfidence` and
 * `minScore` are also expressed as `minimum` / `maximum`.
 */
var KNOWLEDGE_SEARCH_TOOL = {
  name: "knowledge_search",
  description: "Search memory with knowledge metadata filters. Filter by domain, entities, confidence level, or knowledge type (observation, entity, domain-summary). Combines semantic search with structured knowledge filtering.",
  inputSchema: {
    type: "object",
    properties: {
      query: {
        type: "string",
        description: "Natural language search query"
      },
      domain: {
        type: "array",
        items: { type: "string" },
        description: "Filter to entries in these knowledge domains"
      },
      entities: {
        type: "array",
        items: { type: "string" },
        description: "Filter to entries referencing these entities"
      },
      minConfidence: {
        type: "number",
        description: "Minimum confidence threshold (0-1)",
        minimum: 0,
        maximum: 1
      },
      knowledgeType: {
        type: "string",
        description: "Filter by knowledge type: observation, entity, domain-summary"
      },
      maxResults: {
        type: "number",
        description: "Maximum number of results (default: 10)"
      },
      minScore: {
        type: "number",
        description: "Minimum relevance score 0-1 (default: 0.3)",
        minimum: 0,
        maximum: 1
      },
      directories: {
        type: "array",
        items: { type: "string" },
        description: "Optional: filter to specific memory directories"
      }
    },
    required: ["query"]
  }
};
|
|
2875
|
+
/**
 * Tool definition for `knowledge_graph`.
 *
 * The depth bounds named in the description ("default: 1, max: 3") are also
 * declared as `minimum` / `maximum`, so the limit is machine-readable.
 */
var KNOWLEDGE_GRAPH_TOOL = {
  name: "knowledge_graph",
  description: "Traverse knowledge graph links from a note. Returns neighbor nodes connected by typed relationships (e.g., relates-to, supports, contradicts). Use depth parameter for multi-hop traversal.",
  inputSchema: {
    type: "object",
    properties: {
      nodeId: {
        type: "string",
        description: "The knowledge node ID to start traversal from"
      },
      depth: {
        type: "number",
        description: "Maximum traversal depth (default: 1, max: 3)",
        default: 1,
        minimum: 1,
        maximum: 3
      },
      relation: {
        type: "string",
        description: "Optional: filter to specific relation type"
      },
      layer: {
        type: "string",
        description: "Optional: filter to specific graph layer"
      },
      directories: {
        type: "array",
        items: { type: "string" },
        description: "Optional: filter to specific memory directories"
      }
    },
    required: ["nodeId"]
  }
};
|
|
2907
|
+
/**
 * Tool definition for `knowledge_path`.
 *
 * `maxDepth` declares a lower bound of 1: a path search with a
 * non-positive length limit cannot return a connecting path.
 */
var KNOWLEDGE_PATH_TOOL = {
  name: "knowledge_path",
  description: "Find the shortest path between two knowledge nodes in the graph. Uses BFS traversal up to a configurable max depth. Returns the sequence of links connecting the two nodes.",
  inputSchema: {
    type: "object",
    properties: {
      fromId: {
        type: "string",
        description: "Starting knowledge node ID"
      },
      toId: {
        type: "string",
        description: "Target knowledge node ID"
      },
      maxDepth: {
        type: "number",
        description: "Maximum path length (default: 3)",
        default: 3,
        minimum: 1
      },
      directories: {
        type: "array",
        items: { type: "string" },
        description: "Optional: filter to specific memory directories"
      }
    },
    required: ["fromId", "toId"]
  }
};
|
|
2935
|
+
/** All tool definitions exposed by the memory server, in listing order. */
var MEMORY_TOOLS = Array.of(
  MEMORY_SEARCH_TOOL,
  MEMORY_GET_DETAILS_TOOL,
  KNOWLEDGE_SEARCH_TOOL,
  KNOWLEDGE_GRAPH_TOOL,
  KNOWLEDGE_PATH_TOOL
);
|
|
2942
|
+
/**
 * Return the tool definitions exposed by the memory server.
 * The shared `MEMORY_TOOLS` array itself is returned, not a copy.
 *
 * @returns {Array<object>}
 */
function getToolDefinitions() {
  const definitions = MEMORY_TOOLS;
  return definitions;
}
|
|
2945
|
+
/**
 * Runs the memory and knowledge tools against one or more Minimem instances.
 *
 * The constructor accepts an array of `{ minimem, memoryDir, name? }`
 * descriptors, a single descriptor, or a bare Minimem-like object (which is
 * wrapped with `memoryDir: "default"`).
 *
 * Every tool method resolves to `{ content: [{ type: "text", text }], isError? }`.
 */
var MemoryToolExecutor = class {
  instances;
  constructor(instances) {
    const kind = typeof instances;
    if (instances === null || (kind !== "object" && kind !== "function")) {
      // The `in` check below would throw an opaque TypeError; fail with a readable one instead.
      throw new TypeError("MemoryToolExecutor requires a Minimem instance, an instance descriptor, or an array of descriptors");
    }
    if (Array.isArray(instances)) {
      this.instances = instances;
    } else if ("minimem" in instances) {
      this.instances = [instances];
    } else {
      this.instances = [{ minimem: instances, memoryDir: "default" }];
    }
  }
  /**
   * Get list of configured directory names/paths
   */
  getDirectories() {
    return this.instances.map((i) => i.name ?? i.memoryDir);
  }
  /**
   * Execute a tool by name with given parameters.
   *
   * Never rejects: unknown tools and thrown errors are reported as
   * `{ isError: true }` results.
   */
  async execute(toolName, params) {
    const args = params ?? {};
    try {
      switch (toolName) {
        case "memory_search":
          return await this.memorySearch(args);
        case "memory_get_details":
          return await this.memoryGetDetails(args);
        case "knowledge_search":
          return await this.knowledgeSearch(args);
        case "knowledge_graph":
          return await this.knowledgeGraph(args);
        case "knowledge_path":
          return await this.knowledgePath(args);
        default:
          return {
            content: [{ type: "text", text: `Unknown tool: ${toolName}` }],
            isError: true
          };
      }
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      return {
        content: [{ type: "text", text: `Error: ${message}` }],
        isError: true
      };
    }
  }
  /**
   * Coerce a caller-supplied number to an integer within [min, max].
   * Missing or non-numeric values yield `fallback`.
   */
  clampInt(value, fallback, min, max) {
    if (value === null || value === void 0) return fallback;
    const n = Number(value);
    if (!Number.isFinite(n)) return fallback;
    return Math.min(max, Math.max(min, Math.trunc(n)));
  }
  /**
   * Shared error result for a directory filter that matched nothing.
   */
  noMatchingDirectoriesResult() {
    const available = this.getDirectories().join(", ");
    return {
      content: [{ type: "text", text: `No matching directories found. Available: ${available}` }],
      isError: true
    };
  }
  /**
   * Filter instances by directory names/paths (case-insensitive substring match).
   *
   * Accepts an array or a single string. Blank and non-string entries are
   * ignored, so an empty string no longer matches every directory.
   *
   * @returns all instances when no usable filter is given, the matching
   *   instances otherwise, or `null` when the filter matches nothing.
   */
  filterInstances(directories) {
    const rawFilters = Array.isArray(directories) ? directories : directories === null || directories === void 0 ? [] : [directories];
    const filters = rawFilters.filter((d) => typeof d === "string").map((d) => d.trim().toLowerCase()).filter((d) => d.length > 0);
    if (filters.length === 0) return this.instances;
    const filtered = this.instances.filter((i) => {
      const dir = String(i.memoryDir ?? "").toLowerCase();
      const name = String(i.name ?? i.memoryDir ?? "").toLowerCase();
      // Substring containment also covers exact matches.
      return filters.some((f) => dir.includes(f) || name.includes(f));
    });
    return filtered.length > 0 ? filtered : null;
  }
  /**
   * Search every selected directory, merge results by score and format the top `maxResults`.
   */
  async memorySearch(params) {
    const maxResults = this.clampInt(params.maxResults, 10, 1, Number.MAX_SAFE_INTEGER);
    const detail = params.detail ?? "compact";
    const instancesToSearch = this.filterInstances(params.directories);
    if (!instancesToSearch) return this.noMatchingDirectoriesResult();
    // Over-fetch per directory so the merged top-N is not starved by one directory.
    const perDirMax = Math.ceil(maxResults * 1.5);
    const allResults = [];
    for (const instance of instancesToSearch) {
      const results = await instance.minimem.search(params.query, {
        maxResults: perDirMax,
        minScore: params.minScore,
        type: params.type
      });
      for (const result of results) {
        allResults.push({
          ...result,
          memoryDir: instance.name ?? instance.memoryDir
        });
      }
    }
    allResults.sort((a, b) => b.score - a.score);
    const topResults = allResults.slice(0, maxResults);
    if (topResults.length === 0) {
      return {
        content: [{ type: "text", text: "No results found." }]
      };
    }
    const showSource = instancesToSearch.length > 1;
    if (detail === "compact") {
      return this.formatCompactResults(topResults, showSource, instancesToSearch.length);
    }
    return this.formatFullResults(topResults, showSource, instancesToSearch.length);
  }
  /**
   * One line per result: zero-based index, location, optional source directory, score and a short preview.
   */
  formatCompactResults(results, showSource, dirCount) {
    const formatted = results.map((r, i) => {
      const location = `${r.path}:${r.startLine}-${r.endLine}`;
      const score = (r.score * 100).toFixed(0);
      const source = showSource ? ` [${r.memoryDir}]` : "";
      const preview = compactPreview(r.snippet);
      return `[${i}] ${location}${source} (${score}%) \u2014 ${preview}`;
    }).join("\n");
    const hint = "\n\nUse memory_get_details to fetch full text for selected results.";
    const dirSummary = dirCount > 1 ? `\n(Searched ${dirCount} directories)` : "";
    return {
      content: [{ type: "text", text: formatted + dirSummary + hint }]
    };
  }
  /**
   * One block per result: one-based index, location, optional source directory, score and the full snippet.
   */
  formatFullResults(results, showSource, dirCount) {
    const formatted = results.map((r, i) => {
      const location = `${r.path}:${r.startLine}-${r.endLine}`;
      const score = (r.score * 100).toFixed(1);
      const source = showSource ? ` [${r.memoryDir}]` : "";
      return `[${i + 1}] ${location}${source} (${score}% match)\n${r.snippet}`;
    }).join("\n\n");
    const dirSummary = dirCount > 1 ? `\n\n(Searched ${dirCount} directories)` : "";
    return {
      content: [{ type: "text", text: formatted + dirSummary }]
    };
  }
  /**
   * Fetch the full text for each `{ path, startLine, endLine }` reference from
   * the first selected directory whose `readLines` returns a result.
   */
  async memoryGetDetails(params) {
    if (!Array.isArray(params.results) || params.results.length === 0) {
      return {
        content: [{ type: "text", text: "No results specified." }],
        isError: true
      };
    }
    const instancesToSearch = this.filterInstances(params.directories);
    if (!instancesToSearch) return this.noMatchingDirectoriesResult();
    const details = [];
    for (const ref of params.results) {
      // A missing or inverted range falls back to a single line instead of passing NaN/negative counts on.
      const span = Number(ref.endLine) - Number(ref.startLine) + 1;
      const lineCount = Number.isFinite(span) && span > 0 ? span : 1;
      let found = false;
      for (const instance of instancesToSearch) {
        const result = await instance.minimem.readLines(ref.path, {
          from: ref.startLine,
          lines: lineCount
        });
        if (result) {
          const location = `${ref.path}:${result.startLine}-${result.endLine}`;
          details.push(`--- ${location} ---\n${result.content}`);
          found = true;
          break;
        }
      }
      if (!found) {
        details.push(`--- ${ref.path}:${ref.startLine}-${ref.endLine} ---\n(not found)`);
      }
    }
    return {
      content: [{ type: "text", text: details.join("\n\n") }]
    };
  }
  /**
   * Knowledge-filtered search across the selected directories, formatted like compact memory results.
   */
  async knowledgeSearch(params) {
    const instancesToSearch = this.filterInstances(params.directories);
    if (!instancesToSearch) return this.noMatchingDirectoriesResult();
    const maxResults = this.clampInt(params.maxResults, 10, 1, Number.MAX_SAFE_INTEGER);
    const allResults = [];
    for (const instance of instancesToSearch) {
      const results = await instance.minimem.knowledgeSearch(params.query, {
        maxResults: Math.ceil(maxResults * 1.5),
        minScore: params.minScore,
        domain: params.domain,
        entities: params.entities,
        minConfidence: params.minConfidence,
        knowledgeType: params.knowledgeType
      });
      for (const result of results) {
        allResults.push({
          ...result,
          memoryDir: instance.name ?? instance.memoryDir
        });
      }
    }
    allResults.sort((a, b) => b.score - a.score);
    const topResults = allResults.slice(0, maxResults);
    if (topResults.length === 0) {
      return { content: [{ type: "text", text: "No knowledge results found." }] };
    }
    // Tag each hit with its directory when several were searched, as memorySearch does.
    const showSource = instancesToSearch.length > 1;
    const formatted = topResults.map((r, i) => {
      const location = `${r.path}:${r.startLine}-${r.endLine}`;
      const score = (r.score * 100).toFixed(0);
      const source = showSource ? ` [${r.memoryDir}]` : "";
      const preview = compactPreview(r.snippet);
      return `[${i}] ${location}${source} (${score}%) \u2014 ${preview}`;
    }).join("\n");
    return { content: [{ type: "text", text: formatted }] };
  }
  /**
   * List graph neighbors of `params.nodeId`; traversal depth is clamped to 1..3 (default 1).
   */
  async knowledgeGraph(params) {
    const instancesToSearch = this.filterInstances(params.directories);
    if (!instancesToSearch) return this.noMatchingDirectoriesResult();
    const depth = this.clampInt(params.depth, 1, 1, 3);
    const allNeighbors = [];
    for (const instance of instancesToSearch) {
      const neighbors = instance.minimem.getGraphNeighbors(params.nodeId, depth, {
        relation: params.relation,
        layer: params.layer
      });
      for (const n of neighbors ?? []) {
        allNeighbors.push({
          id: n.id,
          depth: n.depth,
          relation: n.link.relation,
          layer: n.link.layer,
          memoryDir: instance.name ?? instance.memoryDir
        });
      }
    }
    if (allNeighbors.length === 0) {
      return { content: [{ type: "text", text: `No neighbors found for node "${params.nodeId}".` }] };
    }
    const formatted = allNeighbors.map((n) => ` [depth=${n.depth}] ${n.id} \u2014(${n.relation})${n.layer ? ` [${n.layer}]` : ""}`).join("\n");
    return {
      content: [{ type: "text", text: `Neighbors of "${params.nodeId}":\n${formatted}` }]
    };
  }
  /**
   * Report the first non-empty path between two nodes found in any selected
   * directory; search depth is clamped to 1..5 (default 3).
   */
  async knowledgePath(params) {
    const instancesToSearch = this.filterInstances(params.directories);
    if (!instancesToSearch) return this.noMatchingDirectoriesResult();
    const maxDepth = this.clampInt(params.maxDepth, 3, 1, 5);
    for (const instance of instancesToSearch) {
      const path5 = instance.minimem.getGraphPath(params.fromId, params.toId, maxDepth);
      if (Array.isArray(path5) && path5.length > 0) {
        const steps = path5.map((link) => ` ${link.fromId} \u2014(${link.relation})\u2192 ${link.toId}`).join("\n");
        return {
          content: [{
            type: "text",
            text: `Path from "${params.fromId}" to "${params.toId}" (${path5.length} steps):\n${steps}`
          }]
        };
      }
    }
    return {
      content: [{
        type: "text",
        text: `No path found from "${params.fromId}" to "${params.toId}" within depth ${maxDepth}.`
      }]
    };
  }
};
|
|
3232
|
+
/**
 * Build a short, quoted one-line preview of a snippet.
 *
 * Prefers the first Markdown heading line (leading `#`s stripped), otherwise
 * the first non-blank line. Text longer than 80 characters is cut to 77 plus
 * "...". Lines are trimmed before inspection, so indented headings and CRLF
 * line endings are handled.
 *
 * @param {unknown} snippet - Snippet text; null/undefined is treated as empty.
 * @returns {string} The quoted preview, or "(empty)" when there is no text.
 */
function compactPreview(snippet) {
  const maxLen = 80;
  const lines = String(snippet ?? "").split(/\r?\n/).map((l) => l.trim()).filter(Boolean);
  if (lines.length === 0) return "(empty)";
  const heading = lines.find((l) => l.startsWith("#"));
  const cleaned = (heading ?? lines[0]).replace(/^#+\s*/, "").trim();
  if (cleaned.length <= maxLen) return `"${cleaned}"`;
  return `"${cleaned.slice(0, maxLen - 3)}..."`;
}
|
|
3242
|
+
/**
 * Factory for MemoryToolExecutor; `instances` is passed to the constructor unchanged.
 */
function createToolExecutor(instances) {
  const executor = new MemoryToolExecutor(instances);
  return executor;
}
|
|
3245
|
+
|
|
3246
|
+
// src/server/mcp.ts
|
|
3247
|
+
var readline = __toESM(require("readline"), 1);
|
|
3248
|
+
// Protocol revision reported in the `initialize` response.
var PROTOCOL_VERSION = "2024-11-05";
// Name and version reported as `serverInfo` in the `initialize` response.
var SERVER_NAME = "minimem";
var SERVER_VERSION = "0.1.0";
|
|
3251
|
+
/**
 * Minimal JSON-RPC 2.0 server exposing the memory tools over MCP.
 *
 * Transport-agnostic: `handleRequest` maps one request object to one response
 * object and never throws.
 */
var McpServer = class {
  executor;
  initialized = false;
  constructor(instances) {
    this.executor = new MemoryToolExecutor(instances);
  }
  /**
   * Handle a JSON-RPC request and return a response.
   *
   * Errors thrown while dispatching become an `error` member; McpError keeps
   * its own code, anything else is reported as -32603 (internal error).
   */
  async handleRequest(request) {
    try {
      const result = await this.dispatch(request.method, request.params);
      return {
        jsonrpc: "2.0",
        id: request.id,
        result
      };
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      const code = err instanceof McpError ? err.code : -32603;
      return {
        jsonrpc: "2.0",
        id: request.id,
        error: { code, message }
      };
    }
  }
  /**
   * Dispatch a method call.
   *
   * @throws {McpError} -32601 when the method is not supported.
   */
  async dispatch(method, params) {
    switch (method) {
      case "initialize":
        return this.initialize(params);
      // MCP names the post-handshake notification "notifications/initialized";
      // the bare "initialized" form is kept for existing callers.
      case "initialized":
      case "notifications/initialized":
        return {};
      case "tools/list":
        return this.listTools();
      case "tools/call":
        return this.callTool(params);
      case "ping":
        return {};
      default:
        throw new McpError(-32601, `Method not found: ${method}`);
    }
  }
  /**
   * Handle initialize request: marks the server initialized and reports
   * protocol version, capabilities and server info.
   */
  initialize(params) {
    this.initialized = true;
    return {
      protocolVersion: PROTOCOL_VERSION,
      capabilities: {
        tools: {
          listChanged: false
        }
      },
      serverInfo: {
        name: SERVER_NAME,
        version: SERVER_VERSION
      }
    };
  }
  /**
   * List available tools (name, description and input schema only).
   */
  listTools() {
    const tools = MEMORY_TOOLS.map(({ name, description, inputSchema }) => ({
      name,
      description,
      inputSchema
    }));
    return { tools };
  }
  /**
   * Call a tool.
   *
   * @throws {McpError} -32602 when the tool name is missing or `arguments`
   *   is not a plain object.
   */
  async callTool(params) {
    if (!params?.name || typeof params.name !== "string") {
      throw new McpError(-32602, "Missing tool name");
    }
    const toolParams = params.arguments ?? {};
    if (typeof toolParams !== "object" || Array.isArray(toolParams)) {
      throw new McpError(-32602, "Tool arguments must be an object");
    }
    return this.executor.execute(params.name, toolParams);
  }
};
|
|
3339
|
+
/**
 * Error carrying a JSON-RPC error code; McpServer.handleRequest copies
 * `code` and `message` into the response's `error` member.
 */
var McpError = class extends Error {
  constructor(code, message) {
    super(message);
    // Same own properties, in the same order: code first, then name.
    Object.assign(this, { code, name: "McpError" });
  }
};
|
|
3346
|
+
/**
 * Factory for McpServer; `instances` is passed to the constructor unchanged.
 */
function createMcpServer(instances) {
  const server = new McpServer(instances);
  return server;
}
|
|
3349
|
+
/**
 * Serve `server` over stdio as newline-delimited JSON-RPC 2.0.
 *
 * Each non-blank stdin line is parsed as one message. Requests (with an `id`)
 * get exactly one response line on stdout; notifications (no `id`) are
 * dispatched but never answered. Failures are reported with the matching
 * JSON-RPC code: -32700 for unparsable JSON, -32600 for a payload that is not
 * a JSON-RPC 2.0 object, -32603 for unexpected errors while handling.
 *
 * The returned promise never settles; the process exits with code 0 when
 * stdin closes.
 *
 * @param server - Object exposing `handleRequest(request)` (e.g. McpServer).
 */
async function runMcpServer(server) {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
    terminal: false
  });
  const send = (message) => {
    process.stdout.write(`${JSON.stringify(message)}\n`);
  };
  const sendError = (id, code, message) => {
    send({ jsonrpc: "2.0", id, error: { code, message } });
  };
  const describe = (err) => err instanceof Error ? err.message : String(err);
  rl.on("line", async (line) => {
    if (!line.trim()) return;
    let request;
    try {
      request = JSON.parse(line);
    } catch (err) {
      sendError(null, -32700, `Parse error: ${describe(err)}`);
      return;
    }
    // Valid JSON that is not an object (null, numbers, strings) is an invalid request, not a parse error.
    if (request === null || typeof request !== "object" || request.jsonrpc !== "2.0") {
      sendError(request?.id ?? null, -32600, "Invalid JSON-RPC version");
      return;
    }
    const isNotification = request.id === void 0;
    try {
      if (isNotification) {
        await server.handleRequest({ ...request, id: 0 });
        return;
      }
      send(await server.handleRequest(request));
    } catch (err) {
      // Notifications must never be answered, even on failure.
      if (!isNotification) sendError(request.id, -32603, describe(err));
    }
  });
  rl.on("close", () => {
    process.exit(0);
  });
  // Keep the caller's await pending for the lifetime of the process.
  await new Promise(() => {
  });
}
|
|
3392
|
+
/**
 * Build the launch configuration for running the MCP server with `node`.
 *
 * @param opts - `serverPath` (script passed to node), `memoryDir`, and an
 *   optional `embeddingProvider`.
 * @returns `{ command, args, env }`; `env.EMBEDDING_PROVIDER` is present only
 *   when `opts.embeddingProvider` is truthy.
 */
function generateMcpConfig(opts) {
  const env = { MEMORY_DIR: opts.memoryDir };
  if (opts.embeddingProvider) {
    env.EMBEDDING_PROVIDER = opts.embeddingProvider;
  }
  return { command: "node", args: [opts.serverPath], env };
}
|
|
3402
|
+
|
|
3403
|
+
// src/core/indexer.ts
|
|
3404
|
+
var import_node_crypto3 = require("crypto");
|
|
3405
|
+
var import_promises3 = __toESM(require("fs/promises"), 1);
|
|
3406
|
+
var import_node_path4 = __toESM(require("path"), 1);
|
|
3407
|
+
// Key of the index-metadata row in the `meta` table.
var META_KEY2 = "memory_index_meta_v1";
// Table names used by the indexer's SQL.
var EMBEDDING_CACHE_TABLE2 = "embedding_cache";
var VECTOR_TABLE2 = "chunks_vec";
var FTS_TABLE2 = "chunks_fts";
// Direct-embedding retry policy: attempt count and exponential backoff bounds (milliseconds).
var EMBEDDING_RETRY_MAX_ATTEMPTS2 = 3;
var EMBEDDING_RETRY_BASE_DELAY_MS2 = 500;
var EMBEDDING_RETRY_MAX_DELAY_MS2 = 8000;
|
|
3414
|
+
var MemoryIndexer = class {
|
|
3415
|
+
config;
|
|
3416
|
+
db;
|
|
3417
|
+
provider;
|
|
3418
|
+
providerKey;
|
|
3419
|
+
openAi;
|
|
3420
|
+
gemini;
|
|
3421
|
+
// Vector/FTS state (shared with parent)
|
|
3422
|
+
vectorState;
|
|
3423
|
+
ftsAvailable;
|
|
3424
|
+
constructor(db, provider, config, options) {
|
|
3425
|
+
this.db = db;
|
|
3426
|
+
this.provider = provider;
|
|
3427
|
+
this.config = config;
|
|
3428
|
+
this.openAi = options?.openAi;
|
|
3429
|
+
this.gemini = options?.gemini;
|
|
3430
|
+
this.vectorState = options?.vectorState ?? { available: false };
|
|
3431
|
+
this.ftsAvailable = options?.ftsAvailable ?? false;
|
|
3432
|
+
this.providerKey = this.computeProviderKey();
|
|
3433
|
+
}
|
|
3434
|
+
/**
|
|
3435
|
+
* Update vector/FTS availability (called by parent when extensions load)
|
|
3436
|
+
*/
|
|
3437
|
+
setVectorState(state) {
|
|
3438
|
+
this.vectorState = state;
|
|
3439
|
+
}
|
|
3440
|
+
setFtsAvailable(available) {
|
|
3441
|
+
this.ftsAvailable = available;
|
|
3442
|
+
}
|
|
3443
|
+
getVectorDims() {
|
|
3444
|
+
return this.vectorState.dims;
|
|
3445
|
+
}
|
|
3446
|
+
/**
|
|
3447
|
+
* Compute a unique key for the current provider configuration
|
|
3448
|
+
*/
|
|
3449
|
+
computeProviderKey() {
|
|
3450
|
+
const parts = [this.provider.id, this.provider.model];
|
|
3451
|
+
if (this.openAi) {
|
|
3452
|
+
parts.push(this.openAi.baseUrl);
|
|
3453
|
+
}
|
|
3454
|
+
if (this.gemini) {
|
|
3455
|
+
parts.push(this.gemini.baseUrl);
|
|
3456
|
+
}
|
|
3457
|
+
return hashText(parts.join(":"));
|
|
3458
|
+
}
|
|
3459
|
+
/**
|
|
3460
|
+
* Read index metadata from database
|
|
3461
|
+
*/
|
|
3462
|
+
readMeta() {
|
|
3463
|
+
try {
|
|
3464
|
+
const row = this.db.prepare(`SELECT value FROM meta WHERE key = ?`).get(META_KEY2);
|
|
3465
|
+
if (!row?.value) return null;
|
|
3466
|
+
return JSON.parse(row.value);
|
|
3467
|
+
} catch {
|
|
3468
|
+
return null;
|
|
3469
|
+
}
|
|
3470
|
+
}
|
|
3471
|
+
/**
|
|
3472
|
+
* Write index metadata to database
|
|
3473
|
+
*/
|
|
3474
|
+
writeMeta(meta) {
|
|
3475
|
+
this.db.prepare(`INSERT OR REPLACE INTO meta (key, value) VALUES (?, ?)`).run(META_KEY2, JSON.stringify(meta));
|
|
3476
|
+
}
|
|
3477
|
+
/**
|
|
3478
|
+
* Check if the index is stale by comparing file mtimes
|
|
3479
|
+
*/
|
|
3480
|
+
async isStale() {
|
|
3481
|
+
try {
|
|
3482
|
+
const files = await listMemoryFiles(this.config.memoryDir);
|
|
3483
|
+
const stored = this.db.prepare(`SELECT path, mtime FROM files WHERE source = ?`).all("memory");
|
|
3484
|
+
if (files.length !== stored.length) {
|
|
3485
|
+
this.config.debug?.(`Stale: file count changed (${stored.length} -> ${files.length})`);
|
|
3486
|
+
return true;
|
|
3487
|
+
}
|
|
3488
|
+
const storedMap = new Map(stored.map((f) => [f.path, f.mtime]));
|
|
3489
|
+
for (const absPath of files) {
|
|
3490
|
+
const relPath = import_node_path4.default.relative(this.config.memoryDir, absPath).replace(/\\/g, "/");
|
|
3491
|
+
const storedMtime = storedMap.get(relPath);
|
|
3492
|
+
if (storedMtime === void 0) {
|
|
3493
|
+
this.config.debug?.(`Stale: new file ${relPath}`);
|
|
3494
|
+
return true;
|
|
3495
|
+
}
|
|
3496
|
+
const stat = await import_promises3.default.stat(absPath);
|
|
3497
|
+
const currentMtime = Math.floor(stat.mtimeMs);
|
|
3498
|
+
if (currentMtime !== storedMtime) {
|
|
3499
|
+
this.config.debug?.(`Stale: mtime changed for ${relPath}`);
|
|
3500
|
+
return true;
|
|
3501
|
+
}
|
|
3502
|
+
}
|
|
3503
|
+
return false;
|
|
3504
|
+
} catch (err) {
|
|
3505
|
+
this.config.debug?.(`Stale check failed: ${String(err)}`);
|
|
3506
|
+
return true;
|
|
3507
|
+
}
|
|
3508
|
+
}
|
|
3509
|
+
/**
|
|
3510
|
+
* Check if a full reindex is needed based on configuration changes
|
|
3511
|
+
*/
|
|
3512
|
+
needsFullReindex(force) {
|
|
3513
|
+
const meta = this.readMeta();
|
|
3514
|
+
return force === true || !meta || meta.model !== this.provider.model || meta.provider !== this.provider.id || meta.providerKey !== this.providerKey || meta.chunkTokens !== this.config.chunking.tokens || meta.chunkOverlap !== this.config.chunking.overlap || this.vectorState.available && !meta?.vectorDims;
|
|
3515
|
+
}
|
|
3516
|
+
/**
|
|
3517
|
+
* Index all memory files, returns stats
|
|
3518
|
+
*/
|
|
3519
|
+
async indexAll(force) {
|
|
3520
|
+
const needsFullReindex = this.needsFullReindex(force);
|
|
3521
|
+
const files = await listMemoryFiles(this.config.memoryDir);
|
|
3522
|
+
const activePaths = /* @__PURE__ */ new Set();
|
|
3523
|
+
let filesProcessed = 0;
|
|
3524
|
+
let chunksCreated = 0;
|
|
3525
|
+
for (const absPath of files) {
|
|
3526
|
+
const entry = await buildFileEntry(absPath, this.config.memoryDir);
|
|
3527
|
+
activePaths.add(entry.path);
|
|
3528
|
+
const record = this.db.prepare(`SELECT hash FROM files WHERE path = ? AND source = ?`).get(entry.path, "memory");
|
|
3529
|
+
if (!needsFullReindex && record?.hash === entry.hash) {
|
|
3530
|
+
continue;
|
|
3531
|
+
}
|
|
3532
|
+
const chunkCount = await this.indexFile(entry);
|
|
3533
|
+
filesProcessed++;
|
|
3534
|
+
chunksCreated += chunkCount;
|
|
3535
|
+
}
|
|
3536
|
+
const staleRemoved = this.removeStaleEntries(activePaths);
|
|
3537
|
+
this.writeMeta({
|
|
3538
|
+
model: this.provider.model,
|
|
3539
|
+
provider: this.provider.id,
|
|
3540
|
+
providerKey: this.providerKey,
|
|
3541
|
+
chunkTokens: this.config.chunking.tokens,
|
|
3542
|
+
chunkOverlap: this.config.chunking.overlap,
|
|
3543
|
+
vectorDims: this.vectorState.dims
|
|
3544
|
+
});
|
|
3545
|
+
this.pruneEmbeddingCacheIfNeeded();
|
|
3546
|
+
return { filesProcessed, chunksCreated, staleRemoved };
|
|
3547
|
+
}
|
|
3548
|
+
/**
|
|
3549
|
+
* Index a single file
|
|
3550
|
+
*/
|
|
3551
|
+
async indexFile(entry) {
|
|
3552
|
+
const content = await import_promises3.default.readFile(entry.absPath, "utf-8");
|
|
3553
|
+
const chunks = chunkMarkdown(content, this.config.chunking);
|
|
3554
|
+
const embeddings = await this.embedChunks(chunks);
|
|
3555
|
+
this.db.prepare(
|
|
3556
|
+
`INSERT OR REPLACE INTO files (path, source, hash, mtime, size) VALUES (?, ?, ?, ?, ?)`
|
|
3557
|
+
).run(entry.path, "memory", entry.hash, Math.floor(entry.mtimeMs), entry.size);
|
|
3558
|
+
this.deleteChunksForFile(entry.path);
|
|
3559
|
+
const now = Date.now();
|
|
3560
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
3561
|
+
const chunk = chunks[i];
|
|
3562
|
+
const embedding = embeddings[i] ?? [];
|
|
3563
|
+
this.insertChunk(entry.path, chunk, embedding, now);
|
|
3564
|
+
}
|
|
3565
|
+
return chunks.length;
|
|
3566
|
+
}
|
|
3567
|
+
/**
|
|
3568
|
+
* Delete all chunks for a file
|
|
3569
|
+
*/
|
|
3570
|
+
deleteChunksForFile(filePath) {
|
|
3571
|
+
try {
|
|
3572
|
+
this.db.prepare(
|
|
3573
|
+
`DELETE FROM ${VECTOR_TABLE2} WHERE id IN (SELECT id FROM chunks WHERE path = ? AND source = ?)`
|
|
3574
|
+
).run(filePath, "memory");
|
|
3575
|
+
} catch {
|
|
3576
|
+
}
|
|
3577
|
+
this.db.prepare(`DELETE FROM chunks WHERE path = ? AND source = ?`).run(filePath, "memory");
|
|
3578
|
+
if (this.config.ftsEnabled && this.ftsAvailable) {
|
|
3579
|
+
try {
|
|
3580
|
+
this.db.prepare(`DELETE FROM ${FTS_TABLE2} WHERE path = ? AND source = ? AND model = ?`).run(filePath, "memory", this.provider.model);
|
|
3581
|
+
} catch {
|
|
3582
|
+
}
|
|
3583
|
+
}
|
|
3584
|
+
}
|
|
3585
|
+
/**
|
|
3586
|
+
* Insert a chunk into the database
|
|
3587
|
+
*/
|
|
3588
|
+
insertChunk(filePath, chunk, embedding, timestamp) {
|
|
3589
|
+
const chunkId = (0, import_node_crypto3.randomUUID)();
|
|
3590
|
+
this.db.prepare(
|
|
3591
|
+
`INSERT INTO chunks (id, path, source, start_line, end_line, hash, model, text, embedding, updated_at)
|
|
3592
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
3593
|
+
).run(
|
|
3594
|
+
chunkId,
|
|
3595
|
+
filePath,
|
|
3596
|
+
"memory",
|
|
3597
|
+
chunk.startLine,
|
|
3598
|
+
chunk.endLine,
|
|
3599
|
+
chunk.hash,
|
|
3600
|
+
this.provider.model,
|
|
3601
|
+
chunk.text,
|
|
3602
|
+
JSON.stringify(embedding),
|
|
3603
|
+
timestamp
|
|
3604
|
+
);
|
|
3605
|
+
if (this.vectorState.available && embedding.length > 0) {
|
|
3606
|
+
if (!this.vectorState.dims) {
|
|
3607
|
+
this.vectorState.dims = embedding.length;
|
|
3608
|
+
this.ensureVectorTable(embedding.length);
|
|
3609
|
+
}
|
|
3610
|
+
try {
|
|
3611
|
+
this.db.prepare(`INSERT INTO ${VECTOR_TABLE2} (id, embedding) VALUES (?, ?)`).run(chunkId, vectorToBlob(embedding));
|
|
3612
|
+
} catch {
|
|
3613
|
+
}
|
|
3614
|
+
}
|
|
3615
|
+
if (this.config.ftsEnabled && this.ftsAvailable) {
|
|
3616
|
+
try {
|
|
3617
|
+
this.db.prepare(
|
|
3618
|
+
`INSERT INTO ${FTS_TABLE2} (text, id, path, source, model, start_line, end_line)
|
|
3619
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`
|
|
3620
|
+
).run(
|
|
3621
|
+
chunk.text,
|
|
3622
|
+
chunkId,
|
|
3623
|
+
filePath,
|
|
3624
|
+
"memory",
|
|
3625
|
+
this.provider.model,
|
|
3626
|
+
chunk.startLine,
|
|
3627
|
+
chunk.endLine
|
|
3628
|
+
);
|
|
3629
|
+
} catch {
|
|
3630
|
+
}
|
|
3631
|
+
}
|
|
3632
|
+
}
|
|
3633
|
+
/**
|
|
3634
|
+
* Remove stale file entries that no longer exist
|
|
3635
|
+
*/
|
|
3636
|
+
removeStaleEntries(activePaths) {
|
|
3637
|
+
const staleRows = this.db.prepare(`SELECT path FROM files WHERE source = ?`).all("memory");
|
|
3638
|
+
let removed = 0;
|
|
3639
|
+
for (const stale of staleRows) {
|
|
3640
|
+
if (activePaths.has(stale.path)) continue;
|
|
3641
|
+
this.db.prepare(`DELETE FROM files WHERE path = ? AND source = ?`).run(stale.path, "memory");
|
|
3642
|
+
this.deleteChunksForFile(stale.path);
|
|
3643
|
+
removed++;
|
|
3644
|
+
}
|
|
3645
|
+
return removed;
|
|
3646
|
+
}
|
|
3647
|
+
/**
|
|
3648
|
+
* Create vector table with the given dimensions
|
|
3649
|
+
*/
|
|
3650
|
+
ensureVectorTable(dimensions) {
|
|
3651
|
+
if (!this.vectorState.available) return;
|
|
3652
|
+
try {
|
|
3653
|
+
this.db.exec(
|
|
3654
|
+
`CREATE VIRTUAL TABLE IF NOT EXISTS ${VECTOR_TABLE2} USING vec0(
|
|
3655
|
+
id TEXT PRIMARY KEY,
|
|
3656
|
+
embedding FLOAT[${dimensions}]
|
|
3657
|
+
)`
|
|
3658
|
+
);
|
|
3659
|
+
} catch (err) {
|
|
3660
|
+
this.config.debug?.(`vector table creation failed: ${String(err)}`);
|
|
3661
|
+
}
|
|
3662
|
+
}
|
|
3663
|
+
/**
|
|
3664
|
+
* Get embeddings for chunks, using cache when available
|
|
3665
|
+
*/
|
|
3666
|
+
async embedChunks(chunks) {
|
|
3667
|
+
if (chunks.length === 0) return [];
|
|
3668
|
+
const hashes = chunks.map((c) => c.hash);
|
|
3669
|
+
const cached = this.loadEmbeddingCache(hashes);
|
|
3670
|
+
const missing = [];
|
|
3671
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
3672
|
+
if (!cached.has(hashes[i])) {
|
|
3673
|
+
missing.push({ index: i, chunk: chunks[i] });
|
|
3674
|
+
}
|
|
3675
|
+
}
|
|
3676
|
+
if (missing.length > 0) {
|
|
3677
|
+
const texts = missing.map((m) => m.chunk.text);
|
|
3678
|
+
const newEmbeddings = await this.embedBatchWithRetry(texts);
|
|
3679
|
+
for (let i = 0; i < missing.length; i++) {
|
|
3680
|
+
const hash = missing[i].chunk.hash;
|
|
3681
|
+
const embedding = newEmbeddings[i] ?? [];
|
|
3682
|
+
cached.set(hash, embedding);
|
|
3683
|
+
this.upsertEmbeddingCache(hash, embedding);
|
|
3684
|
+
}
|
|
3685
|
+
}
|
|
3686
|
+
return hashes.map((h) => cached.get(h) ?? []);
|
|
3687
|
+
}
|
|
3688
|
+
/**
|
|
3689
|
+
* Embed texts with retry logic
|
|
3690
|
+
*/
|
|
3691
|
+
async embedBatchWithRetry(texts) {
|
|
3692
|
+
if (texts.length === 0) return [];
|
|
3693
|
+
if (this.config.batch.enabled) {
|
|
3694
|
+
try {
|
|
3695
|
+
return await this.embedWithBatchApi(texts);
|
|
3696
|
+
} catch (err) {
|
|
3697
|
+
this.config.debug?.(`batch embedding failed, falling back to direct: ${String(err)}`);
|
|
3698
|
+
}
|
|
3699
|
+
}
|
|
3700
|
+
let lastError = null;
|
|
3701
|
+
for (let attempt = 0; attempt < EMBEDDING_RETRY_MAX_ATTEMPTS2; attempt++) {
|
|
3702
|
+
try {
|
|
3703
|
+
return await this.provider.embedBatch(texts);
|
|
3704
|
+
} catch (err) {
|
|
3705
|
+
lastError = err instanceof Error ? err : new Error(String(err));
|
|
3706
|
+
if (attempt < EMBEDDING_RETRY_MAX_ATTEMPTS2 - 1) {
|
|
3707
|
+
const delay = Math.min(
|
|
3708
|
+
EMBEDDING_RETRY_MAX_DELAY_MS2,
|
|
3709
|
+
EMBEDDING_RETRY_BASE_DELAY_MS2 * Math.pow(2, attempt)
|
|
3710
|
+
);
|
|
3711
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
3712
|
+
}
|
|
3713
|
+
}
|
|
3714
|
+
}
|
|
3715
|
+
throw lastError;
|
|
3716
|
+
}
|
|
3717
|
+
/**
|
|
3718
|
+
* Use batch API for large embedding jobs
|
|
3719
|
+
*/
|
|
3720
|
+
async embedWithBatchApi(texts) {
|
|
3721
|
+
if (this.openAi) {
|
|
3722
|
+
const requests = texts.map((text, i) => ({
|
|
3723
|
+
custom_id: `chunk-${i}`,
|
|
3724
|
+
method: "POST",
|
|
3725
|
+
url: OPENAI_BATCH_ENDPOINT,
|
|
3726
|
+
body: { model: this.openAi.model, input: text }
|
|
3727
|
+
}));
|
|
3728
|
+
const results = await runOpenAiEmbeddingBatches({
|
|
3729
|
+
openAi: this.openAi,
|
|
3730
|
+
source: "minimem",
|
|
3731
|
+
requests,
|
|
3732
|
+
wait: this.config.batch.wait,
|
|
3733
|
+
pollIntervalMs: this.config.batch.pollIntervalMs,
|
|
3734
|
+
timeoutMs: this.config.batch.timeoutMs,
|
|
3735
|
+
concurrency: this.config.batch.concurrency,
|
|
3736
|
+
debug: this.config.debug
|
|
3737
|
+
});
|
|
3738
|
+
return texts.map((_, i) => results.get(`chunk-${i}`) ?? []);
|
|
3739
|
+
}
|
|
3740
|
+
if (this.gemini) {
|
|
3741
|
+
const requests = texts.map((text, i) => ({
|
|
3742
|
+
custom_id: `chunk-${i}`,
|
|
3743
|
+
content: { parts: [{ text }] },
|
|
3744
|
+
taskType: "RETRIEVAL_DOCUMENT"
|
|
3745
|
+
}));
|
|
3746
|
+
const results = await runGeminiEmbeddingBatches({
|
|
3747
|
+
gemini: this.gemini,
|
|
3748
|
+
source: "minimem",
|
|
3749
|
+
requests,
|
|
3750
|
+
wait: this.config.batch.wait,
|
|
3751
|
+
pollIntervalMs: this.config.batch.pollIntervalMs,
|
|
3752
|
+
timeoutMs: this.config.batch.timeoutMs,
|
|
3753
|
+
concurrency: this.config.batch.concurrency,
|
|
3754
|
+
debug: this.config.debug
|
|
3755
|
+
});
|
|
3756
|
+
return texts.map((_, i) => results.get(`chunk-${i}`) ?? []);
|
|
3757
|
+
}
|
|
3758
|
+
throw new Error("Batch API not available for local embeddings");
|
|
3759
|
+
}
|
|
3760
|
+
/**
|
|
3761
|
+
* Load embeddings from cache
|
|
3762
|
+
*/
|
|
3763
|
+
loadEmbeddingCache(hashes) {
|
|
3764
|
+
const result = /* @__PURE__ */ new Map();
|
|
3765
|
+
if (!this.config.cache.enabled || hashes.length === 0) return result;
|
|
3766
|
+
const placeholders = hashes.map(() => "?").join(",");
|
|
3767
|
+
const rows = this.db.prepare(
|
|
3768
|
+
`SELECT hash, embedding FROM ${EMBEDDING_CACHE_TABLE2}
|
|
3769
|
+
WHERE provider = ? AND model = ? AND provider_key = ? AND hash IN (${placeholders})`
|
|
3770
|
+
).all(this.provider.id, this.provider.model, this.providerKey, ...hashes);
|
|
3771
|
+
const now = Date.now();
|
|
3772
|
+
for (const row of rows) {
|
|
3773
|
+
result.set(row.hash, parseEmbedding(row.embedding));
|
|
3774
|
+
this.db.prepare(
|
|
3775
|
+
`UPDATE ${EMBEDDING_CACHE_TABLE2} SET updated_at = ?
|
|
3776
|
+
WHERE provider = ? AND model = ? AND provider_key = ? AND hash = ?`
|
|
3777
|
+
).run(now, this.provider.id, this.provider.model, this.providerKey, row.hash);
|
|
3778
|
+
}
|
|
3779
|
+
return result;
|
|
3780
|
+
}
|
|
3781
|
+
/**
|
|
3782
|
+
* Save embedding to cache
|
|
3783
|
+
*/
|
|
3784
|
+
upsertEmbeddingCache(hash, embedding) {
|
|
3785
|
+
if (!this.config.cache.enabled) return;
|
|
3786
|
+
const now = Date.now();
|
|
3787
|
+
this.db.prepare(
|
|
3788
|
+
`INSERT OR REPLACE INTO ${EMBEDDING_CACHE_TABLE2}
|
|
3789
|
+
(provider, model, provider_key, hash, embedding, dims, updated_at)
|
|
3790
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`
|
|
3791
|
+
).run(
|
|
3792
|
+
this.provider.id,
|
|
3793
|
+
this.provider.model,
|
|
3794
|
+
this.providerKey,
|
|
3795
|
+
hash,
|
|
3796
|
+
JSON.stringify(embedding),
|
|
3797
|
+
embedding.length,
|
|
3798
|
+
now
|
|
3799
|
+
);
|
|
3800
|
+
}
|
|
3801
|
+
/**
|
|
3802
|
+
* Prune old cache entries if over limit
|
|
3803
|
+
*/
|
|
3804
|
+
pruneEmbeddingCacheIfNeeded() {
|
|
3805
|
+
if (!this.config.cache.enabled) return;
|
|
3806
|
+
const row = this.db.prepare(`SELECT COUNT(*) as count FROM ${EMBEDDING_CACHE_TABLE2}`).get();
|
|
3807
|
+
if (row.count <= this.config.cache.maxEntries) return;
|
|
3808
|
+
const excess = row.count - this.config.cache.maxEntries;
|
|
3809
|
+
this.db.prepare(
|
|
3810
|
+
`DELETE FROM ${EMBEDDING_CACHE_TABLE2}
|
|
3811
|
+
WHERE rowid IN (
|
|
3812
|
+
SELECT rowid FROM ${EMBEDDING_CACHE_TABLE2}
|
|
3813
|
+
ORDER BY updated_at ASC
|
|
3814
|
+
LIMIT ?
|
|
3815
|
+
)`
|
|
3816
|
+
).run(excess);
|
|
3817
|
+
}
|
|
3818
|
+
};
|
|
3819
|
+
|
|
3820
|
+
// src/core/searcher.ts
|
|
3821
|
+
// Passed as `snippetMaxChars` to the keyword and vector search helpers.
var SNIPPET_MAX_CHARS2 = 700;
// Table names handed to the vector and keyword search helpers respectively.
var VECTOR_TABLE3 = "chunks_vec";
var FTS_TABLE3 = "chunks_fts";
// Query-embedding deadlines in milliseconds: one minute by default,
// five minutes when the provider id is "local".
var EMBEDDING_QUERY_TIMEOUT_REMOTE_MS2 = 60 * 1e3;
var EMBEDDING_QUERY_TIMEOUT_LOCAL_MS2 = 5 * 60 * 1e3;
|
|
3826
|
+
/**
 * Runs search queries against an index database, combining vector results
 * with keyword (FTS) results when hybrid search is enabled.
 *
 * The instance does not load extensions itself: vector / FTS availability is
 * handed in through the constructor options or the setters below.
 */
var MemorySearcher = class {
  db;
  provider;
  config;
  // State from parent
  vectorState;
  ftsAvailable;
  // Callback to ensure vector is ready
  ensureVectorReadyFn;
  /**
   * @param {object} db - Database handle forwarded to the search helpers.
   * @param {object} provider - Embedding provider; `id`, `model` and
   *   `embedQuery(text)` are used.
   * @param {object} config - Search configuration; `query.minScore`,
   *   `query.maxResults` and `hybrid.*` are read.
   * @param {object} [options]
   * @param {object} [options.vectorState] - Defaults to `{ available: false }`.
   * @param {boolean} [options.ftsAvailable] - Defaults to `false`.
   * @param {Function} [options.ensureVectorReady] - Called with the embedding
   *   dimension count when vector search is not yet marked available.
   */
  constructor(db, provider, config, options) {
    this.db = db;
    this.provider = provider;
    this.config = config;
    this.vectorState = options?.vectorState ?? { available: false };
    this.ftsAvailable = options?.ftsAvailable ?? false;
    this.ensureVectorReadyFn = options?.ensureVectorReady;
  }
  /**
   * Update vector/FTS availability (called by parent when extensions load)
   */
  setVectorState(state) {
    this.vectorState = state;
  }
  setFtsAvailable(available) {
    this.ftsAvailable = available;
  }
  /**
   * Execute a search query.
   *
   * @param {string} query - Search text; surrounding whitespace is ignored.
   * @param {object} [opts]
   * @param {number} [opts.minScore] - Overrides `config.query.minScore`.
   * @param {number} [opts.maxResults] - Overrides `config.query.maxResults`.
   * @returns {Promise<Array<{path, startLine, endLine, score, snippet}>>}
   *   At most `maxResults` entries whose score is at least `minScore`.
   *   Empty when the trimmed query is empty.
   * @throws Whatever the query embedding rejects with, including the
   *   "embedding query timeout" error.
   */
  async search(query, opts) {
    const cleaned = query.trim();
    if (!cleaned) return [];
    const minScore = opts?.minScore ?? this.config.query.minScore;
    const maxResults = opts?.maxResults ?? this.config.query.maxResults;
    // Over-fetch candidates from each backend, clamped to the range 1..200.
    const candidates = Math.min(
      200,
      Math.max(1, Math.floor(maxResults * this.config.hybrid.candidateMultiplier))
    );
    const sourceFilter = { sql: "", params: [] };
    // Applies the score threshold and result cap, then strips internal fields.
    const toPublicResults = (entries) => entries.filter((entry) => entry.score >= minScore).slice(0, maxResults).map((r) => ({
      path: r.path,
      startLine: r.startLine,
      endLine: r.endLine,
      score: r.score,
      snippet: r.snippet
    }));
    // Keyword search is best effort: a failure counts as "no keyword hits".
    const keywordResults = this.config.hybrid.enabled && this.ftsAvailable ? await searchKeyword({
      db: this.db,
      ftsTable: FTS_TABLE3,
      providerModel: this.provider.model,
      query: cleaned,
      limit: candidates,
      snippetMaxChars: SNIPPET_MAX_CHARS2,
      sourceFilter,
      buildFtsQuery,
      bm25RankToScore
    }).catch(() => []) : [];
    const queryVec = await this.embedQueryWithTimeout(cleaned);
    // An all-zero query vector is treated as "no embedding": skip vector search.
    const hasVector = queryVec.some((v) => v !== 0);
    // Vector search is best effort as well.
    const vectorResults = hasVector ? await searchVector({
      db: this.db,
      vectorTable: VECTOR_TABLE3,
      providerModel: this.provider.model,
      queryVec,
      limit: candidates,
      snippetMaxChars: SNIPPET_MAX_CHARS2,
      ensureVectorReady: (dims) => this.ensureVectorReady(dims),
      sourceFilterVec: sourceFilter,
      sourceFilterChunks: sourceFilter
    }).catch(() => []) : [];
    if (!this.config.hybrid.enabled) {
      return toPublicResults(vectorResults);
    }
    const merged = mergeHybridResults({
      vector: vectorResults.map((r) => ({
        id: r.id,
        path: r.path,
        startLine: r.startLine,
        endLine: r.endLine,
        source: r.source,
        snippet: r.snippet,
        vectorScore: r.score
      })),
      keyword: keywordResults.map((r) => ({
        id: r.id,
        path: r.path,
        startLine: r.startLine,
        endLine: r.endLine,
        source: r.source,
        snippet: r.snippet,
        textScore: r.textScore
      })),
      vectorWeight: this.config.hybrid.vectorWeight,
      textWeight: this.config.hybrid.textWeight
    });
    return toPublicResults(merged);
  }
  /**
   * Embed a query string, giving up after a deadline.
   *
   * The deadline is EMBEDDING_QUERY_TIMEOUT_LOCAL_MS2 when the provider id is
   * "local" and EMBEDDING_QUERY_TIMEOUT_REMOTE_MS2 otherwise.
   *
   * @param {string} text - Query text to embed.
   * @returns {Promise<*>} Whatever `provider.embedQuery(text)` resolves to.
   * @throws {Error} "embedding query timeout" when the deadline passes first;
   *   otherwise any rejection from the provider.
   */
  async embedQueryWithTimeout(text) {
    const timeout = this.provider.id === "local" ? EMBEDDING_QUERY_TIMEOUT_LOCAL_MS2 : EMBEDDING_QUERY_TIMEOUT_REMOTE_MS2;
    const pending = this.provider.embedQuery(text);
    let timer;
    const deadline = new Promise((_, reject) => {
      timer = setTimeout(() => reject(new Error("embedding query timeout")), timeout);
    });
    try {
      return await Promise.race([pending, deadline]);
    } finally {
      // Always cancel the deadline. A timer left pending keeps the Node.js
      // event loop (and therefore the process) alive for the full timeout
      // even though the embedding has already settled.
      clearTimeout(timer);
    }
  }
  /**
   * Ensure vector extension is ready.
   *
   * @param {number} dims - Embedding dimension count, forwarded to the callback.
   * @returns {Promise<boolean>} `true` when the vector state is already marked
   *   available; otherwise the callback's result, or `false` when no callback
   *   was supplied.
   */
  async ensureVectorReady(dims) {
    if (this.vectorState.available) return true;
    if (this.ensureVectorReadyFn) {
      return this.ensureVectorReadyFn(dims);
    }
    return false;
  }
};
|
|
3951
|
+
// Export-name annotation for Node's ESM loader. The `0 &&` guard means the
// assignment never executes; it only needs to be present in the source text
// so the names can be detected as named exports when this CommonJS file is
// imported from an ES module. Keep the plain `module.exports = { ... }` shape.
0 && (module.exports = {
  KNOWLEDGE_GRAPH_TOOL, KNOWLEDGE_PATH_TOOL, KNOWLEDGE_SEARCH_TOOL,
  MEMORY_GET_DETAILS_TOOL, MEMORY_SEARCH_TOOL, MEMORY_TOOLS,
  McpServer, MemoryIndexer, MemorySearcher, MemoryToolExecutor, Minimem,
  addFrontmatter, addSessionToContent,
  buildFileEntry, buildKnowledgeFilterSql,
  chunkMarkdown, cosineSimilarity,
  createEmbeddingProvider, createGeminiEmbeddingProvider, createMcpServer,
  createOpenAiEmbeddingProvider, createToolExecutor,
  extractChunkMetadata, extractSession,
  generateMcpConfig,
  getLinksFrom, getLinksTo, getNeighbors, getPathBetween, getToolDefinitions,
  hashText, isMemoryPath, listMemoryFiles, parseFrontmatter,
  runGeminiEmbeddingBatches, runMcpServer, runOpenAiEmbeddingBatches,
  serializeFrontmatter, stripPrivateContent
});
|
|
3993
|
+
//# sourceMappingURL=index.cjs.map
|