@strvmarv/total-recall 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +18 -0
- package/.copilot-plugin/plugin.json +12 -0
- package/.cursor-plugin/plugin.json +13 -0
- package/.opencode/INSTALL.md +24 -0
- package/CONTRIBUTING.md +295 -0
- package/LICENSE +21 -0
- package/README.md +239 -0
- package/agents/compactor.md +47 -0
- package/dist/index.js +2554 -0
- package/eval/benchmarks/retrieval.jsonl +20 -0
- package/eval/corpus/memories.jsonl +20 -0
- package/hooks/hooks-cursor.json +16 -0
- package/hooks/hooks.json +16 -0
- package/hooks/session-end/run.sh +5 -0
- package/hooks/session-start/run.sh +11 -0
- package/package.json +78 -0
- package/skills/forget/SKILL.md +23 -0
- package/skills/ingest/SKILL.md +20 -0
- package/skills/memory/SKILL.md +60 -0
- package/skills/search/SKILL.md +19 -0
- package/skills/status/SKILL.md +32 -0
- package/src/defaults.toml +28 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,2554 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// src/index.ts
|
|
4
|
+
import { randomUUID as randomUUID5 } from "crypto";
|
|
5
|
+
|
|
6
|
+
// src/config.ts
|
|
7
|
+
import { readFileSync, existsSync } from "fs";
import { homedir as resolveOsHomeDir } from "os";
import { join } from "path";
import { parse as parseToml } from "@iarna/toml";
|
|
10
|
+
// URL of the bundled defaults.toml, resolved relative to this module's own URL.
var DEFAULTS_PATH = new URL("./defaults.toml", import.meta.url);
|
|
11
|
+
/**
 * Resolves the directory where total-recall keeps its data.
 *
 * Resolution order:
 *   1. The TOTAL_RECALL_HOME environment variable, used verbatim.
 *   2. `<HOME>/.total-recall` when the HOME environment variable is set.
 *   3. `<os home directory>/.total-recall` otherwise.
 *
 * Step 3 replaces the previous literal "~" fallback: Node does not expand "~",
 * so that fallback produced a relative "~/.total-recall" directory under the
 * current working directory whenever HOME was unset.
 *
 * @returns {string} Path of the data directory (not created here).
 */
function getDataDir() {
  const override = process.env.TOTAL_RECALL_HOME;
  if (override !== undefined) {
    return override;
  }
  const home = process.env.HOME ?? resolveOsHomeDir();
  return join(home, ".total-recall");
}
|
|
14
|
+
/**
 * Loads the effective configuration.
 *
 * The bundled defaults are always parsed first. If `config.toml` exists in the
 * data directory, it is parsed as well and deep-merged over the defaults.
 *
 * @returns {object} The parsed defaults, optionally overlaid with user settings.
 */
function loadConfig() {
  const defaults = parseToml(readFileSync(DEFAULTS_PATH, "utf-8"));
  const userConfigPath = join(getDataDir(), "config.toml");
  if (!existsSync(userConfigPath)) {
    return defaults;
  }
  const overrides = parseToml(readFileSync(userConfigPath, "utf-8"));
  return deepMerge(defaults, overrides);
}
|
|
25
|
+
/**
 * Recursively merges `source` over `target` and returns a new object.
 *
 * Only plain objects are merged key by key. Every other value (primitives,
 * arrays, Dates and other class instances) from `source` replaces the
 * corresponding value from `target` wholesale. Neither input is mutated.
 *
 * @param {object} target - Base values.
 * @param {object} source - Overriding values.
 * @returns {object} A new object containing the merged result.
 */
function deepMerge(target, source) {
  // A "plain" object has Object.prototype or null as its prototype. Anything
  // else (Array, Date, ...) cannot be rebuilt by spreading its own properties,
  // so it must be replaced rather than merged.
  const isPlainObject = (value) => {
    if (value === null || typeof value !== "object") return false;
    const proto = Object.getPrototypeOf(value);
    return proto === Object.prototype || proto === null;
  };
  const result = { ...target };
  for (const key of Object.keys(source)) {
    // Assigning to "__proto__" would swap the result's prototype instead of
    // creating a property, so such keys are ignored.
    if (key === "__proto__") continue;
    const incoming = source[key];
    const existing = Object.hasOwn(target, key) ? target[key] : void 0;
    result[key] = isPlainObject(incoming) && isPlainObject(existing)
      ? deepMerge(existing, incoming)
      : incoming;
  }
  return result;
}
|
|
39
|
+
|
|
40
|
+
// src/db/connection.ts
|
|
41
|
+
import Database from "better-sqlite3";
|
|
42
|
+
import { mkdirSync, existsSync as existsSync2 } from "fs";
|
|
43
|
+
import { join as join2 } from "path";
|
|
44
|
+
import * as sqliteVec from "sqlite-vec";
|
|
45
|
+
|
|
46
|
+
// src/types.ts
|
|
47
|
+
/**
 * Builds the content table name for a tier / content type pair.
 *
 * @param {string} tier - Tier prefix used verbatim (for example "hot").
 * @param {string} type - "memory" maps to the "memories" suffix; any other
 *   value maps to "knowledge".
 * @returns {string} `<tier>_memories` or `<tier>_knowledge`.
 */
function tableName(tier, type) {
  let suffix = "knowledge";
  if (type === "memory") {
    suffix = "memories";
  }
  return `${tier}_${suffix}`;
}
|
|
51
|
+
/**
 * Builds the name of the vector table paired with a content table.
 *
 * @param {string} tier - Tier passed through to tableName.
 * @param {string} type - Content type passed through to tableName.
 * @returns {string} The content table name with a "_vec" suffix.
 */
function vecTableName(tier, type) {
  const contentTable = tableName(tier, type);
  return `${contentTable}_vec`;
}
|
|
54
|
+
// Every tier / content type combination, ordered tier by tier
// (hot, warm, cold) with "memory" before "knowledge" inside each tier.
var ALL_TABLE_PAIRS = ["hot", "warm", "cold"].flatMap(
  (tier) => ["memory", "knowledge"].map((type) => ({ tier, type }))
);
|
|
62
|
+
|
|
63
|
+
// src/db/schema.ts
|
|
64
|
+
// Version number recorded in the _schema_version table when initSchema creates the schema.
var SCHEMA_VERSION = 1;
|
|
65
|
+
/**
 * Returns the CREATE TABLE statement for one content table.
 *
 * The statement uses IF NOT EXISTS, so running it against an existing table is
 * a no-op. `name` is interpolated directly into the SQL text, so it must be a
 * trusted identifier.
 *
 * @param {string} name - Table name to create.
 * @returns {string} SQL text of the CREATE TABLE statement.
 */
function contentTableDDL(name) {
  return `
    CREATE TABLE IF NOT EXISTS ${name} (
      id TEXT PRIMARY KEY NOT NULL,
      content TEXT NOT NULL,
      summary TEXT,
      source TEXT,
      source_tool TEXT,
      project TEXT,
      tags TEXT DEFAULT '[]',
      created_at INTEGER NOT NULL,
      updated_at INTEGER NOT NULL,
      last_accessed_at INTEGER NOT NULL,
      access_count INTEGER DEFAULT 0,
      decay_score REAL DEFAULT 1.0,
      parent_id TEXT,
      collection_id TEXT,
      metadata TEXT DEFAULT '{}'
    )
  `;
}
|
|
86
|
+
/**
 * Returns the CREATE INDEX statements for one content table.
 *
 * Each index is named `idx_<table>_<suffix>`. The suffix equals the indexed
 * column except for `last_accessed_at`, whose index suffix is `last_accessed`.
 *
 * @param {string} name - Content table name, interpolated into the SQL text.
 * @returns {string[]} Five CREATE INDEX IF NOT EXISTS statements.
 */
function contentTableIndexes(name) {
  // [index-name suffix, indexed column]
  const indexedColumns = [
    ["project", "project"],
    ["decay_score", "decay_score"],
    ["last_accessed", "last_accessed_at"],
    ["parent_id", "parent_id"],
    ["collection_id", "collection_id"]
  ];
  return indexedColumns.map(
    ([suffix, column]) => `CREATE INDEX IF NOT EXISTS idx_${name}_${suffix} ON ${name}(${column})`
  );
}
|
|
95
|
+
// CREATE TABLE statements for the bookkeeping tables that sit beside the
// per-tier content tables. initSchema runs each statement once, in order.
var SYSTEM_TABLE_DDLS = [
  // retrieval_events: one row per recorded query (text, results, scores, latency).
  `CREATE TABLE IF NOT EXISTS retrieval_events (
    id TEXT PRIMARY KEY NOT NULL,
    timestamp INTEGER NOT NULL,
    session_id TEXT NOT NULL,
    query_text TEXT NOT NULL,
    query_source TEXT NOT NULL,
    query_embedding BLOB,
    results TEXT NOT NULL DEFAULT '[]',
    result_count INTEGER NOT NULL DEFAULT 0,
    top_score REAL,
    top_tier TEXT,
    top_content_type TEXT,
    outcome_used INTEGER,
    outcome_signal TEXT,
    config_snapshot_id TEXT NOT NULL,
    latency_ms INTEGER,
    tiers_searched TEXT NOT NULL DEFAULT '[]',
    total_candidates_scanned INTEGER
  )`,
  // compaction_log: one row per compaction step (source/target tier and entry ids, reason).
  `CREATE TABLE IF NOT EXISTS compaction_log (
    id TEXT PRIMARY KEY NOT NULL,
    timestamp INTEGER NOT NULL,
    session_id TEXT,
    source_tier TEXT NOT NULL,
    target_tier TEXT,
    source_entry_ids TEXT NOT NULL DEFAULT '[]',
    target_entry_id TEXT,
    semantic_drift REAL,
    facts_preserved INTEGER,
    facts_in_original INTEGER,
    preservation_ratio REAL,
    decay_scores TEXT NOT NULL DEFAULT '[]',
    reason TEXT NOT NULL,
    config_snapshot_id TEXT NOT NULL
  )`,
  // config_snapshots: stored configurations, referenced by config_snapshot_id above.
  `CREATE TABLE IF NOT EXISTS config_snapshots (
    id TEXT PRIMARY KEY NOT NULL,
    name TEXT,
    timestamp INTEGER NOT NULL,
    config TEXT NOT NULL
  )`,
  // import_log: one row per imported item (source tool/path, content hash, target entry).
  `CREATE TABLE IF NOT EXISTS import_log (
    id TEXT PRIMARY KEY NOT NULL,
    timestamp INTEGER NOT NULL,
    source_tool TEXT NOT NULL,
    source_path TEXT NOT NULL,
    content_hash TEXT NOT NULL,
    target_entry_id TEXT NOT NULL,
    target_tier TEXT NOT NULL,
    target_type TEXT NOT NULL
  )`
];
|
|
148
|
+
// CREATE INDEX statements for the bookkeeping tables. Every index follows the
// pattern idx_<table>_<column> ON <table>(<column>).
var SYSTEM_TABLE_INDEXES = [
  ["retrieval_events", "timestamp"],
  ["retrieval_events", "session_id"],
  ["compaction_log", "timestamp"],
  ["compaction_log", "source_tier"],
  ["import_log", "content_hash"],
  ["import_log", "source_tool"]
].map(
  ([table, column]) => `CREATE INDEX IF NOT EXISTS idx_${table}_${column} ON ${table}(${column})`
);
|
|
156
|
+
var SCHEMA_VERSION_DDL = `
|
|
157
|
+
CREATE TABLE IF NOT EXISTS _schema_version (
|
|
158
|
+
version INTEGER NOT NULL,
|
|
159
|
+
applied_at INTEGER NOT NULL
|
|
160
|
+
)
|
|
161
|
+
`;
|
|
162
|
+
/**
 * Prepares a database connection and creates the schema on first use.
 *
 * Sets the journal_mode and foreign_keys pragmas, then, inside a single
 * transaction, checks for the _schema_version table. If it already exists
 * nothing else happens. Otherwise the version table, every content table with
 * its vec0 vector table and indexes, and the system tables and indexes are
 * created.
 *
 * @param {object} db - Database handle exposing pragma/prepare/transaction.
 */
function initSchema(db) {
  db.pragma("journal_mode = WAL");
  db.pragma("foreign_keys = ON");
  const run = (sql, ...params) => db.prepare(sql).run(...params);
  const applySchema = db.transaction(() => {
    const existing = db.prepare(
      "SELECT name FROM sqlite_master WHERE type='table' AND name='_schema_version'"
    ).get();
    if (existing) {
      return;
    }
    run(SCHEMA_VERSION_DDL);
    run(
      "INSERT INTO _schema_version (version, applied_at) VALUES (?, ?)",
      SCHEMA_VERSION,
      Date.now()
    );
    ALL_TABLE_PAIRS.forEach(({ tier, type }) => {
      const contentTable = tableName(tier, type);
      const vectorTable = vecTableName(tier, type);
      run(contentTableDDL(contentTable));
      run(`CREATE VIRTUAL TABLE IF NOT EXISTS ${vectorTable} USING vec0(embedding float[384])`);
      contentTableIndexes(contentTable).forEach((indexSql) => run(indexSql));
    });
    SYSTEM_TABLE_DDLS.forEach((tableSql) => run(tableSql));
    SYSTEM_TABLE_INDEXES.forEach((indexSql) => run(indexSql));
  });
  applySchema();
}
|
|
197
|
+
|
|
198
|
+
// src/db/connection.ts
|
|
199
|
+
// Cached database handle shared by getDb() and closeDb(); null while no connection is open.
var _db = null;
|
|
200
|
+
/**
 * Returns the shared database handle, opening and initialising it on first use.
 *
 * The data directory is created if missing, `total-recall.db` is opened inside
 * it, the sqlite-vec extension is loaded and the schema is initialised.
 *
 * The handle is cached only after every initialisation step has succeeded. If
 * loading the extension or creating the schema throws, the connection is closed
 * and the error is rethrown, so the next call starts from scratch instead of
 * returning a half-initialised handle.
 *
 * @returns {object} The open database handle.
 * @throws Whatever the directory creation, database open, extension load or
 *   schema initialisation throws.
 */
function getDb() {
  if (_db) return _db;
  const dataDir = getDataDir();
  if (!existsSync2(dataDir)) {
    mkdirSync(dataDir, { recursive: true });
  }
  const dbPath = join2(dataDir, "total-recall.db");
  const db = new Database(dbPath);
  try {
    sqliteVec.load(db);
    initSchema(db);
  } catch (err) {
    db.close();
    throw err;
  }
  _db = db;
  return _db;
}
|
|
212
|
+
/**
 * Closes the cached database handle, if one is open, and clears the cache so
 * that the next getDb() call opens a fresh connection.
 */
function closeDb() {
  if (!_db) {
    return;
  }
  _db.close();
  _db = null;
}
|
|
218
|
+
|
|
219
|
+
// src/embedding/embedder.ts
|
|
220
|
+
import { readFile } from "fs/promises";
|
|
221
|
+
import { join as join4 } from "path";
|
|
222
|
+
import * as ort from "onnxruntime-node";
|
|
223
|
+
|
|
224
|
+
// src/embedding/model-manager.ts
|
|
225
|
+
import { existsSync as existsSync3, mkdirSync as mkdirSync2, readdirSync } from "fs";
|
|
226
|
+
import { writeFile } from "fs/promises";
|
|
227
|
+
import { join as join3 } from "path";
|
|
228
|
+
// Base URL that downloadModel prefixes to "<modelName>/resolve/main/<file>".
var HF_BASE_URL = "https://huggingface.co";
|
|
229
|
+
/**
 * Returns the directory in which a model's files are stored.
 *
 * @param {string} modelName - Model identifier, used as the path below "models".
 * @returns {string} `<data dir>/models/<modelName>`.
 */
function getModelPath(modelName) {
  const modelsRoot = join3(getDataDir(), "models");
  return join3(modelsRoot, modelName);
}
|
|
232
|
+
/**
 * Reports whether a model directory holds the files needed to load the model.
 *
 * The embedder loads exactly `model.onnx` and `tokenizer.json` from the model
 * directory, so both must be present. Checking for "any .onnx file" would treat
 * an interrupted download (weights written, tokenizer missing) as complete and
 * prevent it from ever being retried.
 *
 * @param {string} modelPath - Directory to inspect.
 * @returns {boolean} true when both required files exist; false when the
 *   directory is missing, unreadable or incomplete.
 */
function isModelDownloaded(modelPath) {
  if (!existsSync3(modelPath)) return false;
  try {
    const present = new Set(readdirSync(modelPath));
    return ["model.onnx", "tokenizer.json"].every((file) => present.has(file));
  } catch {
    // Unreadable path (for example not a directory): treat as not downloaded.
    return false;
  }
}
|
|
241
|
+
/**
 * Downloads a model's files into its model directory.
 *
 * Files are fetched one at a time from
 * `<HF_BASE_URL>/<modelName>/resolve/main/<file>` and written to disk as soon
 * as each download completes.
 *
 * @param {string} modelName - Model repository identifier.
 * @returns {Promise<string>} The directory the files were written to.
 * @throws {Error} When any file responds with a non-OK HTTP status; fetch and
 *   file-system errors propagate unchanged.
 */
async function downloadModel(modelName) {
  const modelPath = getModelPath(modelName);
  mkdirSync2(modelPath, { recursive: true });
  // Fetch the model weights last: the presence of the .onnx file is what marks
  // a model directory as downloaded, so a download that fails part-way must not
  // leave that file behind without its tokenizer files.
  const files = ["tokenizer.json", "tokenizer_config.json", "model.onnx"];
  const repoUrl = `${HF_BASE_URL}/${modelName}/resolve/main`;
  for (const file of files) {
    const url = `${repoUrl}/${file}`;
    const dest = join3(modelPath, file);
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(
        `Failed to download ${file} from ${url}: ${response.status} ${response.statusText}`
      );
    }
    const buffer = await response.arrayBuffer();
    await writeFile(dest, Buffer.from(buffer));
  }
  return modelPath;
}
|
|
260
|
+
|
|
261
|
+
// src/embedding/embedder.ts
|
|
262
|
+
// Token ids placed around / substituted into tokenized text, and the maximum
// number of ids produced per text (including the leading and trailing ids).
// NOTE(review): 101/102/100 look like the BERT [CLS]/[SEP]/[UNK] ids — confirm
// against the tokenizer.json of the configured model.
var CLS_TOKEN_ID = 101;
var SEP_TOKEN_ID = 102;
var UNK_TOKEN_ID = 100;
var MAX_SEQ_LEN = 512;

/**
 * Produces fixed-size, L2-normalised embedding vectors for text.
 *
 * `options.model` names the model to load and `options.dimensions` is the
 * length of every returned vector. Besides ONNX-based embed(), two
 * deterministic, model-free variants are provided for synchronous callers.
 */
var Embedder = class _Embedder {
  options;
  session = null;
  vocab = null;

  /**
   * @param {{ model: string, dimensions: number }} options - Model name and
   *   embedding size.
   */
  constructor(options) {
    this.options = options;
  }

  /** @returns {boolean} true once both the ONNX session and the vocabulary are set. */
  isLoaded() {
    return this.session !== null && this.vocab !== null;
  }

  /**
   * Loads the ONNX session and tokenizer vocabulary, downloading the model
   * files first when the model directory is not yet populated. Does nothing
   * when already loaded.
   */
  async ensureLoaded() {
    if (this.isLoaded()) return;
    const modelPath = getModelPath(this.options.model);
    if (!isModelDownloaded(modelPath)) {
      await downloadModel(this.options.model);
    }
    const onnxPath = join4(modelPath, "model.onnx");
    this.session = await ort.InferenceSession.create(onnxPath);
    const tokenizerPath = join4(modelPath, "tokenizer.json");
    const tokenizerText = await readFile(tokenizerPath, "utf-8");
    const tokenizerJson = JSON.parse(tokenizerText);
    this.vocab = tokenizerJson.model.vocab;
  }

  /**
   * Converts text into token ids: lower-cases it, splits on whitespace and looks
   * each word up in the vocabulary. Unknown words map to UNK_TOKEN_ID. The
   * result starts with CLS_TOKEN_ID, ends with SEP_TOKEN_ID and never exceeds
   * MAX_SEQ_LEN ids.
   *
   * @param {string} text - Text to tokenize.
   * @returns {number[]} Token ids.
   * @throws {Error} When the vocabulary has not been loaded.
   */
  tokenize(text) {
    if (!this.vocab) throw new Error("Tokenizer not loaded");
    const words = text.toLowerCase().split(/\s+/).filter(Boolean);
    const ids = [CLS_TOKEN_ID];
    for (const word of words) {
      // Own-property lookup only: the vocabulary is a plain parsed-JSON object,
      // so words such as "constructor" or "__proto__" would otherwise resolve
      // to inherited Object.prototype members instead of a numeric id.
      const known = Object.hasOwn(this.vocab, word) ? this.vocab[word] : void 0;
      ids.push(known ?? UNK_TOKEN_ID);
      // Leave room for the trailing SEP token.
      if (ids.length >= MAX_SEQ_LEN - 1) break;
    }
    ids.push(SEP_TOKEN_ID);
    return ids;
  }

  /**
   * Embeds text with the ONNX model.
   *
   * Runs the model on the token ids, averages the first output tensor over the
   * sequence positions and L2-normalises the result.
   *
   * @param {string} text - Text to embed.
   * @returns {Promise<Float32Array>} Vector of `options.dimensions` values.
   * @throws {Error} When the session is unavailable or the model yields no output.
   */
  async embed(text) {
    await this.ensureLoaded();
    if (!this.session) throw new Error("Session not loaded");
    const inputIds = this.tokenize(text);
    const seqLen = inputIds.length;
    const inputIdsTensor = new ort.Tensor(
      "int64",
      BigInt64Array.from(inputIds.map(BigInt)),
      [1, seqLen]
    );
    const attentionMask = new ort.Tensor(
      "int64",
      BigInt64Array.from(new Array(seqLen).fill(1n)),
      [1, seqLen]
    );
    const tokenTypeIds = new ort.Tensor(
      "int64",
      BigInt64Array.from(new Array(seqLen).fill(0n)),
      [1, seqLen]
    );
    const feeds = {
      input_ids: inputIdsTensor,
      attention_mask: attentionMask,
      token_type_ids: tokenTypeIds
    };
    const results = await this.session.run(feeds);
    const outputKey = Object.keys(results)[0];
    if (!outputKey) throw new Error("No output from model");
    const output = results[outputKey];
    if (!output) throw new Error("Output tensor is undefined");
    const hiddenSize = this.options.dimensions;
    const data = output.data;
    // Mean pooling. The indexing assumes the output is laid out as
    // [1, seqLen, hiddenSize] — TODO confirm for the configured model.
    const pooled = new Float32Array(hiddenSize);
    for (let i = 0; i < seqLen; i++) {
      for (let j = 0; j < hiddenSize; j++) {
        pooled[j] = pooled[j] + (data[i * hiddenSize + j] ?? 0) / seqLen;
      }
    }
    let norm = 0;
    for (let i = 0; i < hiddenSize; i++) norm += pooled[i] * pooled[i];
    norm = Math.sqrt(norm);
    if (norm > 0) {
      for (let i = 0; i < hiddenSize; i++) pooled[i] = pooled[i] / norm;
    }
    return pooled;
  }

  /**
   * Embeds several texts one after another.
   *
   * @param {Iterable<string>} texts - Texts to embed.
   * @returns {Promise<Float32Array[]>} One vector per text, in input order.
   */
  async embedBatch(texts) {
    const results = [];
    for (const text of texts) {
      results.push(await this.embed(text));
    }
    return results;
  }

  /**
   * Returns a synchronous embed function suitable for use with ingestion APIs.
   * The returned function delegates to hashEmbed, which is purely hash-based,
   * so neither the ONNX model nor the tokenizer needs to be loaded.
   *
   * @returns {(text: string) => Float32Array} Synchronous embed function.
   */
  makeSyncEmbedFn() {
    const dimensions = this.options.dimensions;
    return (text) => {
      return _Embedder.hashEmbed(text, dimensions);
    };
  }

  /**
   * Hash-based deterministic embedding. Does not require the ONNX model.
   * Useful as a sync fallback for ingestion pipelines.
   *
   * A 32-bit rolling hash of the text seeds a linear congruential sequence that
   * fills the vector, which is then L2-normalised.
   *
   * @param {string} text - Text to embed.
   * @param {number} dimensions - Length of the returned vector.
   * @returns {Float32Array} Deterministic vector for the given text.
   */
  static hashEmbed(text, dimensions) {
    const vec = new Float32Array(dimensions);
    let hash = 0;
    for (let i = 0; i < text.length; i++) {
      hash = hash * 31 + text.charCodeAt(i) | 0;
    }
    for (let i = 0; i < dimensions; i++) {
      hash = hash * 1103515245 + 12345 | 0;
      vec[i] = (hash >> 16 & 32767) / 32767 - 0.5;
    }
    let norm = 0;
    for (let i = 0; i < dimensions; i++) norm += vec[i] * vec[i];
    norm = Math.sqrt(norm);
    if (norm > 0) {
      for (let i = 0; i < dimensions; i++) vec[i] = vec[i] / norm;
    }
    return vec;
  }

  /**
   * Deterministic embedding based on tokenization only (no ONNX inference).
   * Used as fallback when async embed cannot be awaited synchronously.
   * Requires the vocabulary to be loaded, because it calls tokenize().
   *
   * @param {string} text - Text to embed.
   * @returns {Float32Array} L2-normalised vector of `options.dimensions` values.
   */
  deterministicEmbed(text) {
    const tokenIds = this.tokenize(text);
    const hiddenSize = this.options.dimensions;
    const vec = new Float32Array(hiddenSize);
    for (let i = 0; i < tokenIds.length; i++) {
      const tokenId = tokenIds[i];
      for (let j = 0; j < hiddenSize; j++) {
        const h = Math.sin(tokenId * (j + 1) / hiddenSize);
        vec[j] = vec[j] + h / tokenIds.length;
      }
    }
    let norm = 0;
    for (let i = 0; i < hiddenSize; i++) norm += vec[i] * vec[i];
    norm = Math.sqrt(norm);
    if (norm > 0) {
      for (let i = 0; i < hiddenSize; i++) vec[i] = vec[i] / norm;
    }
    return vec;
  }
};
|
|
412
|
+
|
|
413
|
+
// src/tools/registry.ts
|
|
414
|
+
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
415
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
416
|
+
import { CallToolRequestSchema, ListToolsRequestSchema } from "@modelcontextprotocol/sdk/types.js";
|
|
417
|
+
|
|
418
|
+
// src/db/entries.ts
|
|
419
|
+
import { randomUUID } from "crypto";
|
|
420
|
+
/**
 * Converts a raw content-table row into an entry object.
 *
 * Scalar columns are copied as they are. The `tags` and `metadata` columns hold
 * JSON text and are parsed; an empty or missing value becomes `[]` and `{}`
 * respectively. Columns other than the fifteen listed here are not copied.
 *
 * @param {object} row - Row as returned by the database driver.
 * @returns {object} Entry with parsed `tags` and `metadata`.
 */
function rowToEntry(row) {
  const parseJsonColumn = (raw, fallback) => (raw ? JSON.parse(raw) : fallback);
  const {
    id,
    content,
    summary,
    source,
    source_tool,
    project
  } = row;
  const tags = parseJsonColumn(row.tags, []);
  const {
    created_at,
    updated_at,
    last_accessed_at,
    access_count,
    decay_score,
    parent_id,
    collection_id
  } = row;
  const metadata = parseJsonColumn(row.metadata, {});
  return {
    id,
    content,
    summary,
    source,
    source_tool,
    project,
    tags,
    created_at,
    updated_at,
    last_accessed_at,
    access_count,
    decay_score,
    parent_id,
    collection_id,
    metadata
  };
}
|
|
439
|
+
/**
 * Inserts a new entry into the content table for the given tier and type.
 *
 * A fresh UUID becomes the entry id. All three timestamps are set to the
 * current time, access_count starts at 0 and decay_score at 1. Optional fields
 * missing from `opts` are stored as NULL; `tags` and `metadata` are stored as
 * JSON text and default to `[]` and `{}`.
 *
 * @param {object} db - Database handle.
 * @param {string} tier - Storage tier.
 * @param {string} type - Content type.
 * @param {object} opts - Entry fields; `content` is required by the table.
 * @returns {string} The id of the inserted entry.
 */
function insertEntry(db, tier, type, opts) {
  const table = tableName(tier, type);
  const id = randomUUID();
  const now = Date.now();
  const orNull = (value) => value ?? null;
  const params = [
    id,
    opts.content,
    orNull(opts.summary),
    orNull(opts.source),
    orNull(opts.source_tool),
    orNull(opts.project),
    JSON.stringify(opts.tags ?? []),
    now, // created_at
    now, // updated_at
    now, // last_accessed_at
    0, // access_count
    1, // decay_score
    orNull(opts.parent_id),
    orNull(opts.collection_id),
    JSON.stringify(opts.metadata ?? {})
  ];
  const statement = db.prepare(`
    INSERT INTO ${table}
      (id, content, summary, source, source_tool, project, tags,
       created_at, updated_at, last_accessed_at, access_count,
       decay_score, parent_id, collection_id, metadata)
    VALUES
      (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);
  statement.run(...params);
  return id;
}
|
|
469
|
+
/**
 * Fetches one entry by id from the content table for the given tier and type.
 *
 * @param {object} db - Database handle.
 * @param {string} tier - Storage tier.
 * @param {string} type - Content type.
 * @param {string} id - Entry id.
 * @returns {object|null} The entry, or null when no row has that id.
 */
function getEntry(db, tier, type, id) {
  const statement = db.prepare(`SELECT * FROM ${tableName(tier, type)} WHERE id = ?`);
  const row = statement.get(id);
  return row ? rowToEntry(row) : null;
}
|
|
475
|
+
/**
 * Updates selected fields of an entry.
 *
 * `updated_at` is always refreshed. Each of content, summary, tags, project,
 * decay_score and metadata is written only when present in `opts` (that is, not
 * undefined); tags and metadata are stored as JSON text. With `opts.touch`, the
 * access counter is incremented and `last_accessed_at` is refreshed as well.
 *
 * @param {object} db - Database handle.
 * @param {string} tier - Storage tier.
 * @param {string} type - Content type.
 * @param {string} id - Entry id.
 * @param {object} opts - Fields to change plus the optional `touch` flag.
 */
function updateEntry(db, tier, type, id, opts) {
  const table = tableName(tier, type);
  const now = Date.now();
  const asIs = (value) => value;
  const asJson = (value) => JSON.stringify(value);
  // Updatable columns in the order they appear in the SET clause.
  const updatable = [
    ["content", asIs],
    ["summary", asIs],
    ["tags", asJson],
    ["project", asIs],
    ["decay_score", asIs],
    ["metadata", asJson]
  ];
  const assignments = ["updated_at = ?"];
  const params = [now];
  for (const [column, encode] of updatable) {
    const value = opts[column];
    if (value === void 0) continue;
    assignments.push(`${column} = ?`);
    params.push(encode(value));
  }
  if (opts.touch) {
    assignments.push("access_count = access_count + 1", "last_accessed_at = ?");
    params.push(now);
  }
  params.push(id);
  db.prepare(`UPDATE ${table} SET ${assignments.join(", ")} WHERE id = ?`).run(...params);
}
|
|
512
|
+
/**
 * Deletes one row by id from the content table for the given tier and type.
 * Only the content table is touched.
 *
 * @param {object} db - Database handle.
 * @param {string} tier - Storage tier.
 * @param {string} type - Content type.
 * @param {string} id - Entry id.
 */
function deleteEntry(db, tier, type, id) {
  const sql = `DELETE FROM ${tableName(tier, type)} WHERE id = ?`;
  db.prepare(sql).run(id);
}
|
|
516
|
+
/**
 * Lists entries from the content table for the given tier and type.
 *
 * @param {object} db - Database handle.
 * @param {string} tier - Storage tier.
 * @param {string} type - Content type.
 * @param {object} [opts] - Optional filters:
 *   `project` restricts to one project (with `includeGlobal`, rows whose project
 *   is NULL are included too); `orderBy` is an ORDER BY expression inserted
 *   verbatim into the SQL (default "created_at DESC"), so it must come from
 *   trusted code; `limit` caps the number of rows.
 * @returns {object[]} Matching entries.
 */
function listEntries(db, tier, type, opts) {
  const table = tableName(tier, type);
  const orderBy = opts?.orderBy ?? "created_at DESC";
  const params = [];
  let where = "";
  if (opts?.project != null) {
    where = opts.includeGlobal
      ? " WHERE project = ? OR project IS NULL"
      : " WHERE project = ?";
    params.push(opts.project);
  }
  let sql = `SELECT * FROM ${table}${where} ORDER BY ${orderBy}`;
  if (opts?.limit !== void 0) {
    sql += " LIMIT ?";
    params.push(opts.limit);
  }
  return db.prepare(sql).all(...params).map(rowToEntry);
}
|
|
540
|
+
/**
 * Counts the rows in the content table for the given tier and type.
 *
 * @param {object} db - Database handle.
 * @param {string} tier - Storage tier.
 * @param {string} type - Content type.
 * @returns {number} Number of rows.
 */
function countEntries(db, tier, type) {
  const sql = `SELECT COUNT(*) as count FROM ${tableName(tier, type)}`;
  const { count } = db.prepare(sql).get();
  return count;
}
|
|
545
|
+
/**
 * Moves an entry's content row from one table to another inside a transaction.
 *
 * The row is copied under the same id with `updated_at` set to the current
 * time (all other fields unchanged) and then deleted from the source table.
 * Only content rows are handled here.
 *
 * @param {object} db - Database handle.
 * @param {string} fromTier - Source tier.
 * @param {string} fromType - Source content type.
 * @param {string} toTier - Destination tier.
 * @param {string} toType - Destination content type.
 * @param {string} id - Entry id.
 * @throws {Error} When the entry does not exist in the source table.
 */
function moveEntry(db, fromTier, fromType, toTier, toType, id) {
  const doMove = db.transaction(() => {
    const existing = getEntry(db, fromTier, fromType, id);
    if (!existing) {
      throw new Error(`Entry ${id} not found in ${tableName(fromTier, fromType)}`);
    }
    const destination = tableName(toTier, toType);
    const movedAt = Date.now();
    const params = [
      existing.id,
      existing.content,
      existing.summary,
      existing.source,
      existing.source_tool,
      existing.project,
      JSON.stringify(existing.tags),
      existing.created_at,
      movedAt, // updated_at
      existing.last_accessed_at,
      existing.access_count,
      existing.decay_score,
      existing.parent_id,
      existing.collection_id,
      JSON.stringify(existing.metadata)
    ];
    db.prepare(`
      INSERT INTO ${destination}
        (id, content, summary, source, source_tool, project, tags,
         created_at, updated_at, last_accessed_at, access_count,
         decay_score, parent_id, collection_id, metadata)
      VALUES
        (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `).run(...params);
    deleteEntry(db, fromTier, fromType, id);
  });
  doMove();
}
|
|
581
|
+
|
|
582
|
+
// src/search/vector-search.ts
|
|
583
|
+
/**
 * Stores the embedding of an existing entry in the matching vector table.
 *
 * The vector row is keyed by the rowid of the entry's content row, which links
 * the two tables.
 *
 * @param {object} db - Database handle.
 * @param {string} tier - Storage tier.
 * @param {string} type - Content type.
 * @param {string} entryId - Id of the entry the embedding belongs to.
 * @param {Float32Array} embedding - Embedding vector.
 * @throws {Error} When no content row exists for `entryId`.
 */
function insertEmbedding(db, tier, type, entryId, embedding) {
  const contentTable = tableName(tier, type);
  const vecTable = vecTableName(tier, type);
  const row = db.prepare(`SELECT rowid FROM ${contentTable} WHERE id = ?`).get(entryId);
  if (!row) {
    throw new Error(`Entry ${entryId} not found in ${contentTable}`);
  }
  // Respect the view's offset and length: a typed array may cover only part of
  // its underlying ArrayBuffer (for example a subarray), and wrapping the whole
  // buffer would store the wrong bytes.
  const blob = Buffer.from(embedding.buffer, embedding.byteOffset, embedding.byteLength);
  db.prepare(`INSERT INTO ${vecTable} (rowid, embedding) VALUES (?, ?)`).run(
    BigInt(row.rowid),
    blob
  );
}
|
|
595
|
+
/**
 * Removes the embedding that belongs to an entry.
 *
 * The vector row is located through the rowid of the entry's content row, so
 * the content row must still exist; when it does not, nothing happens.
 *
 * @param {object} db - Database handle.
 * @param {string} tier - Storage tier.
 * @param {string} type - Content type.
 * @param {string} entryId - Id of the entry whose embedding is removed.
 */
function deleteEmbedding(db, tier, type, entryId) {
  const contentTable = tableName(tier, type);
  const vecTable = vecTableName(tier, type);
  const owner = db.prepare(`SELECT rowid FROM ${contentTable} WHERE id = ?`).get(entryId);
  if (owner) {
    db.prepare(`DELETE FROM ${vecTable} WHERE rowid = ?`).run(BigInt(owner.rowid));
  }
}
|
|
602
|
+
/**
 * Finds the entries of one tier / content type closest to a query vector.
 *
 * Twice `opts.topK` nearest neighbours are requested from the vector table so
 * that filtering by `opts.minScore` still leaves enough candidates. Each score
 * is `1 - distance`.
 * NOTE(review): the range of the score depends on the distance metric of the
 * vec0 table — confirm it matches what callers expect from `minScore`.
 *
 * @param {object} db - Database handle.
 * @param {string} tier - Storage tier.
 * @param {string} type - Content type.
 * @param {Float32Array} queryVec - Query embedding.
 * @param {{ topK: number, minScore?: number }} opts - Result size and optional
 *   score threshold.
 * @returns {{ id: string, score: number }[]} At most `topK` matches, closest first.
 */
function searchByVector(db, tier, type, queryVec, opts) {
  const contentTable = tableName(tier, type);
  const vecTable = vecTableName(tier, type);
  const oversample = opts.topK * 2;
  // Respect the view's offset and length: a typed array may cover only part of
  // its underlying ArrayBuffer, and wrapping the whole buffer would send the
  // wrong bytes as the query vector.
  const queryBlob = Buffer.from(queryVec.buffer, queryVec.byteOffset, queryVec.byteLength);
  const rows = db.prepare(
    `SELECT c.id, v.distance as dist
     FROM ${vecTable} v
     INNER JOIN ${contentTable} c ON c.rowid = v.rowid
     WHERE v.embedding MATCH ? AND k = ?
     ORDER BY v.distance ASC`
  ).all(queryBlob, oversample);
  let results = rows.map((r) => ({
    id: r.id,
    score: 1 - r.dist
  }));
  if (opts.minScore !== void 0) {
    results = results.filter((r) => r.score >= opts.minScore);
  }
  return results.slice(0, opts.topK);
}
|
|
622
|
+
|
|
623
|
+
// src/memory/store.ts
|
|
624
|
+
/**
 * Stores a new entry together with its embedding.
 *
 * The embedding is computed before anything is written, and the content row and
 * vector row are inserted in one transaction. A failure while embedding or
 * inserting therefore cannot leave behind an entry without a vector, which
 * would never be found by vector search.
 *
 * @param {object} db - Database handle.
 * @param {(text: string) => Float32Array} embed - Synchronous embedding function.
 * @param {object} opts - `content` plus optional tier (default "hot"),
 *   contentType (default "memory"), source, source_tool (default "manual"),
 *   project, tags, parent_id, collection_id and type (stored as
 *   `metadata.entry_type`).
 * @returns {string} Id of the stored entry.
 */
function storeMemory(db, embed, opts) {
  const tier = opts.tier ?? "hot";
  const contentType = opts.contentType ?? "memory";
  const embedding = embed(opts.content);
  const persist = db.transaction(() => {
    const id = insertEntry(db, tier, contentType, {
      content: opts.content,
      source: opts.source ?? null,
      source_tool: opts.source_tool ?? "manual",
      project: opts.project ?? null,
      tags: opts.tags ?? [],
      parent_id: opts.parent_id,
      collection_id: opts.collection_id,
      metadata: opts.type ? { entry_type: opts.type } : {}
    });
    insertEmbedding(db, tier, contentType, id, embedding);
    return id;
  });
  return persist();
}
|
|
641
|
+
|
|
642
|
+
// src/memory/search.ts
|
|
643
|
+
/**
 * Searches several tier / content type tables and merges the results.
 *
 * The query is embedded once. Each table listed in `opts.tiers` is searched by
 * vector; every hit whose entry can still be loaded is marked as accessed and
 * collected. The combined hits are sorted by descending score, cut to
 * `opts.topK` and numbered from rank 1.
 *
 * @param {object} db - Database handle.
 * @param {(text: string) => Float32Array} embed - Synchronous embedding function.
 * @param {string} query - Search text.
 * @param {{ tiers: Iterable<{ tier: string, content_type: string }>, topK: number, minScore?: number }} opts
 *   Tables to search, result size and optional score threshold.
 * @returns {object[]} Ranked results of shape { entry, tier, content_type, score, rank }.
 */
function searchMemory(db, embed, query, opts) {
  const queryVec = embed(query);
  const { topK, minScore } = opts;
  const candidates = [];
  for (const { tier, content_type } of opts.tiers) {
    const hits = searchByVector(db, tier, content_type, queryVec, { topK, minScore });
    for (const { id, score } of hits) {
      const entry = getEntry(db, tier, content_type, id);
      if (entry) {
        updateEntry(db, tier, content_type, id, { touch: true });
        candidates.push({ entry, tier, content_type, score, rank: 0 });
      }
    }
  }
  candidates.sort((left, right) => right.score - left.score);
  const best = candidates.slice(0, opts.topK);
  for (let position = 0; position < best.length; position++) {
    best[position].rank = position + 1;
  }
  return best;
}
|
|
671
|
+
|
|
672
|
+
// src/memory/get.ts
|
|
673
|
+
/**
 * Looks an entry up by id across every tier / content type table, in the order
 * given by ALL_TABLE_PAIRS, and returns the first match.
 *
 * @param {object} db - Database handle.
 * @param {string} id - Entry id.
 * @returns {{ entry: object, tier: string, content_type: string }|null} The
 *   entry and where it was found, or null when no table contains the id.
 */
function getMemory(db, id) {
  for (const pair of ALL_TABLE_PAIRS) {
    const found = getEntry(db, pair.tier, pair.type, id);
    if (found) {
      return { entry: found, tier: pair.tier, content_type: pair.type };
    }
  }
  return null;
}
|
|
682
|
+
|
|
683
|
+
// src/memory/update.ts
|
|
684
|
+
/**
 * Updates an entry wherever it is stored and, when its content changes,
 * replaces its embedding.
 *
 * The new embedding is computed before anything is written, and all writes run
 * in one transaction. A failure while embedding or writing therefore leaves
 * both the entry and its existing embedding untouched, instead of an updated
 * entry whose old vector has already been deleted.
 *
 * @param {object} db - Database handle.
 * @param {(text: string) => Float32Array} embed - Synchronous embedding function.
 * @param {string} id - Entry id.
 * @param {object} opts - Fields to update (see updateEntry).
 * @returns {boolean} false when no entry has that id, true otherwise.
 */
function updateMemory(db, embed, id, opts) {
  const location = getMemory(db, id);
  if (!location) return false;
  const { tier, content_type } = location;
  const contentChanged = opts.content !== void 0;
  const newEmbedding = contentChanged ? embed(opts.content) : void 0;
  const applyUpdate = db.transaction(() => {
    updateEntry(db, tier, content_type, id, opts);
    if (contentChanged) {
      deleteEmbedding(db, tier, content_type, id);
      insertEmbedding(db, tier, content_type, id, newEmbedding);
    }
  });
  applyUpdate();
  return true;
}
|
|
696
|
+
|
|
697
|
+
// src/memory/delete.ts
|
|
698
|
+
/**
 * Deletes an entry, and its embedding, wherever it is stored.
 *
 * The vector row is removed together with the content row. Leaving it behind
 * would orphan it under the content row's rowid, and a later entry that
 * receives the same rowid could then no longer store its own embedding.
 *
 * @param {object} db - Database handle.
 * @param {string} id - Entry id.
 * @returns {boolean} false when no entry has that id, true otherwise.
 */
function deleteMemory(db, id) {
  const location = getMemory(db, id);
  if (!location) return false;
  const { tier, content_type } = location;
  const removeBoth = db.transaction(() => {
    // Embedding first: it is located through the content row's rowid, so the
    // content row must still exist at this point.
    deleteEmbedding(db, tier, content_type, id);
    deleteEntry(db, tier, content_type, id);
  });
  removeBoth();
  return true;
}
|
|
704
|
+
|
|
705
|
+
// src/memory/promote-demote.ts
|
|
706
|
+
/**
 * Moves an entry, together with its embedding, to another tier / content type.
 *
 * The embedding is recomputed from the entry's content before anything is
 * written. Removing the old vector, moving the content row and storing the new
 * vector then run in one transaction, so a failure at any step (for example
 * when the destination already holds the id) rolls everything back instead of
 * leaving the entry without an embedding.
 *
 * @param {object} db - Database handle.
 * @param {(text: string) => Float32Array} embed - Synchronous embedding function.
 * @param {string} id - Entry id.
 * @param {string} fromTier - Source tier.
 * @param {string} fromType - Source content type.
 * @param {string} toTier - Destination tier.
 * @param {string} toType - Destination content type.
 * @throws {Error} When the entry does not exist in the source table.
 */
function promoteEntry(db, embed, id, fromTier, fromType, toTier, toType) {
  const entry = getEntry(db, fromTier, fromType, id);
  if (!entry) {
    throw new Error(`Entry ${id} not found in ${fromTier}/${fromType}`);
  }
  const newEmbedding = embed(entry.content);
  const relocate = db.transaction(() => {
    // The old vector must go first: it is located through the source content
    // row, which moveEntry deletes.
    deleteEmbedding(db, fromTier, fromType, id);
    moveEntry(db, fromTier, fromType, toTier, toType, id);
    insertEmbedding(db, toTier, toType, id, newEmbedding);
  });
  relocate();
}
|
|
716
|
+
/**
 * Moves an entry, together with its embedding, to another tier / content type.
 * Delegates to promoteEntry with the same arguments; the two names differ only
 * in the direction they express at the call site.
 *
 * @param {object} db - Database handle.
 * @param {(text: string) => Float32Array} embed - Synchronous embedding function.
 * @param {string} id - Entry id.
 * @param {string} fromTier - Source tier.
 * @param {string} fromType - Source content type.
 * @param {string} toTier - Destination tier.
 * @param {string} toType - Destination content type.
 */
function demoteEntry(db, embed, id, fromTier, fromType, toTier, toType) {
  const origin = [fromTier, fromType];
  const destination = [toTier, toType];
  promoteEntry(db, embed, id, ...origin, ...destination);
}
|
|
719
|
+
|
|
720
|
+
// src/tools/memory-tools.ts
|
|
721
|
+
/**
 * Tool descriptors (name, description, JSON-schema input) for the memory
 * operations dispatched by handleMemoryTool.
 *
 * The descriptors are assembled from small factories so that repeated schema
 * fragments (tier / content-type enums, string fields) are spelled once.
 * Every factory returns a fresh object, so no fragment is shared by reference.
 */
var MEMORY_TOOLS = (() => {
  const TIERS = ["hot", "warm", "cold"];
  const CONTENT_TYPES = ["memory", "knowledge"];
  const ENTRY_TYPES = ["correction", "preference", "decision", "surfaced", "imported", "compacted", "ingested"];

  const str = (description) => ({ type: "string", description });
  const num = (description) => ({ type: "number", description });
  const oneOf = (values, description) => ({ type: "string", enum: [...values], description });
  const strList = (description) => ({ type: "array", items: { type: "string" }, description });
  const oneOfList = (values, description) => ({
    type: "array",
    items: { type: "string", enum: [...values] },
    description
  });
  const tool = (name, description, properties, required) => ({
    name,
    description,
    inputSchema: { type: "object", properties, required }
  });
  // memory_promote and memory_demote accept the same arguments.
  const moveProperties = () => ({
    id: str("Entry ID"),
    toTier: oneOf(TIERS, "Target tier"),
    toType: oneOf(CONTENT_TYPES, "Target content type")
  });

  return [
    tool(
      "memory_store",
      "Store a new memory or knowledge entry",
      {
        content: str("The content to store"),
        tier: oneOf(TIERS, "Storage tier (default: hot)"),
        contentType: oneOf(CONTENT_TYPES, "Content type (default: memory)"),
        entryType: oneOf(ENTRY_TYPES, "Entry type"),
        project: str("Project scope"),
        tags: strList("Tags"),
        source: str("Source identifier")
      },
      ["content"]
    ),
    tool(
      "memory_search",
      "Search memories and knowledge using semantic similarity",
      {
        query: str("Search query"),
        topK: num("Number of results to return (default: 10)"),
        minScore: num("Minimum similarity score (0-1)"),
        tiers: oneOfList(TIERS, "Tiers to search (default: all)"),
        contentTypes: oneOfList(CONTENT_TYPES, "Content types to search (default: all)")
      },
      ["query"]
    ),
    tool(
      "memory_get",
      "Retrieve a specific memory entry by ID",
      { id: str("Entry ID") },
      ["id"]
    ),
    tool(
      "memory_update",
      "Update an existing memory entry",
      {
        id: str("Entry ID"),
        content: str("New content"),
        summary: str("New summary"),
        tags: strList("New tags"),
        project: str("New project")
      },
      ["id"]
    ),
    tool(
      "memory_delete",
      "Delete a memory entry by ID",
      { id: str("Entry ID to delete") },
      ["id"]
    ),
    tool(
      "memory_promote",
      "Promote a memory entry to a higher tier",
      moveProperties(),
      ["id", "toTier", "toType"]
    ),
    tool(
      "memory_demote",
      "Demote a memory entry to a lower tier",
      moveProperties(),
      ["id", "toTier", "toType"]
    )
  ];
})();
|
|
830
|
+
/**
 * Dispatch a memory_* tool call.
 *
 * Embeddings are produced asynchronously by `ctx.embedder` up front and then
 * handed to the storage helpers as a function returning the precomputed
 * vector.
 *
 * @param name - Tool name, one of the memory_* names.
 * @param args - Tool arguments as described by the tool's input schema.
 * @param ctx - Context providing `db` and `embedder` (with `ensureLoaded()`
 *   and `embed(text)`).
 * @returns A `{ content: [{ type: "text", text }] }` response whose text is
 *   the JSON-encoded result, or null when `name` is not a memory tool.
 */
async function handleMemoryTool(name, args, ctx) {
  const reply = (payload) => ({ content: [{ type: "text", text: JSON.stringify(payload) }] });
  // Load the embedder, embed once, and expose the vector as an embed callback.
  const embedOnce = async (text) => {
    await ctx.embedder.ensureLoaded();
    const vec = await ctx.embedder.embed(text);
    return () => vec;
  };

  if (name === "memory_store") {
    const content = args.content;
    const embedFn = await embedOnce(content);
    const id = storeMemory(ctx.db, embedFn, {
      content,
      tier: args.tier ?? "hot",
      contentType: args.contentType ?? "memory",
      type: args.entryType,
      project: args.project,
      tags: args.tags,
      source: args.source
    });
    return reply({ id });
  }

  if (name === "memory_search") {
    const query = args.query;
    const embedFn = await embedOnce(query);
    const tierFilter = args.tiers;
    const typeFilter = args.contentTypes;
    const tiers = ALL_TABLE_PAIRS.filter(
      (p) => (!tierFilter || tierFilter.includes(p.tier)) && (!typeFilter || typeFilter.includes(p.type))
    ).map((p) => ({ tier: p.tier, content_type: p.type }));
    const results = searchMemory(ctx.db, embedFn, query, {
      tiers,
      topK: args.topK ?? 10,
      minScore: args.minScore
    });
    return reply(results);
  }

  if (name === "memory_get") {
    return reply(getMemory(ctx.db, args.id));
  }

  if (name === "memory_update") {
    const newContent = args.content;
    // Only touch the embedder when the content actually changes; updates to
    // summary/tags/project alone need no embedding.
    const embedFn = newContent !== void 0 ? await embedOnce(newContent) : () => new Float32Array(0);
    const updated = updateMemory(ctx.db, embedFn, args.id, {
      content: newContent,
      summary: args.summary,
      tags: args.tags,
      project: args.project
    });
    return reply({ updated });
  }

  if (name === "memory_delete") {
    return reply({ deleted: deleteMemory(ctx.db, args.id) });
  }

  if (name === "memory_promote" || name === "memory_demote") {
    const promoting = name === "memory_promote";
    const location = getMemory(ctx.db, args.id);
    if (!location) {
      return reply({ error: "Entry not found" });
    }
    const embedFn = await embedOnce(location.entry.content);
    const move = promoting ? promoteEntry : demoteEntry;
    move(ctx.db, embedFn, args.id, location.tier, location.content_type, args.toTier, args.toType);
    return reply(promoting ? { promoted: true } : { demoted: true });
  }

  return null;
}
|
|
928
|
+
|
|
929
|
+
// src/tools/system-tools.ts
|
|
930
|
+
/**
 * Tool descriptors (name, description, JSON-schema input) for the system
 * operations dispatched by handleSystemTool.
 */
var SYSTEM_TOOLS = [
  ["status", "Get the status of the total-recall memory system", {}, []],
  [
    "config_get",
    "Get a configuration value by dot-notation key",
    {
      key: {
        type: "string",
        description: "Dot-notation config key (e.g. 'tiers.hot.max_entries'). Omit for full config."
      }
    },
    []
  ],
  [
    "config_set",
    "Set a configuration value (acknowledgment only; full persistence deferred to Phase 2)",
    {
      key: { type: "string", description: "Dot-notation config key" },
      // No "type": the schema places no type constraint on the value.
      value: { description: "Value to set" }
    },
    ["key", "value"]
  ]
].map(([name, description, properties, required]) => ({
  name,
  description,
  inputSchema: { type: "object", properties, required }
}));
|
|
964
|
+
/**
 * Dispatch a system tool call (`status`, `config_get`, `config_set`).
 *
 * @param name - Tool name.
 * @param args - Tool arguments as described by the tool's input schema.
 * @param ctx - Context providing `db`, `config` and `sessionId`.
 * @returns A `{ content: [{ type: "text", text }] }` response whose text is
 *   the JSON-encoded result, or null when `name` is not a system tool.
 */
function handleSystemTool(name, args, ctx) {
  const reply = (payload) => ({ content: [{ type: "text", text: JSON.stringify(payload) }] });

  if (name === "status") {
    // One counter per table, keyed e.g. "hot_memories" / "cold_knowledge".
    const tierSizes = {};
    for (const { tier, type } of ALL_TABLE_PAIRS) {
      const key = `${tier}_${type === "memory" ? "memories" : "knowledge"}`;
      tierSizes[key] = countEntries(ctx.db, tier, type);
    }
    return reply({ tierSizes, db: { sessionId: ctx.sessionId } });
  }

  if (name === "config_get") {
    const key = args.key;
    if (!key) {
      return reply(ctx.config);
    }
    let value = ctx.config;
    for (const part of key.split(".")) {
      // Only follow the config's own properties: inherited names such as
      // "__proto__" or "constructor" are not configuration keys and must
      // resolve to "not found" rather than to prototype objects.
      const hasOwn = value !== null && typeof value === "object" && Object.prototype.hasOwnProperty.call(value, part);
      if (!hasOwn) {
        value = void 0;
        break;
      }
      value = value[part];
    }
    // An unresolved key yields `value: undefined`, which JSON.stringify omits.
    return reply({ key, value });
  }

  if (name === "config_set") {
    // Acknowledgment only: nothing is written to ctx.config or to disk here.
    return reply({
      acknowledged: true,
      key: args.key,
      value: args.value,
      note: "Config persistence deferred to Phase 2"
    });
  }

  return null;
}
|
|
1018
|
+
|
|
1019
|
+
// src/ingestion/ingest.ts
|
|
1020
|
+
import { readFileSync as readFileSync2, readdirSync as readdirSync2, statSync } from "fs";
|
|
1021
|
+
import { join as join5, dirname, basename, extname } from "path";
|
|
1022
|
+
|
|
1023
|
+
// src/ingestion/markdown-parser.ts
|
|
1024
|
+
/**
 * Rough token estimate for a piece of text: the number of
 * whitespace-separated words multiplied by 0.75, rounded up.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count (0 for blank text).
 */
function estimateTokens(text) {
  const words = text.trim().split(/\s+/).filter((word) => word.length > 0);
  return Math.ceil(0.75 * words.length);
}
|
|
1028
|
+
/**
 * Split markdown text into chunks along its headings.
 *
 * The text is cut into sections at every ATX heading (`#` through `######`).
 * A section whose estimated token count fits within `opts.maxTokens` becomes
 * one chunk; larger sections are handed to splitSection.
 *
 * Lines inside a fenced code block (opened by a line starting with three
 * backticks and closed by a line holding only backticks and whitespace, the
 * same rule splitSection uses) are never treated as headings, so a
 * `# comment` in a shell snippet does not start a new section. Trailing
 * whitespace after a heading, including the `\r` of CRLF files, is ignored.
 *
 * @param {string} text - Markdown source.
 * @param {{ maxTokens: number }} opts - Chunking options.
 * @returns {Array<{ content: string, headingPath: string[], startLine: number, endLine: number }>}
 *   Chunks in document order with 1-based inclusive line numbers; empty for
 *   blank input.
 */
function parseMarkdown(text, opts) {
  if (!text || !text.trim()) return [];
  const { maxTokens } = opts;
  const allLines = text.split("\n");
  const sections = [];
  let currentHeadingPath = [];
  let currentLines = [];
  let currentStartLine = 1;
  let inFence = false;
  // `(.+?)\s*$` rather than `(.+)$`: `.` does not match "\r", so the stricter
  // form would miss every heading in a CRLF file.
  const headingRe = /^(#{1,6})\s+(.+?)\s*$/;
  const fenceOpenRe = /^```/;
  const fenceCloseRe = /^```\s*$/;
  function flushSection() {
    if (currentLines.length > 0) {
      sections.push({
        headingPath: [...currentHeadingPath],
        lines: currentLines,
        startLine: currentStartLine
      });
    }
  }
  for (let i = 0; i < allLines.length; i++) {
    const line = allLines[i];
    if (inFence) {
      // Code content: keep it in the current section verbatim.
      if (fenceCloseRe.test(line)) inFence = false;
      currentLines.push(line);
      continue;
    }
    if (fenceOpenRe.test(line)) {
      inFence = true;
      currentLines.push(line);
      continue;
    }
    const match = headingRe.exec(line);
    if (match) {
      flushSection();
      const level = match[1].length;
      const title = match[2].trim();
      // Drop deeper levels from the previous path, then record this heading
      // at its own depth.
      currentHeadingPath = currentHeadingPath.slice(0, level - 1);
      currentHeadingPath[level - 1] = title;
      currentLines = [line];
      currentStartLine = i + 1;
    } else {
      currentLines.push(line);
    }
  }
  flushSection();
  const chunks = [];
  for (const section of sections) {
    const sectionText = section.lines.join("\n");
    if (estimateTokens(sectionText) <= maxTokens) {
      chunks.push({
        content: sectionText,
        headingPath: section.headingPath,
        startLine: section.startLine,
        endLine: section.startLine + section.lines.length - 1
      });
    } else {
      chunks.push(...splitSection(section, maxTokens));
    }
  }
  return chunks;
}
|
|
1079
|
+
/**
 * Break an oversized markdown section into chunks.
 *
 * Pass 1 cuts the section's lines into blocks: a fenced code block (from a
 * line starting with three backticks through the next line holding only
 * backticks and whitespace, or the end of the section) is one block;
 * otherwise a block runs up to and including the next blank line, stopping
 * before any fence.
 *
 * Pass 2 packs consecutive blocks into chunks greedily: a block joins the
 * current chunk while the combined token estimate stays within `maxTokens`,
 * otherwise the chunk is emitted and the block starts a new one. A single
 * block is never split, so a chunk can still exceed `maxTokens`.
 *
 * @param {{ headingPath: string[], lines: string[], startLine: number }} section
 *   Section to split; `startLine` is the 1-based line of `lines[0]`.
 * @param {number} maxTokens - Token budget per chunk.
 * @returns {Array<{ content: string, headingPath: string[], startLine: number, endLine: number }>}
 *   Chunks in order; every chunk shares the section's `headingPath` array.
 */
function splitSection(section, maxTokens) {
  const { headingPath, lines, startLine } = section;
  const opensFence = (text) => /^```/.test(text);
  const closesFence = (text) => /^```\s*$/.test(text);

  // Pass 1: cut the section into fence / paragraph blocks.
  const blocks = [];
  let cursor = 0;
  while (cursor < lines.length) {
    const lineOffset = cursor;
    const blockLines = [];
    if (opensFence(lines[cursor])) {
      blockLines.push(lines[cursor++]);
      while (cursor < lines.length) {
        const inner = lines[cursor++];
        blockLines.push(inner);
        if (closesFence(inner)) break;
      }
    } else {
      while (cursor < lines.length && !opensFence(lines[cursor])) {
        const text = lines[cursor++];
        blockLines.push(text);
        if (text.trim() === "") break;
      }
    }
    if (blockLines.length > 0) {
      blocks.push({ lines: blockLines, lineOffset });
    }
  }

  // Pass 2: pack blocks into chunks without exceeding the budget.
  const chunks = [];
  let pending = [];
  let pendingOffset = 0;
  const emit = () => {
    if (pending.length === 0) return;
    const firstLine = startLine + pendingOffset;
    chunks.push({
      content: pending.join("\n"),
      headingPath,
      startLine: firstLine,
      endLine: firstLine + pending.length - 1
    });
    pending = [];
  };
  for (const block of blocks) {
    if (pending.length > 0) {
      const combined = estimateTokens(pending.join("\n")) + estimateTokens(block.lines.join("\n"));
      if (combined <= maxTokens) {
        pending.push(...block.lines);
        continue;
      }
      emit();
    }
    pending = [...block.lines];
    pendingOffset = block.lineOffset;
  }
  emit();
  return chunks;
}
|
|
1142
|
+
|
|
1143
|
+
// src/ingestion/code-parser.ts
|
|
1144
|
+
/**
 * Rough token estimate for a piece of code: the number of
 * whitespace-separated words multiplied by 0.75, rounded up.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count (0 for blank text).
 */
function estimateTokens2(text) {
  // Each run of non-whitespace characters is one word.
  const runs = text.match(/\S+/g);
  const wordCount = runs === null ? 0 : runs.length;
  return Math.ceil(wordCount * 0.75);
}
|
|
1148
|
+
/**
 * Per-language line heuristics used by parseCode.
 *
 * Each language provides:
 *  - `boundary`: regex matching a line that starts a new top-level definition;
 *  - `importLine`: regex matching a line that belongs to the leading
 *    import header of the file;
 *  - `extractName(line)`: the definition's identifier, or "" if none is found;
 *  - `classifyKind(line)`: "class", "function" or "block".
 *
 * All regexes are non-global, so `.test()` / `.exec()` carry no state
 * between calls.
 */
var PATTERNS = {
  typescript: {
    boundary: /^(export\s+)?(async\s+)?function\s+\w+|^(export\s+)?(abstract\s+)?class\s+\w+|^(export\s+)?const\s+\w+\s*=\s*(async\s+)?\(|^(export\s+)?const\s+\w+\s*=\s*(async\s+)?function/,
    importLine: /^\s*import\s/,
    extractName(line) {
      const m = /function\s+(\w+)/.exec(line) || /class\s+(\w+)/.exec(line) || /const\s+(\w+)/.exec(line);
      return m ? m[1] : "";
    },
    classifyKind(line) {
      if (/class\s+/.test(line)) return "class";
      if (/function\s+|=\s*(async\s+)?\(|=\s*(async\s+)?function/.test(line)) return "function";
      return "block";
    }
  },
  javascript: {
    boundary: /^(export\s+)?(async\s+)?function\s+\w+|^(export\s+)?(class)\s+\w+|^(export\s+)?const\s+\w+\s*=\s*(async\s+)?\(|^(export\s+)?const\s+\w+\s*=\s*(async\s+)?function/,
    // ES imports and CommonJS `const x = require(...)`.
    importLine: /^\s*import\s|^\s*const\s+\w+\s*=\s*require\(/,
    extractName(line) {
      const m = /function\s+(\w+)/.exec(line) || /class\s+(\w+)/.exec(line) || /const\s+(\w+)/.exec(line);
      return m ? m[1] : "";
    },
    classifyKind(line) {
      if (/class\s+/.test(line)) return "class";
      if (/function\s+|=\s*(async\s+)?\(|=\s*(async\s+)?function/.test(line)) return "function";
      return "block";
    }
  },
  python: {
    boundary: /^(async\s+)?def\s+\w+|^class\s+\w+/,
    importLine: /^\s*import\s|^\s*from\s+\S+\s+import\s/,
    extractName(line) {
      const m = /(?:def|class)\s+(\w+)/.exec(line);
      return m ? m[1] : "";
    },
    classifyKind(line) {
      if (/^class\s+/.test(line)) return "class";
      if (/(?:async\s+)?def\s+/.test(line)) return "function";
      return "block";
    }
  },
  go: {
    boundary: /^func\s+/,
    // Header lines of a Go file: the `package` clause (which always precedes
    // the imports, so it must count as header for import detection to start),
    // `import` itself, quoted paths inside `import ( ... )` with an optional
    // alias / `_` / `.` prefix, and the closing parenthesis. Paths may contain
    // any character except a quote (e.g. "github.com/user/repo-name").
    importLine: /^\s*package\s|^\s*import\s|^\s*(?:[\w.]+\s+)?"[^"]+"|^\s*\)\s*$/,
    extractName(line) {
      // Skips an optional method receiver such as `(s *Server)`.
      const m = /func\s+(?:\(\w+\s+\*?\w+\)\s+)?(\w+)/.exec(line);
      return m ? m[1] : "";
    },
    classifyKind(_line) {
      return "function";
    }
  },
  rust: {
    boundary: /^(pub\s+)?(async\s+)?fn\s+\w+|^(pub\s+)?struct\s+\w+|^(pub\s+)?impl\s+\w+/,
    importLine: /^\s*use\s/,
    extractName(line) {
      const m = /fn\s+(\w+)/.exec(line) || /struct\s+(\w+)/.exec(line) || /impl\s+(\w+)/.exec(line);
      return m ? m[1] : "";
    },
    classifyKind(line) {
      if (/struct\s+/.test(line) || /impl\s+/.test(line)) return "class";
      if (/fn\s+/.test(line)) return "function";
      return "block";
    }
  }
};
|
|
1213
|
+
/**
 * Split source code into chunks along top-level definitions.
 *
 * The leading import header (as located by findNonImportStart) becomes one
 * chunk named "imports". The remaining lines are cut into segments at every
 * line matching the language's `boundary` pattern; segments containing only
 * blank lines are dropped. A segment that fits within `opts.maxTokens`
 * becomes one chunk, larger ones are handed to splitAtBlankLines.
 *
 * @param {string} code - Source text.
 * @param {string} language - Key into PATTERNS; unknown languages fall back
 *   to the "typescript" rules.
 * @param {{ maxTokens: number }} opts - Chunking options.
 * @returns {Array<{ content: string, name: string, kind: string, startLine: number, endLine: number }>}
 *   Chunks in file order with 1-based inclusive line numbers; empty for
 *   blank input.
 */
function parseCode(code, language, opts) {
  if (!code || !code.trim()) return [];
  const rules = PATTERNS[language] ?? PATTERNS["typescript"];
  const { maxTokens } = opts;
  const lines = code.split("\n");
  const bodyStart = findNonImportStart(lines, rules);
  const headerLines = lines.slice(0, bodyStart);

  // Cut the body into segments, one per top-level definition. Lines before
  // the first boundary form an unnamed "block" segment.
  const segments = [];
  let open = { lines: [], startIdx: bodyStart, name: "", kind: "block" };
  const closeSegment = () => {
    if (open.lines.length > 0 && open.lines.some((l) => l.trim())) {
      segments.push(open);
    }
  };
  for (let idx = bodyStart; idx < lines.length; idx++) {
    const text = lines[idx];
    if (!rules.boundary.test(text)) {
      open.lines.push(text);
      continue;
    }
    closeSegment();
    open = {
      lines: [text],
      startIdx: idx,
      name: rules.extractName(text),
      kind: rules.classifyKind(text)
    };
  }
  closeSegment();

  const chunks = [];
  if (headerLines.length > 0) {
    chunks.push({
      content: headerLines.join("\n"),
      name: "imports",
      kind: "import",
      startLine: 1,
      endLine: headerLines.length
    });
  }
  for (const { lines: segLines, startIdx, name, kind } of segments) {
    const segText = segLines.join("\n");
    if (estimateTokens2(segText) <= maxTokens) {
      chunks.push({
        content: segText,
        name,
        kind,
        startLine: startIdx + 1,
        endLine: startIdx + segLines.length
      });
    } else {
      chunks.push(...splitAtBlankLines(segLines, startIdx, name, kind, maxTokens));
    }
  }
  return chunks;
}
|
|
1279
|
+
/**
 * Find where the leading import header of a file ends.
 *
 * Scans from the top: import lines extend the header, blank lines are
 * skipped (and absorbed into the header once an import has been seen), and
 * the first other line stops the scan.
 *
 * @param {string[]} lines - File content split into lines.
 * @param {{ importLine: RegExp }} patterns - Language rules.
 * @returns {number} Index of the first line after the header; 0 when the
 *   file does not start with imports.
 */
function findNonImportStart(lines, patterns) {
  let headerEnd = 0;
  let importSeen = false;
  for (const [idx, text] of lines.entries()) {
    if (text.trim() === "") {
      // Blank lines before the first import do not open a header.
      if (importSeen) headerEnd = idx + 1;
      continue;
    }
    if (!patterns.importLine.test(text)) break;
    importSeen = true;
    headerEnd = idx + 1;
  }
  return headerEnd;
}
|
|
1297
|
+
/**
 * Break an oversized code segment into chunks at blank lines.
 *
 * Lines are accumulated into a buffer; at each blank line the buffer is
 * emitted if its token estimate has reached `maxTokens`. Because the check
 * happens only at blank lines, a chunk can exceed `maxTokens`. Buffers
 * holding only blank lines are discarded rather than emitted.
 *
 * @param {string[]} lines - Lines of the segment.
 * @param {number} startIdx - 0-based index of `lines[0]` within the file.
 * @param {string} name - Definition name copied onto every chunk.
 * @param {string} kind - Definition kind copied onto every chunk.
 * @param {number} maxTokens - Token threshold that triggers a split.
 * @returns {Array<{ content: string, name: string, kind: string, startLine: number, endLine: number }>}
 *   Chunks with 1-based inclusive line numbers; when nothing was emitted,
 *   a single chunk covering the whole segment.
 */
function splitAtBlankLines(lines, startIdx, name, kind, maxTokens) {
  const chunks = [];
  let buffer = [];
  let bufferOffset = 0;
  const emit = () => {
    const hasCode = buffer.length > 0 && buffer.some((l) => l.trim());
    if (hasCode) {
      const firstIdx = startIdx + bufferOffset;
      chunks.push({
        content: buffer.join("\n"),
        name,
        kind,
        startLine: firstIdx + 1,
        endLine: firstIdx + buffer.length
      });
    }
    buffer = [];
  };
  lines.forEach((text, idx) => {
    buffer.push(text);
    if (text.trim() !== "") return;
    if (estimateTokens2(buffer.join("\n")) >= maxTokens) {
      emit();
      bufferOffset = idx + 1;
    }
  });
  emit();
  if (chunks.length > 0) return chunks;
  return [{
    content: lines.join("\n"),
    name,
    kind,
    startLine: startIdx + 1,
    endLine: startIdx + lines.length
  }];
}
|
|
1333
|
+
|
|
1334
|
+
// src/ingestion/chunker.ts
|
|
1335
|
+
// Lower-case file extensions that chunkFile parses as markdown.
var MARKDOWN_EXTENSIONS = /* @__PURE__ */ new Set([
  ".md",
  ".mdx",
  ".markdown"
]);
// Lower-case file extension -> PATTERNS language key used by parseCode.
var CODE_LANGUAGE_MAP = Object.fromEntries(
  [
    ["typescript", [".ts", ".tsx"]],
    ["javascript", [".js", ".jsx"]],
    ["python", [".py"]],
    ["go", [".go"]],
    ["rust", [".rs"]]
  ].flatMap(([language, extensions]) => extensions.map((ext) => [ext, language]))
);
|
|
1345
|
+
/**
 * Return the lower-cased extension (including the leading dot) of the last
 * path component, or "" when that component contains no dot.
 *
 * Both "/" and "\" are treated as path separators, so a Windows path such as
 * `C:\notes.v2\README` yields "" instead of picking up the dot in a
 * directory name.
 *
 * @param {string} filePath - File path or bare file name.
 * @returns {string} Extension such as ".md", or "".
 */
function getExtension(filePath) {
  const base = filePath.split(/[\\/]/).pop() ?? filePath;
  const dotIdx = base.lastIndexOf(".");
  if (dotIdx === -1) return "";
  return base.slice(dotIdx).toLowerCase();
}
|
|
1351
|
+
/**
 * Rough token estimate for plain text: the number of whitespace-separated
 * words multiplied by 0.75, rounded up.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Estimated token count (0 for blank text).
 */
function estimateTokens3(text) {
  let words = 0;
  for (const piece of text.trim().split(/\s+/)) {
    // Splitting blank text yields one empty piece; it is not a word.
    if (piece) words += 1;
  }
  return Math.ceil(words * 0.75);
}
|
|
1355
|
+
/**
 * Split plain text into chunks at paragraph boundaries (runs of two or more
 * newlines), packing consecutive paragraphs greedily while the combined
 * token estimate stays within `maxTokens`. A single paragraph is never
 * split, so a chunk can still exceed `maxTokens`.
 *
 * Paragraphs inside a chunk are re-joined with exactly one blank line.
 * `startLine` / `endLine` refer to lines of the original text and stay
 * accurate when paragraphs are separated by more than one blank line.
 *
 * @param {string} content - Text to split.
 * @param {number} maxTokens - Token budget per chunk.
 * @returns {Array<{ content: string, startLine: number, endLine: number }>}
 *   Chunks in order with 1-based inclusive line numbers.
 */
function splitByParagraphs(content, maxTokens) {
  // The capturing group keeps each separator in the result, so pieces
  // alternate paragraph, separator, paragraph, ...
  const pieces = content.split(/(\n\n+)/);
  const chunks = [];
  let currentParts = [];
  let currentStartLine = 1;
  let currentEndLine = 1;
  let lineCount = 1;
  const flush = () => {
    chunks.push({
      content: currentParts.join("\n\n"),
      startLine: currentStartLine,
      endLine: currentEndLine
    });
    currentParts = [];
  };
  for (let i = 0; i < pieces.length; i += 2) {
    const para = pieces[i];
    const separator = pieces[i + 1] ?? "";
    const paraLines = para.split("\n").length;
    if (currentParts.length > 0) {
      const fits = estimateTokens3(currentParts.join("\n\n")) + estimateTokens3(para) <= maxTokens;
      if (!fits) flush();
    }
    if (currentParts.length === 0) currentStartLine = lineCount;
    currentParts.push(para);
    currentEndLine = lineCount + paraLines - 1;
    // The next paragraph starts after this one's last line plus one line per
    // newline in the separator.
    lineCount += paraLines - 1 + separator.length;
  }
  if (currentParts.length > 0) flush();
  return chunks;
}
|
|
1396
|
+
/**
 * Chunk a file's content with the strategy matching its extension:
 * markdown files go through parseMarkdown, files with a known code extension
 * through parseCode, and everything else is split by paragraphs.
 *
 * @param {string} content - File content.
 * @param {string} filePath - Path used only to determine the extension.
 * @param {{ maxTokens: number }} opts - Chunking options, forwarded to the
 *   parser in use.
 * @returns {Array<object>} Chunks with `content`, `startLine`, `endLine`
 *   plus `headingPath` (markdown) or `name` and `kind` (code); empty for
 *   blank content.
 */
function chunkFile(content, filePath, opts) {
  if (!content || !content.trim()) return [];
  const ext = getExtension(filePath);

  if (MARKDOWN_EXTENSIONS.has(ext)) {
    const toMarkdownChunk = ({ content: body, headingPath, startLine, endLine }) => ({
      content: body,
      headingPath,
      startLine,
      endLine
    });
    return parseMarkdown(content, opts).map(toMarkdownChunk);
  }

  const language = CODE_LANGUAGE_MAP[ext];
  if (language === void 0) {
    return splitByParagraphs(content, opts.maxTokens);
  }
  const toCodeChunk = ({ content: body, name, kind, startLine, endLine }) => ({
    content: body,
    name,
    kind,
    startLine,
    endLine
  });
  return parseCode(content, language, opts).map(toCodeChunk);
}
|
|
1421
|
+
|
|
1422
|
+
// src/ingestion/hierarchical-index.ts
|
|
1423
|
+
/**
 * Create a collection entry in the cold/knowledge table together with its
 * embedding.
 *
 * The embedding is computed before the row is inserted, so a failing `embed`
 * call does not leave behind a collection row without an embedding.
 *
 * @param db - Database handle, passed through to the storage helpers.
 * @param embed - Function mapping text to an embedding.
 * @param {{ name: string, sourcePath: string }} opts - Collection name and
 *   the path it was created from.
 * @returns The ID returned by insertEntry for the new collection.
 */
function createCollection(db, embed, opts) {
  const content = `Collection: ${opts.name}`;
  const embedding = embed(content);
  const id = insertEntry(db, "cold", "knowledge", {
    content,
    source: opts.sourcePath,
    metadata: {
      type: "collection",
      name: opts.name,
      source_path: opts.sourcePath
    }
  });
  insertEmbedding(db, "cold", "knowledge", id, embedding);
  return id;
}
|
|
1438
|
+
/**
 * Store a document and its chunks in the cold/knowledge table.
 *
 * One "document" entry is written whose content is the first 500 UTF-16
 * code units of the chunks joined by blank lines, followed by one "chunk"
 * entry per chunk with `parent_id` pointing at the document. Every entry
 * gets its own embedding, computed before the entry is inserted so that a
 * failing `embed` call never leaves that entry without an embedding.
 * Entries written before a failure are not rolled back here.
 *
 * @param db - Database handle, passed through to the storage helpers.
 * @param embed - Function mapping text to an embedding.
 * @param {{ collectionId: string, sourcePath: string, chunks: Array<{ content: string, headingPath?: string[], name?: string, kind?: string }> }} opts
 *   Target collection, source path and the chunks to store.
 * @returns The ID returned by insertEntry for the document entry.
 */
function addDocumentToCollection(db, embed, opts) {
  const joined = opts.chunks.map((c) => c.content).join("\n\n");
  let docContent = joined.slice(0, 500);
  if (joined.length > 500) {
    // The cut may land between the two halves of a surrogate pair; drop the
    // dangling high surrogate rather than store an unpaired code unit.
    const lastUnit = docContent.charCodeAt(docContent.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      docContent = docContent.slice(0, -1);
    }
  }
  const docEmbedding = embed(docContent);
  const docId = insertEntry(db, "cold", "knowledge", {
    content: docContent,
    source: opts.sourcePath,
    collection_id: opts.collectionId,
    metadata: {
      type: "document",
      source_path: opts.sourcePath,
      chunk_count: opts.chunks.length
    }
  });
  insertEmbedding(db, "cold", "knowledge", docId, docEmbedding);
  for (const chunk of opts.chunks) {
    const chunkEmbedding = embed(chunk.content);
    const chunkId = insertEntry(db, "cold", "knowledge", {
      content: chunk.content,
      source: opts.sourcePath,
      parent_id: docId,
      collection_id: opts.collectionId,
      metadata: {
        type: "chunk",
        heading_path: chunk.headingPath,
        name: chunk.name,
        kind: chunk.kind
      }
    });
    insertEmbedding(db, "cold", "knowledge", chunkId, chunkEmbedding);
  }
  return docId;
}
|
|
1471
|
+
/**
 * List every collection entry stored in the cold_knowledge table.
 *
 * Rows are selected by `metadata.type = 'collection'`. The JSON-encoded
 * `metadata` and `tags` columns are decoded ({} and [] when empty), and the
 * collection name is lifted out of the metadata for convenience.
 *
 * @param db - Database handle exposing `prepare(sql).all()`.
 * @returns {Array<object>} One object per collection row.
 */
function listCollections(db) {
  const sql = `SELECT * FROM cold_knowledge WHERE json_extract(metadata, '$.type') = 'collection'`;
  const collections = [];
  for (const row of db.prepare(sql).all()) {
    const metadata = row.metadata ? JSON.parse(row.metadata) : {};
    const tags = row.tags ? JSON.parse(row.tags) : [];
    collections.push({
      id: row.id,
      content: row.content,
      summary: row.summary,
      source: row.source,
      source_tool: row.source_tool,
      project: row.project,
      tags,
      created_at: row.created_at,
      updated_at: row.updated_at,
      last_accessed_at: row.last_accessed_at,
      access_count: row.access_count,
      decay_score: row.decay_score,
      parent_id: row.parent_id,
      collection_id: row.collection_id,
      metadata,
      name: metadata["name"]
    });
  }
  return collections;
}
|
|
1495
|
+
|
|
1496
|
+
// src/ingestion/ingest.ts
|
|
1497
|
+
// Lower-case file extensions that walkDirectory collects for ingestion.
var INGESTABLE_EXTENSIONS = /* @__PURE__ */ new Set([
  // Documentation and plain text
  ".md", ".mdx", ".markdown", ".txt", ".rst",
  // Source code
  ".ts", ".tsx", ".js", ".jsx", ".py", ".go", ".rs",
  ".java", ".kt", ".cs", ".cpp", ".c", ".h",
  // Structured data / configuration
  ".json", ".yaml", ".yml", ".toml"
]);
|
|
1521
|
+
/**
 * Ingest a single file: read it, chunk it, store it as a document in a
 * collection, and run a retrieval sanity check.
 *
 * When no `collectionId` is given, a new collection named after the file's
 * parent directory is created for it.
 *
 * The sanity check embeds the first chunk, searches the cold/knowledge table
 * with that vector, and passes when any of the top 5 results scores above
 * 0.5. It is reported, not enforced; a file with no chunks reports false.
 *
 * @param db - Database handle, passed through to the storage helpers.
 * @param embed - Function mapping text to an embedding.
 * @param {string} filePath - Path of the file to read (as UTF-8).
 * @param {string} [collectionId] - Existing collection to add the file to.
 * @returns {{ documentId: *, chunkCount: number, validationPassed: boolean }}
 */
function ingestFile(db, embed, filePath, collectionId) {
  const text = readFileSync2(filePath, "utf-8");
  const chunks = chunkFile(text, filePath, { maxTokens: 512, overlapTokens: 50 });

  let targetCollection = collectionId;
  if (!targetCollection) {
    const parentDir = dirname(filePath);
    targetCollection = createCollection(db, embed, {
      name: basename(parentDir),
      sourcePath: parentDir
    });
  }

  const documentId = addDocumentToCollection(db, embed, {
    collectionId: targetCollection,
    sourcePath: filePath,
    chunks: chunks.map(({ content, headingPath, name, kind }) => ({ content, headingPath, name, kind }))
  });

  const validationPassed =
    chunks.length > 0 &&
    searchByVector(db, "cold", "knowledge", embed(chunks[0].content), {
      topK: 5,
      minScore: 0
    }).some((hit) => hit.score > 0.5);

  return {
    documentId,
    chunkCount: chunks.length,
    validationPassed
  };
}
|
|
1559
|
+
/**
 * Recursively collect the paths of ingestable files under `dirPath`.
 *
 * Entries whose name starts with "." and entries named "node_modules" are
 * skipped. Directories that cannot be listed and entries that cannot be
 * stat'ed are ignored rather than reported.
 *
 * NOTE(review): statSync follows symbolic links, so a link pointing back to
 * an ancestor directory looks like it could recurse without bound — confirm
 * whether that needs guarding.
 *
 * @param dirPath - directory to walk
 * @returns array of file paths whose lower-cased extension is in
 *   INGESTABLE_EXTENSIONS
 */
function walkDirectory(dirPath) {
  let names;
  try {
    names = readdirSync2(dirPath);
  } catch {
    return [];
  }

  const found = [];
  for (const name of names) {
    const hidden = name.startsWith(".");
    if (hidden || name === "node_modules") continue;

    const candidate = join5(dirPath, name);
    let info;
    try {
      info = statSync(candidate);
    } catch {
      continue;
    }

    if (info.isDirectory()) {
      for (const nested of walkDirectory(candidate)) {
        found.push(nested);
      }
      continue;
    }
    if (info.isFile() && INGESTABLE_EXTENSIONS.has(extname(name).toLowerCase())) {
      found.push(candidate);
    }
  }
  return found;
}
|
|
1587
|
+
/**
 * Ingest every ingestable file under `dirPath` into one new collection named
 * after the directory.
 *
 * @param db - database handle, passed through to the helpers
 * @param embed - synchronous embedding function
 * @param dirPath - directory to walk (recursively, via walkDirectory)
 * @param glob - optional file-name pattern matched against each file's
 *   basename only. `*` matches any run of characters and `?` matches exactly
 *   one character; every other character is matched literally. `undefined`
 *   or `null` disables filtering.
 * @returns `{ collectionId, documentCount, totalChunks }`, counting only
 *   files that were ingested without throwing
 */
function ingestDirectory(db, embed, dirPath, glob) {
  const collectionId = createCollection(db, embed, {
    name: basename(dirPath),
    sourcePath: dirPath
  });

  // Compile the filter once, not once per file. All regex metacharacters are
  // escaped first so names containing "+", "(", "[" etc. match literally and
  // a pattern such as "[" can no longer throw while building the RegExp.
  let nameFilter = null;
  if (glob != null) {
    const literal = String(glob).replace(/[\\^$.+()|[\]{}]/g, "\\$&");
    const pattern = literal.replace(/\*/g, ".*").replace(/\?/g, ".");
    nameFilter = new RegExp(`^${pattern}$`);
  }

  let documentCount = 0;
  let totalChunks = 0;
  for (const filePath of walkDirectory(dirPath)) {
    if (nameFilter !== null && !nameFilter.test(basename(filePath))) continue;
    try {
      const { chunkCount } = ingestFile(db, embed, filePath, collectionId);
      documentCount++;
      totalChunks += chunkCount;
    } catch {
      // Deliberate best-effort: one unreadable or unparsable file must not
      // abort the whole directory. Such files are simply not counted.
    }
  }

  return {
    collectionId,
    documentCount,
    totalChunks
  };
}
|
|
1615
|
+
|
|
1616
|
+
// src/tools/kb-tools.ts
|
|
1617
|
+
// Tool descriptors for the knowledge-base tools dispatched by handleKbTool.
// Each entry carries a tool name, a human-readable description and an
// `inputSchema` object describing the accepted arguments.
var KB_TOOLS = [
  {
    name: "kb_ingest_file",
    description: "Ingest a single file into the knowledge base",
    inputSchema: {
      type: "object",
      properties: {
        path: { type: "string", description: "Path to the file to ingest" },
        collection: { type: "string", description: "Optional collection ID to add to" }
      },
      required: ["path"]
    }
  },
  {
    name: "kb_ingest_dir",
    description: "Ingest a directory of files into the knowledge base",
    inputSchema: {
      type: "object",
      properties: {
        path: { type: "string", description: "Path to the directory to ingest" },
        glob: { type: "string", description: "Optional glob pattern to filter files" },
        // NOTE(review): handleKbTool's kb_ingest_dir branch reads only `path`
        // and `glob`; `collection` is accepted here but never used.
        collection: { type: "string", description: "Optional collection name override" }
      },
      required: ["path"]
    }
  },
  {
    name: "kb_search",
    description: "Search the knowledge base (cold/knowledge tier)",
    inputSchema: {
      type: "object",
      properties: {
        query: { type: "string", description: "Search query" },
        // NOTE(review): handleKbTool's kb_search branch reads only `query`
        // and `top_k`; `collection` does not restrict the search.
        collection: { type: "string", description: "Optional collection ID to restrict search" },
        top_k: { type: "number", description: "Number of results to return (default: 10)" }
      },
      required: ["query"]
    }
  },
  {
    name: "kb_list_collections",
    description: "List all knowledge base collections",
    inputSchema: {
      type: "object",
      properties: {},
      required: []
    }
  },
  {
    name: "kb_remove",
    description: "Remove an entry from the knowledge base",
    inputSchema: {
      type: "object",
      properties: {
        id: { type: "string", description: "Entry ID to remove" },
        cascade: { type: "boolean", description: "If true, also delete child entries" }
      },
      required: ["id"]
    }
  },
  {
    name: "kb_refresh",
    description: "Refresh a knowledge base collection (re-ingest)",
    inputSchema: {
      type: "object",
      properties: {
        collection: { type: "string", description: "Collection ID to refresh" }
      },
      required: ["collection"]
    }
  }
];
|
|
1689
|
+
/**
 * Dispatch a knowledge-base tool call.
 *
 * @param name - tool name; anything other than the six `kb_*` names handled
 *   below yields `null`
 * @param args - tool arguments (shape depends on the tool)
 * @param ctx - context providing `db` and `embedder`
 * @returns a `{ content: [{ type: "text", text }] }` response whose text is
 *   the JSON-encoded result, or `null` when the name is not a KB tool
 */
async function handleKbTool(name, args, ctx) {
  const reply = (payload) => ({ content: [{ type: "text", text: JSON.stringify(payload) }] });

  switch (name) {
    case "kb_ingest_file": {
      const { path: filePath, collection: collectionId } = args;
      await ctx.embedder.ensureLoaded();
      const embedFn = ctx.embedder.makeSyncEmbedFn();
      return reply(ingestFile(ctx.db, embedFn, filePath, collectionId));
    }

    case "kb_ingest_dir": {
      const { path: dirPath, glob } = args;
      await ctx.embedder.ensureLoaded();
      const embedFn = ctx.embedder.makeSyncEmbedFn();
      return reply(ingestDirectory(ctx.db, embedFn, dirPath, glob));
    }

    case "kb_search": {
      const { query } = args;
      const topK = args.top_k ?? 10;
      await ctx.embedder.ensureLoaded();
      // Embed asynchronously up front, then hand searchMemory a function that
      // returns that precomputed vector.
      const vec = await ctx.embedder.embed(query);
      const hits = searchMemory(ctx.db, () => vec, query, {
        tiers: [{ tier: "cold", content_type: "knowledge" }],
        topK
      });
      return reply(hits);
    }

    case "kb_list_collections":
      return reply(listCollections(ctx.db));

    case "kb_remove": {
      const { id } = args;
      const cascade = args.cascade ?? false;
      const purge = (entryId) => {
        deleteEmbedding(ctx.db, "cold", "knowledge", entryId);
        deleteEntry(ctx.db, "cold", "knowledge", entryId);
      };
      if (cascade) {
        // Children are entries that name `id` as parent or as collection.
        listEntries(ctx.db, "cold", "knowledge")
          .filter((e) => e.parent_id === id || e.collection_id === id)
          .forEach((child) => purge(child.id));
      }
      purge(id);
      return reply({ removed: id, cascade });
    }

    case "kb_refresh": {
      // No re-ingestion happens here; the response only acknowledges the
      // request and tells the caller to re-ingest the source path.
      const { collection } = args;
      return reply({
        acknowledged: true,
        collection,
        note: "Refresh scheduled \u2014 re-ingest the source path to update"
      });
    }

    default:
      return null;
  }
}
|
|
1755
|
+
/**
 * Expose the knowledge-base tool descriptors.
 * @returns the KB_TOOLS array itself (not a copy)
 */
function registerKbTools() {
  return KB_TOOLS;
}
|
|
1758
|
+
|
|
1759
|
+
// src/tools/eval-tools.ts
|
|
1760
|
+
import { resolve } from "path";
|
|
1761
|
+
import { fileURLToPath } from "url";
|
|
1762
|
+
|
|
1763
|
+
// src/eval/benchmark-runner.ts
|
|
1764
|
+
import { readFileSync as readFileSync3 } from "fs";
|
|
1765
|
+
/**
 * Load a JSONL corpus into the warm/memory tier, then run each JSONL
 * benchmark query against that tier and score the results.
 *
 * A query is an exact match when the top result's content contains
 * `expected_content_contains`, and a fuzzy match when any of the top three
 * results does. Tier routing compares the top result's tier with
 * `expected_tier`.
 *
 * NOTE(review): every call stores the whole corpus again via storeMemory —
 * confirm whether repeated runs against the same database are intended.
 *
 * @param db - database handle
 * @param embed - synchronous embedding function
 * @param opts - `{ corpusPath, benchmarkPath }`, both paths to JSONL files
 * @returns `{ totalQueries, exactMatchRate, fuzzyMatchRate, tierRoutingRate,
 *   avgLatencyMs, details }`; all rates are 0 when there are no queries
 */
function runBenchmark(db, embed, opts) {
  const readNonBlankLines = (path) => readFileSync3(path, "utf-8").split("\n").filter((text) => text.trim().length > 0);

  for (const rawEntry of readNonBlankLines(opts.corpusPath)) {
    const entry = JSON.parse(rawEntry);
    storeMemory(db, embed, {
      content: entry.content,
      type: entry.type,
      tier: "warm",
      contentType: "memory",
      tags: entry.tags
    });
  }

  const queries = readNonBlankLines(opts.benchmarkPath).map((rawQuery) => JSON.parse(rawQuery));
  const tally = { exact: 0, fuzzy: 0, tier: 0, latencyMs: 0 };
  const details = [];

  for (const bq of queries) {
    const startedAt = performance.now();
    const hits = searchMemory(db, embed, bq.query, {
      tiers: [{ tier: "warm", content_type: "memory" }],
      topK: 3
    });
    tally.latencyMs += performance.now() - startedAt;

    const best = hits[0] ?? null;
    const bestContent = best?.entry.content ?? null;
    const needle = bq.expected_content_contains;

    const matched = bestContent !== null && bestContent.includes(needle);
    const fuzzyMatched = matched || hits.slice(1).some((hit) => hit.entry.content.includes(needle));

    if (matched) tally.exact++;
    if (fuzzyMatched) tally.fuzzy++;
    if ((best?.tier ?? null) === bq.expected_tier) tally.tier++;

    details.push({
      query: bq.query,
      expectedContains: needle,
      topResult: bestContent,
      topScore: best?.score ?? 0,
      matched,
      fuzzyMatched
    });
  }

  const total = queries.length;
  const perQuery = (sum) => (total > 0 ? sum / total : 0);
  return {
    totalQueries: total,
    exactMatchRate: perQuery(tally.exact),
    fuzzyMatchRate: perQuery(tally.fuzzy),
    tierRoutingRate: perQuery(tally.tier),
    avgLatencyMs: perQuery(tally.latencyMs),
    details
  };
}
|
|
1821
|
+
|
|
1822
|
+
// src/eval/event-logger.ts
|
|
1823
|
+
import { randomUUID as randomUUID2 } from "crypto";
|
|
1824
|
+
/**
 * Query the `retrieval_events` table, newest first.
 *
 * @param db - database handle exposing `prepare(sql).all(...params)`
 * @param opts - optional filters; each is applied only when not `undefined`:
 *   `sessionId`, `configSnapshotId`, `days` (keep events whose timestamp is
 *   within the last N days of `Date.now()`), `limit` (maximum row count)
 * @returns whatever `all()` returns for the built statement
 */
function getRetrievalEvents(db, opts = {}) {
  const clauses = [];
  const bindings = [];
  const where = (clause, value) => {
    clauses.push(clause);
    bindings.push(value);
  };

  if (opts.sessionId !== void 0) where("session_id = ?", opts.sessionId);
  if (opts.configSnapshotId !== void 0) where("config_snapshot_id = ?", opts.configSnapshotId);
  if (opts.days !== void 0) where("timestamp >= ?", Date.now() - opts.days * 24 * 60 * 60 * 1e3);

  const whereSql = clauses.length > 0 ? ` WHERE ${clauses.join(" AND ")}` : "";
  let limitSql = "";
  if (opts.limit !== void 0) {
    limitSql = " LIMIT ?";
    bindings.push(opts.limit);
  }

  // All values are bound as parameters; only fixed fragments are concatenated.
  return db.prepare(`SELECT * FROM retrieval_events${whereSql} ORDER BY timestamp DESC${limitSql}`).all(...bindings);
}
|
|
1851
|
+
|
|
1852
|
+
// src/eval/metrics.ts
|
|
1853
|
+
/**
 * Summarise one group of retrieval events.
 *
 * `precision` and `hitRate` are computed the same way here: the share of
 * events with a non-null `outcome_used` whose `outcome_used` is 1.
 * `avgScore` is the mean `top_score` over events whose `top_score` is not
 * null. Each value is 0 when its denominator would be 0.
 *
 * @param events - array of retrieval-event rows
 * @returns `{ precision, hitRate, avgScore }`
 */
function computeGroupMetrics(events) {
  let judged = 0;
  let used = 0;
  let scored = 0;
  let scoreSum = 0;

  for (const event of events) {
    if (event.outcome_used !== null) {
      judged++;
      if (event.outcome_used === 1) used++;
    }
    if (event.top_score !== null) {
      scored++;
      scoreSum += event.top_score;
    }
  }

  const usedShare = judged > 0 ? used / judged : 0;
  return {
    precision: usedShare,
    hitRate: usedShare,
    avgScore: scored > 0 ? scoreSum / scored : 0
  };
}
|
|
1864
|
+
/**
 * Aggregate retrieval-quality metrics over a list of events.
 *
 * - `precision`, `hitRate`, `mrr`: share of events with a non-null
 *   `outcome_used` whose `outcome_used` is 1 (all three use that same ratio).
 * - `missRate`: share of all events whose `top_score` is null or below
 *   `similarityThreshold`.
 * - `avgLatencyMs`: mean `latency_ms` over events where it is not null.
 * - `byTier` / `byContentType`: computeGroupMetrics per distinct truthy
 *   `top_tier` / `top_content_type`, plus the group size as `count`.
 *
 * @param events - array of retrieval-event rows
 * @param similarityThreshold - score below which an event counts as a miss
 * @returns the metrics object; every field is zero/empty for no events
 */
function computeMetrics(events, similarityThreshold) {
  const totalEvents = events.length;
  if (totalEvents === 0) {
    return {
      precision: 0,
      hitRate: 0,
      missRate: 0,
      mrr: 0,
      avgLatencyMs: 0,
      totalEvents: 0,
      byTier: {},
      byContentType: {}
    };
  }

  const tierGroups = /* @__PURE__ */ new Map();
  const typeGroups = /* @__PURE__ */ new Map();
  const addToGroup = (groups, key, event) => {
    const members = groups.get(key);
    if (members) {
      members.push(event);
    } else {
      groups.set(key, [event]);
    }
  };

  let judged = 0;
  let used = 0;
  let misses = 0;
  let timed = 0;
  let latencySum = 0;

  for (const event of events) {
    if (event.outcome_used !== null) {
      judged++;
      if (event.outcome_used === 1) used++;
    }
    if (event.top_score === null || event.top_score < similarityThreshold) misses++;
    if (event.latency_ms !== null) {
      timed++;
      latencySum += event.latency_ms;
    }
    if (event.top_tier) addToGroup(tierGroups, event.top_tier, event);
    if (event.top_content_type) addToGroup(typeGroups, event.top_content_type, event);
  }

  const usedShare = judged > 0 ? used / judged : 0;

  const byTier = {};
  for (const [tier, members] of tierGroups) {
    const summary = computeGroupMetrics(members);
    byTier[tier] = {
      precision: summary.precision,
      hitRate: summary.hitRate,
      avgScore: summary.avgScore,
      count: members.length
    };
  }

  const byContentType = {};
  for (const [contentType, members] of typeGroups) {
    const summary = computeGroupMetrics(members);
    byContentType[contentType] = {
      precision: summary.precision,
      hitRate: summary.hitRate,
      count: members.length
    };
  }

  return {
    precision: usedShare,
    hitRate: usedShare,
    missRate: misses / totalEvents,
    mrr: usedShare,
    avgLatencyMs: timed > 0 ? latencySum / timed : 0,
    totalEvents,
    byTier,
    byContentType
  };
}
|
|
1928
|
+
|
|
1929
|
+
// src/tools/eval-tools.ts
|
|
1930
|
+
// Directory containing this module file.
var __dirname = fileURLToPath(new URL(".", import.meta.url));
// Root of the installed package, i.e. the nearest ancestor directory (starting
// at __dirname) that contains a package.json. A fixed "../.." only works when
// this module sits two levels below the root; once bundled one level below it
// (for example into dist/), "../.." points outside the package and the eval
// data files cannot be found. If no package.json is found on the way up, the
// previous "../.." result is used as a fallback.
var PACKAGE_ROOT = (() => {
  let dir = resolve(__dirname);
  for (;;) {
    if (existsSync(resolve(dir, "package.json"))) return dir;
    const parent = resolve(dir, "..");
    if (parent === dir) break; // reached the filesystem root
    dir = parent;
  }
  return resolve(__dirname, "..", "..");
})();
|
|
1932
|
+
// Tool descriptors for the evaluation tools dispatched by handleEvalTool.
var EVAL_TOOLS = [
  {
    name: "eval_benchmark",
    description: "Run a retrieval benchmark against the eval corpus and benchmark queries",
    inputSchema: {
      type: "object",
      properties: {
        // NOTE(review): handleEvalTool's eval_benchmark branch does not read
        // `args`, so `compare_to` and `snapshot` currently have no effect.
        compare_to: { type: "string", description: "Optional baseline snapshot ID to compare against" },
        snapshot: { type: "string", description: "Optional config snapshot ID to tag this run" }
      },
      required: []
    }
  },
  {
    name: "eval_report",
    description: "Generate a retrieval quality report from logged events",
    inputSchema: {
      type: "object",
      properties: {
        days: { type: "number", description: "Number of days of history to include (default: 7)" },
        config_snapshot: { type: "string", description: "Optional config snapshot ID to filter by" }
      },
      required: []
    }
  }
];
|
|
1958
|
+
/**
 * Dispatch an evaluation tool call.
 *
 * @param name - "eval_benchmark" or "eval_report"; any other name yields null
 * @param args - tool arguments (`days`, `config_snapshot` for eval_report)
 * @param ctx - context providing `db`, `embedder` and `config`
 * @returns a `{ content: [{ type: "text", text }] }` response with the
 *   JSON-encoded result, or `null` when the name is not an eval tool
 */
async function handleEvalTool(name, args, ctx) {
  const reply = (payload) => ({ content: [{ type: "text", text: JSON.stringify(payload) }] });

  switch (name) {
    case "eval_benchmark": {
      await ctx.embedder.ensureLoaded();
      const embedFn = ctx.embedder.makeSyncEmbedFn();
      // Both data files live under <PACKAGE_ROOT>/eval.
      const corpusPath = resolve(PACKAGE_ROOT, "eval", "corpus", "memories.jsonl");
      const benchmarkPath = resolve(PACKAGE_ROOT, "eval", "benchmarks", "retrieval.jsonl");
      return reply(runBenchmark(ctx.db, embedFn, { corpusPath, benchmarkPath }));
    }

    case "eval_report": {
      const days = args.days ?? 7;
      const events = getRetrievalEvents(ctx.db, {
        days,
        configSnapshotId: args.config_snapshot
      });
      // Misses are judged against the warm tier's threshold, defaulting to 0.5.
      const threshold = ctx.config.tiers.warm.similarity_threshold ?? 0.5;
      const metrics = computeMetrics(events, threshold);
      return reply({ days, events: events.length, metrics });
    }

    default:
      return null;
  }
}
|
|
1983
|
+
/**
 * Expose the evaluation tool descriptors.
 * @returns the EVAL_TOOLS array itself (not a copy)
 */
function registerEvalTools() {
  return EVAL_TOOLS;
}
|
|
1986
|
+
|
|
1987
|
+
// src/importers/claude-code.ts
|
|
1988
|
+
import { existsSync as existsSync4, readdirSync as readdirSync3, readFileSync as readFileSync4 } from "fs";
|
|
1989
|
+
import { join as join6 } from "path";
|
|
1990
|
+
import { homedir } from "os";
|
|
1991
|
+
import { createHash } from "crypto";
|
|
1992
|
+
/**
 * Split a document into simple `key: value` frontmatter and body.
 *
 * The frontmatter must start on the first line with `---` and end with a line
 * consisting of `---`. Both LF and CRLF line endings are accepted, and the
 * closing fence may be the last thing in the file. Only lines of the form
 * `word: value` are kept (the key must match `\w+`); values are trimmed and
 * kept as plain strings, with no YAML typing or quoting rules applied.
 *
 * @param raw - full file text
 * @returns `{ frontmatter, content }`. When no frontmatter block is found,
 *   `frontmatter` is `null` and `content` is `raw` unchanged.
 */
function parseFrontmatter(raw) {
  // `\r?\n` everywhere so files saved with Windows line endings still parse;
  // `(?:\r?\n|$)` lets the closing fence sit at end-of-input.
  const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n|$)([\s\S]*)$/);
  if (!match) return { frontmatter: null, content: raw };

  const frontmatter = {};
  for (const line of match[1].split(/\r?\n/)) {
    const kv = line.match(/^(\w+):\s*(.*)$/);
    if (kv) {
      frontmatter[kv[1]] = kv[2].trim();
    }
  }
  return { frontmatter, content: match[2] };
}
|
|
2005
|
+
/**
 * SHA-256 digest of `text`.
 * @param text - data to hash
 * @returns lower-case hex string
 */
function contentHash(text) {
  const hasher = createHash("sha256");
  hasher.update(text);
  return hasher.digest("hex");
}
|
|
2008
|
+
/**
 * Build the import-log row id: the MD5 hex digest of
 * "<sourceTool>:<sourcePath>:<hash>". The same triple always yields the same id.
 */
function importLogId(sourceTool, sourcePath, hash) {
  const key = `${sourceTool}:${sourcePath}:${hash}`;
  const digest = createHash("md5");
  digest.update(key);
  return digest.digest("hex");
}
|
|
2011
|
+
/**
 * Whether `import_log` already has a row with this content hash.
 * The lookup is by hash only, regardless of source tool or path.
 * @returns true unless the query yields `undefined`
 */
function isAlreadyImported(db, hash) {
  const lookup = db.prepare("SELECT id FROM import_log WHERE content_hash = ?");
  const existing = lookup.get(hash);
  return existing !== void 0;
}
|
|
2015
|
+
/**
 * Record one completed import in `import_log`.
 *
 * The row id comes from importLogId(sourceTool, sourcePath, hash) and the
 * statement is INSERT OR IGNORE, so an existing row with the same id is left
 * as is. The timestamp is `Date.now()` at call time.
 */
function logImport(db, sourceTool, sourcePath, hash, entryId, tier, type) {
  const rowId = importLogId(sourceTool, sourcePath, hash);
  const insert = db.prepare(`
    INSERT OR IGNORE INTO import_log
      (id, timestamp, source_tool, source_path, content_hash, target_entry_id, target_tier, target_type)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);
  insert.run(rowId, Date.now(), sourceTool, sourcePath, hash, entryId, tier, type);
}
|
|
2023
|
+
/**
 * Importer for data kept under a Claude Code directory (default `~/.claude`).
 *
 * Layout read by this class:
 *   <basePath>/projects/<project>/memory/*.md   per-project memory files
 *   <basePath>/projects/<project>/CLAUDE.md     counted by scan()
 *   <basePath>/projects/<project>/*.jsonl       counted by scan()
 *   <basePath>/CLAUDE.md                        imported by importKnowledge()
 */
var ClaudeCodeImporter = class {
  name = "claude-code";
  basePath;

  /** @param basePath - optional root directory; defaults to `<home>/.claude` */
  constructor(basePath) {
    this.basePath = basePath ?? join6(homedir(), ".claude");
  }

  /** True when both the base directory and its `projects` subdirectory exist. */
  detect() {
    if (!existsSync4(this.basePath)) return false;
    return existsSync4(join6(this.basePath, "projects"));
  }

  /**
   * Count importable files without reading them.
   * @returns `{ memoryFiles, knowledgeFiles, sessionFiles }`
   */
  scan() {
    const counts = { memoryFiles: 0, knowledgeFiles: 0, sessionFiles: 0 };
    const projectsRoot = join6(this.basePath, "projects");
    if (!existsSync4(projectsRoot)) return counts;

    const projectDirs = readdirSync3(projectsRoot, { withFileTypes: true }).filter((d) => d.isDirectory());
    for (const { name: projectName } of projectDirs) {
      const projectDir = join6(projectsRoot, projectName);

      const memoryDir = join6(projectDir, "memory");
      if (existsSync4(memoryDir)) {
        // MEMORY.md is excluded here, matching importMemories.
        counts.memoryFiles += readdirSync3(memoryDir).filter((f) => f.endsWith(".md") && f !== "MEMORY.md").length;
      }

      if (existsSync4(join6(projectDir, "CLAUDE.md"))) counts.knowledgeFiles++;

      counts.sessionFiles += readdirSync3(projectDir).filter((f) => f.endsWith(".jsonl")).length;
    }
    return counts;
  }

  /**
   * Import every per-project memory file that has not been imported before.
   *
   * Files whose content hash is already in the import log are counted as
   * skipped. A file whose frontmatter has `type: reference` goes to the
   * cold/knowledge tier; everything else goes to warm/memory. Failures are
   * collected per file in `errors` and do not stop the run.
   *
   * @param db - database handle
   * @param embed - synchronous embedding function
   * @param project - optional project label stored on every imported entry
   * @returns `{ imported, skipped, errors }`
   */
  importMemories(db, embed, project) {
    const outcome = { imported: 0, skipped: 0, errors: [] };
    const projectsRoot = join6(this.basePath, "projects");
    if (!existsSync4(projectsRoot)) return outcome;

    for (const dirent of readdirSync3(projectsRoot, { withFileTypes: true })) {
      if (!dirent.isDirectory()) continue;
      const memoryDir = join6(projectsRoot, dirent.name, "memory");
      if (!existsSync4(memoryDir)) continue;

      const candidates = readdirSync3(memoryDir).filter((f) => f.endsWith(".md") && f !== "MEMORY.md");
      for (const filename of candidates) {
        const filePath = join6(memoryDir, filename);
        try {
          const raw = readFileSync4(filePath, "utf8");
          const hash = contentHash(raw);
          if (isAlreadyImported(db, hash)) {
            outcome.skipped++;
            continue;
          }

          const { frontmatter, content } = parseFrontmatter(raw);
          const isReference = frontmatter?.type === "reference";
          const tier = isReference ? "cold" : "warm";
          const type = isReference ? "knowledge" : "memory";

          const entryId = insertEntry(db, tier, type, {
            content,
            summary: frontmatter?.description ?? null,
            source: filePath,
            source_tool: "claude-code",
            project: project ?? null,
            tags: frontmatter?.name ? [frontmatter.name] : []
          });
          insertEmbedding(db, tier, type, entryId, embed(content));
          logImport(db, "claude-code", filePath, hash, entryId, tier, type);
          outcome.imported++;
        } catch (err) {
          outcome.errors.push(`${filePath}: ${err instanceof Error ? err.message : String(err)}`);
        }
      }
    }
    return outcome;
  }

  /**
   * Import `<basePath>/CLAUDE.md` as a warm/knowledge entry tagged "pinned",
   * unless its content hash is already in the import log.
   *
   * @returns `{ imported, skipped, errors }`
   */
  importKnowledge(db, embed) {
    const outcome = { imported: 0, skipped: 0, errors: [] };
    const claudeMdPath = join6(this.basePath, "CLAUDE.md");
    if (!existsSync4(claudeMdPath)) return outcome;

    try {
      const raw = readFileSync4(claudeMdPath, "utf8");
      const hash = contentHash(raw);
      if (isAlreadyImported(db, hash)) {
        outcome.skipped++;
        return outcome;
      }

      const { content } = parseFrontmatter(raw);
      const entryId = insertEntry(db, "warm", "knowledge", {
        content,
        source: claudeMdPath,
        source_tool: "claude-code",
        tags: ["pinned"]
      });
      insertEmbedding(db, "warm", "knowledge", entryId, embed(content));
      logImport(db, "claude-code", claudeMdPath, hash, entryId, "warm", "knowledge");
      outcome.imported++;
    } catch (err) {
      outcome.errors.push(
        `${claudeMdPath}: ${err instanceof Error ? err.message : String(err)}`
      );
    }
    return outcome;
  }
};
|
|
2129
|
+
|
|
2130
|
+
// src/importers/copilot-cli.ts
|
|
2131
|
+
import { existsSync as existsSync5, readdirSync as readdirSync4, readFileSync as readFileSync5 } from "fs";
|
|
2132
|
+
import { join as join7 } from "path";
|
|
2133
|
+
import { homedir as homedir2 } from "os";
|
|
2134
|
+
import { createHash as createHash2 } from "crypto";
|
|
2135
|
+
/**
 * SHA-256 digest of `text`.
 * @param text - data to hash
 * @returns lower-case hex string
 */
function contentHash2(text) {
  const hasher = createHash2("sha256");
  hasher.update(text);
  return hasher.digest("hex");
}
|
|
2138
|
+
/**
 * Build the import-log row id: the MD5 hex digest of
 * "<sourceTool>:<sourcePath>:<hash>". The same triple always yields the same id.
 */
function importLogId2(sourceTool, sourcePath, hash) {
  const key = `${sourceTool}:${sourcePath}:${hash}`;
  const digest = createHash2("md5");
  digest.update(key);
  return digest.digest("hex");
}
|
|
2141
|
+
/**
 * Whether `import_log` already has a row with this content hash.
 * The lookup is by hash only, regardless of source tool or path.
 * @returns true unless the query yields `undefined`
 */
function isAlreadyImported2(db, hash) {
  const lookup = db.prepare("SELECT id FROM import_log WHERE content_hash = ?");
  const existing = lookup.get(hash);
  return existing !== void 0;
}
|
|
2145
|
+
/**
 * Record one completed import in `import_log`.
 *
 * The row id comes from importLogId2(sourceTool, sourcePath, hash) and the
 * statement is INSERT OR IGNORE, so an existing row with the same id is left
 * as is. The timestamp is `Date.now()` at call time.
 */
function logImport2(db, sourceTool, sourcePath, hash, entryId, tier, type) {
  const rowId = importLogId2(sourceTool, sourcePath, hash);
  const insert = db.prepare(`
    INSERT OR IGNORE INTO import_log
      (id, timestamp, source_tool, source_path, content_hash, target_entry_id, target_tier, target_type)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);
  insert.run(rowId, Date.now(), sourceTool, sourcePath, hash, entryId, tier, type);
}
|
|
2153
|
+
/**
 * Importer for data kept under a Copilot CLI directory (default `~/.copilot`).
 *
 * Layout read by this class:
 *   <basePath>/session-state/<session>/plan.md   imported as knowledge
 *   <basePath>/session-state/<session>/*.jsonl   counted by scan()
 */
var CopilotCliImporter = class {
  name = "copilot-cli";
  basePath;

  /** @param basePath - optional root directory; defaults to `<home>/.copilot` */
  constructor(basePath) {
    this.basePath = basePath ?? join7(homedir2(), ".copilot");
  }

  /** True when both the base directory and its `session-state` subdirectory exist. */
  detect() {
    if (!existsSync5(this.basePath)) return false;
    return existsSync5(join7(this.basePath, "session-state"));
  }

  /**
   * Count importable files without reading them.
   * @returns `{ memoryFiles, knowledgeFiles, sessionFiles }`; `memoryFiles`
   *   is always 0 for this importer
   */
  scan() {
    const counts = { memoryFiles: 0, knowledgeFiles: 0, sessionFiles: 0 };
    const sessionsRoot = join7(this.basePath, "session-state");
    if (!existsSync5(sessionsRoot)) return counts;

    const sessionDirs = readdirSync4(sessionsRoot, { withFileTypes: true }).filter((d) => d.isDirectory());
    for (const { name: sessionName } of sessionDirs) {
      const sessionDir = join7(sessionsRoot, sessionName);
      if (existsSync5(join7(sessionDir, "plan.md"))) counts.knowledgeFiles++;
      counts.sessionFiles += readdirSync4(sessionDir).filter((f) => f.endsWith(".jsonl")).length;
    }
    return counts;
  }

  /** This importer has no memory source; always returns an empty result. */
  importMemories(_db2, _embed, _project) {
    return { imported: 0, skipped: 0, errors: [] };
  }

  /**
   * Import each session's `plan.md` as a cold/knowledge entry, skipping files
   * whose content hash is already in the import log. The file text is stored
   * as is. Failures are collected per file in `errors`.
   *
   * @returns `{ imported, skipped, errors }`
   */
  importKnowledge(db, embed) {
    const outcome = { imported: 0, skipped: 0, errors: [] };
    const sessionsRoot = join7(this.basePath, "session-state");
    if (!existsSync5(sessionsRoot)) return outcome;

    for (const dirent of readdirSync4(sessionsRoot, { withFileTypes: true })) {
      if (!dirent.isDirectory()) continue;
      const planPath = join7(sessionsRoot, dirent.name, "plan.md");
      if (!existsSync5(planPath)) continue;

      try {
        const raw = readFileSync5(planPath, "utf8");
        const hash = contentHash2(raw);
        if (isAlreadyImported2(db, hash)) {
          outcome.skipped++;
          continue;
        }
        const entryId = insertEntry(db, "cold", "knowledge", {
          content: raw,
          source: planPath,
          source_tool: "copilot-cli"
        });
        insertEmbedding(db, "cold", "knowledge", entryId, embed(raw));
        logImport2(db, "copilot-cli", planPath, hash, entryId, "cold", "knowledge");
        outcome.imported++;
      } catch (err) {
        outcome.errors.push(`${planPath}: ${err instanceof Error ? err.message : String(err)}`);
      }
    }
    return outcome;
  }
};
|
|
2212
|
+
|
|
2213
|
+
// src/tools/import-tools.ts
|
|
2214
|
+
// Tool descriptors for the import tools dispatched by handleImportTool.
var IMPORT_TOOLS = [
  {
    name: "import_host",
    description: "Detect and import memories/knowledge from installed host tools (Claude Code, Copilot CLI)",
    inputSchema: {
      type: "object",
      properties: {
        // Compared against each importer's `name` in handleImportTool.
        source: { type: "string", description: "Optional: restrict to a specific source ('claude-code' or 'copilot-cli')" }
      },
      required: []
    }
  }
];
|
|
2227
|
+
/**
 * Handle the `import_host` tool: run every known importer (optionally
 * restricted to one by `args.source`) and report what each one did.
 *
 * For an importer whose `detect()` is false, the report entry is
 * `{ tool, detected: false }`. Otherwise it carries the scan counts and the
 * results of importMemories and importKnowledge.
 *
 * @param name - tool name; anything other than "import_host" yields null
 * @param args - `{ source? }`
 * @param ctx - context providing `db` and `embedder`
 * @returns a `{ content: [{ type: "text", text }] }` response wrapping
 *   `{ results }` as JSON, or `null` for other tool names
 */
async function handleImportTool(name, args, ctx) {
  if (name !== "import_host") return null;

  const wantedSource = args.source;
  await ctx.embedder.ensureLoaded();
  const embedFn = ctx.embedder.makeSyncEmbedFn();

  const results = [];
  for (const importer of [new ClaudeCodeImporter(), new CopilotCliImporter()]) {
    if (wantedSource && importer.name !== wantedSource) continue;

    if (!importer.detect()) {
      results.push({ tool: importer.name, detected: false });
      continue;
    }

    const scan = importer.scan();
    const memoriesResult = importer.importMemories(ctx.db, embedFn);
    const knowledgeResult = importer.importKnowledge(ctx.db, embedFn);
    results.push({
      tool: importer.name,
      detected: true,
      scan,
      memoriesResult,
      knowledgeResult
    });
  }

  return { content: [{ type: "text", text: JSON.stringify({ results }) }] };
}
|
|
2259
|
+
/**
 * Expose the import tool descriptors.
 * @returns the IMPORT_TOOLS array itself (not a copy)
 */
function registerImportTools() {
  return IMPORT_TOOLS;
}
|
|
2262
|
+
|
|
2263
|
+
// src/tools/session-tools.ts
|
|
2264
|
+
import { randomUUID as randomUUID4 } from "crypto";
|
|
2265
|
+
|
|
2266
|
+
// src/compaction/compactor.ts
|
|
2267
|
+
import { randomUUID as randomUUID3 } from "crypto";
|
|
2268
|
+
|
|
2269
|
+
// src/memory/decay.ts
|
|
2270
|
+
var MS_PER_HOUR = 60 * 60 * 1e3;
// Multiplier applied to the decay score per entry type; unknown types use 1.
var TYPE_WEIGHTS = {
  correction: 1.5,
  preference: 1.3,
  decision: 1,
  surfaced: 0.8,
  imported: 1.1,
  compacted: 1,
  ingested: 0.9
};
/**
 * Score how "alive" an entry is:
 * `exp(-hoursSinceAccess / decay_half_life_hours) * (1 + log2(1 + access_count)) * typeWeight`.
 *
 * NOTE(review): with this formula the time factor reaches 1/e (not 1/2) after
 * `decay_half_life_hours`, so the setting behaves as a time constant rather
 * than a true half-life — confirm which is intended before changing it, since
 * the compaction thresholds are compared against this score.
 *
 * @param entry - `{ last_accessed_at (ms epoch), access_count, type }`
 * @param compactionConfig - object providing `decay_half_life_hours`
 * @param now - current time in ms since epoch (defaults to `Date.now()`)
 * @returns the non-negative decay score
 */
function calculateDecayScore(entry, compactionConfig, now = Date.now()) {
  // A last-access time in the future (clock skew, imported data) must not
  // push the time factor above 1, so elapsed time is clamped at zero.
  const hoursSinceAccess = Math.max(0, (now - entry.last_accessed_at) / MS_PER_HOUR);

  // A non-positive half-life would yield NaN (0 / 0) or a growing score.
  // Treat it as instant decay: only an entry touched right now keeps a full
  // time factor.
  const halfLifeHours = compactionConfig.decay_half_life_hours;
  let timeFactor;
  if (halfLifeHours > 0) {
    timeFactor = Math.exp(-hoursSinceAccess / halfLifeHours);
  } else {
    timeFactor = hoursSinceAccess > 0 ? 0 : 1;
  }

  const freqFactor = 1 + Math.log2(1 + entry.access_count);

  // Own-property check so a type such as "constructor" or "toString" does not
  // pick up an inherited Object.prototype member as its weight.
  const hasWeight = Object.prototype.hasOwnProperty.call(TYPE_WEIGHTS, entry.type);
  const typeWeight = hasWeight ? TYPE_WEIGHTS[entry.type] : 1;

  return timeFactor * freqFactor * typeWeight;
}
|
|
2287
|
+
|
|
2288
|
+
// src/compaction/compactor.ts
|
|
2289
|
+
/**
 * Appends one audit row to the `compaction_log` table.
 *
 * A fresh UUID and the current time are generated for the row. The four quality
 * columns (semantic_drift, facts_preserved, facts_in_original, preservation_ratio)
 * are always written as NULL by this function.
 *
 * @param db database handle exposing `prepare(sql).run(...params)`
 * @param opts event fields: sessionId, sourceTier, targetTier, sourceEntryIds,
 *   targetEntryId, decayScores, reason, configSnapshotId
 */
function logCompactionEvent(db, opts) {
  const rowId = randomUUID3();
  const loggedAt = Date.now();
  const insertRow = db.prepare(`
INSERT INTO compaction_log
(id, timestamp, session_id, source_tier, target_tier, source_entry_ids,
target_entry_id, semantic_drift, facts_preserved, facts_in_original,
preservation_ratio, decay_scores, reason, config_snapshot_id)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`);
  // Positional values, in the same order as the column list above.
  const rowValues = [
    rowId,
    loggedAt,
    opts.sessionId,
    opts.sourceTier,
    opts.targetTier,
    JSON.stringify(opts.sourceEntryIds), // id list is stored as JSON text
    opts.targetEntryId,
    null, // semantic_drift
    null, // facts_preserved
    null, // facts_in_original
    null, // preservation_ratio
    JSON.stringify(opts.decayScores), // score map is stored as JSON text
    opts.reason,
    opts.configSnapshotId
  ];
  insertRow.run(...rowValues);
}
|
|
2315
|
+
/**
 * Sorts every hot-tier memory entry into one of three outcomes based on its decay score:
 *
 * - score >  config.promote_threshold: left in place and reported under `carryForward`;
 * - score >= config.warm_threshold: moved hot -> warm via promoteEntry, reported under `promoted`;
 * - otherwise: embedding and entry are deleted, reported under `discarded`.
 *
 * Every move or deletion is recorded through logCompactionEvent. Entries without
 * `metadata.entry_type` are scored as type "decision".
 *
 * @param db database handle passed through to the storage helpers
 * @param embed embedding function passed through to promoteEntry
 * @param config compaction settings (promote_threshold, warm_threshold, decay settings)
 * @param sessionId session identifier written to the audit log
 * @param configSnapshotId optional snapshot id for the audit log; "default" when nullish
 * @returns {{carryForward: string[], promoted: string[], discarded: string[]}} entry ids per outcome
 */
function compactHotTier(db, embed, config, sessionId, configSnapshotId) {
  const snapshotId = configSnapshotId ?? "default";
  const hotEntries = listEntries(db, "hot", "memory");
  const now = Date.now();
  const outcome = { carryForward: [], promoted: [], discarded: [] };

  // Shared audit writer; a null targetTier means the entry left the store entirely.
  const recordEvent = (entryId, score, targetTier, reason) => {
    logCompactionEvent(db, {
      sessionId,
      sourceTier: "hot",
      targetTier,
      sourceEntryIds: [entryId],
      targetEntryId: targetTier === null ? null : entryId,
      decayScores: { [entryId]: score },
      reason,
      configSnapshotId: snapshotId
    });
  };

  for (const entry of hotEntries) {
    const scoringInput = {
      last_accessed_at: entry.last_accessed_at,
      created_at: entry.created_at,
      access_count: entry.access_count,
      type: entry.metadata?.entry_type ?? "decision"
    };
    const score = calculateDecayScore(scoringInput, config, now);

    if (score > config.promote_threshold) {
      outcome.carryForward.push(entry.id);
      continue;
    }

    if (score >= config.warm_threshold) {
      promoteEntry(db, embed, entry.id, "hot", "memory", "warm", "memory");
      outcome.promoted.push(entry.id);
      recordEvent(entry.id, score, "warm", "decay_score_below_promote_threshold");
      continue;
    }

    // Below the warm threshold: drop the embedding first, then the entry itself.
    deleteEmbedding(db, "hot", "memory", entry.id);
    deleteEntry(db, "hot", "memory", entry.id);
    outcome.discarded.push(entry.id);
    recordEvent(entry.id, score, null, "decay_score_below_warm_threshold");
  }

  return outcome;
}
|
|
2367
|
+
|
|
2368
|
+
// src/tools/session-tools.ts
|
|
2369
|
+
// Descriptors for the session lifecycle tools. None of them accept arguments, so
// each one is given its own empty object schema.
var SESSION_TOOLS = [
  ["session_start", "Initialize a session: sync host tool imports and assemble hot tier context"],
  ["session_end", "End a session: compact the hot tier and return compaction results"],
  ["session_context", "Return current hot tier entries as formatted context text"]
].map(([name, description]) => ({
  name,
  description,
  inputSchema: {
    type: "object",
    properties: {},
    required: []
  }
}));
|
|
2398
|
+
/**
 * Dispatches the three session tools.
 *
 * - "session_start": loads the embedder, runs every detected host importer
 *   (memories, then knowledge) and returns the import summary together with the
 *   hot-tier memory entries rendered as a bullet list.
 * - "session_end": loads the embedder, compacts the hot tier and returns the
 *   per-outcome counts plus the full compaction result.
 * - "session_context": renders hot-tier memory and knowledge entries as a bullet
 *   list, or a placeholder line when there are none.
 *
 * @param name tool name requested by the client
 * @param args tool arguments (unused: none of the session tools take input)
 * @param ctx server context providing db, embedder, config and sessionId
 * @returns a text tool result whose payload is JSON, or null when `name` is not a session tool
 */
async function handleSessionTool(name, args, ctx) {
  // Wraps a payload in the single-text-item tool result shape.
  const asTextResult = (payload) => ({
    content: [{ type: "text", text: JSON.stringify(payload) }]
  });
  const tagSuffix = (entry) => (entry.tags.length > 0 ? ` [${entry.tags.join(", ")}]` : "");

  switch (name) {
    case "session_start": {
      await ctx.embedder.ensureLoaded();
      const embedFn = ctx.embedder.makeSyncEmbedFn();
      const importSummary = [];
      for (const importer of [new ClaudeCodeImporter(), new CopilotCliImporter()]) {
        // Importers whose host tool is not detected are skipped entirely.
        if (!importer.detect()) continue;
        const memResult = importer.importMemories(ctx.db, embedFn);
        const kbResult = importer.importKnowledge(ctx.db, embedFn);
        importSummary.push({
          tool: importer.name,
          memoriesImported: memResult.imported,
          knowledgeImported: kbResult.imported
        });
      }
      const hotEntries = listEntries(ctx.db, "hot", "memory");
      const contextText = hotEntries
        .map((entry) => `- ${entry.content}${tagSuffix(entry)}`)
        .join("\n");
      return asTextResult({
        sessionId: ctx.sessionId,
        importSummary,
        hotEntryCount: hotEntries.length,
        context: contextText
      });
    }

    case "session_end": {
      await ctx.embedder.ensureLoaded();
      const embedFn = ctx.embedder.makeSyncEmbedFn();
      // A fresh id is minted only when the context carries none.
      const sessionId = ctx.sessionId ?? randomUUID4();
      const result = compactHotTier(ctx.db, embedFn, ctx.config.compaction, sessionId);
      return asTextResult({
        sessionId,
        carryForward: result.carryForward.length,
        promoted: result.promoted.length,
        discarded: result.discarded.length,
        details: result
      });
    }

    case "session_context": {
      const allEntries = [
        ...listEntries(ctx.db, "hot", "memory"),
        ...listEntries(ctx.db, "hot", "knowledge")
      ];
      const lines = allEntries.map((entry) => {
        const project = entry.project ? ` (project: ${entry.project})` : "";
        return `- ${entry.content}${tagSuffix(entry)}${project}`;
      });
      return asTextResult({
        entryCount: allEntries.length,
        context: lines.length > 0 ? lines.join("\n") : "(no hot tier entries)"
      });
    }

    default:
      return null;
  }
}
|
|
2481
|
+
/**
 * Exposes the session tool descriptors so the server registry can advertise them.
 *
 * @returns the module-level SESSION_TOOLS array, handed back by reference (no copy is made).
 */
function registerSessionTools() {
  return SESSION_TOOLS;
}
|
|
2484
|
+
|
|
2485
|
+
// src/tools/registry.ts
|
|
2486
|
+
/**
 * Builds the "total-recall" MCP server, wires up tool listing and tool dispatch,
 * and connects it to a stdio transport.
 *
 * Tool calls are offered to each tool family in a fixed order (memory, system,
 * knowledge base, eval, import, session). The first handler that returns something
 * other than null wins; if none claims the tool, an `isError` result naming the
 * unknown tool is returned.
 *
 * @param ctx shared server context that is passed unchanged to every tool handler
 * @returns a promise that settles once `server.connect` has completed
 */
async function startServer(ctx) {
  const server = new Server(
    { name: "total-recall", version: "0.1.0" },
    { capabilities: { tools: {} } }
  );

  const allTools = [
    ...MEMORY_TOOLS,
    ...SYSTEM_TOOLS,
    ...registerKbTools(),
    ...registerEvalTools(),
    ...registerImportTools(),
    ...registerSessionTools()
  ];

  server.setRequestHandler(ListToolsRequestSchema, async () => ({ tools: allTools }));

  server.setRequestHandler(CallToolRequestSchema, async (request) => {
    const { name, arguments: suppliedArgs } = request.params;
    const args = suppliedArgs ?? {}; // clients may omit arguments altogether

    // Order matters: it is the precedence used when resolving a tool name.
    const handlersInOrder = [
      handleMemoryTool,
      handleSystemTool, // called without await originally; awaiting its plain value is a no-op
      handleKbTool,
      handleEvalTool,
      handleImportTool,
      handleSessionTool
    ];
    for (const handle of handlersInOrder) {
      const outcome = await handle(name, args, ctx);
      if (outcome !== null) return outcome;
    }

    return {
      content: [
        {
          type: "text",
          text: JSON.stringify({ error: `Unknown tool: ${name}` })
        }
      ],
      isError: true
    };
  });

  const transport = new StdioServerTransport();
  await server.connect(transport);
}
|
|
2530
|
+
|
|
2531
|
+
// src/index.ts
|
|
2532
|
+
/**
 * Process entry point: loads configuration, opens the database, builds the
 * embedder from the configured model/dimensions, mints a session id and starts
 * the MCP server.
 *
 * The SIGINT/SIGTERM handlers are installed as soon as the database is open and
 * before the server is started, so a signal that arrives while the server is still
 * starting up also closes the database instead of killing the process with the
 * handle open.
 *
 * @returns a promise that settles when startServer has finished connecting;
 *   rejections propagate to the caller
 */
async function main() {
  const config = loadConfig();
  const db = getDb();

  // Close the database and exit cleanly on termination signals.
  const cleanup = () => {
    closeDb();
    process.exit(0);
  };
  process.on("SIGINT", cleanup);
  process.on("SIGTERM", cleanup);

  const embedder = new Embedder({
    model: config.embedding.model,
    dimensions: config.embedding.dimensions
  });
  const sessionId = randomUUID5();

  // Diagnostics go to stderr; stdout is left to the stdio transport.
  process.stderr.write(`total-recall: MCP server starting (db: ${getDataDir()}/total-recall.db)\n`);
  await startServer({ db, config, embedder, sessionId });
}
|
|
2550
|
+
// Run the server; any startup failure is reported on stderr and ends the process with status 1.
main().catch((err) => {
  const report = `total-recall: fatal error: ${err}\n`;
  process.stderr.write(report);
  process.exit(1);
});
|