@context-vault/core 2.17.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/capture.d.ts +21 -0
- package/dist/capture.d.ts.map +1 -0
- package/dist/capture.js +269 -0
- package/dist/capture.js.map +1 -0
- package/dist/categories.d.ts +6 -0
- package/dist/categories.d.ts.map +1 -0
- package/dist/categories.js +50 -0
- package/dist/categories.js.map +1 -0
- package/dist/config.d.ts +4 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +190 -0
- package/dist/config.js.map +1 -0
- package/dist/constants.d.ts +33 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +23 -0
- package/dist/constants.js.map +1 -0
- package/dist/db.d.ts +13 -0
- package/dist/db.d.ts.map +1 -0
- package/dist/db.js +191 -0
- package/dist/db.js.map +1 -0
- package/dist/embed.d.ts +5 -0
- package/dist/embed.d.ts.map +1 -0
- package/dist/embed.js +78 -0
- package/dist/embed.js.map +1 -0
- package/dist/files.d.ts +13 -0
- package/dist/files.d.ts.map +1 -0
- package/dist/files.js +66 -0
- package/dist/files.js.map +1 -0
- package/dist/formatters.d.ts +8 -0
- package/dist/formatters.d.ts.map +1 -0
- package/dist/formatters.js +18 -0
- package/dist/formatters.js.map +1 -0
- package/dist/frontmatter.d.ts +12 -0
- package/dist/frontmatter.d.ts.map +1 -0
- package/dist/frontmatter.js +101 -0
- package/dist/frontmatter.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +297 -0
- package/dist/index.js.map +1 -0
- package/dist/ingest-url.d.ts +20 -0
- package/dist/ingest-url.d.ts.map +1 -0
- package/dist/ingest-url.js +113 -0
- package/dist/ingest-url.js.map +1 -0
- package/dist/main.d.ts +14 -0
- package/dist/main.d.ts.map +1 -0
- package/dist/main.js +25 -0
- package/dist/main.js.map +1 -0
- package/dist/search.d.ts +18 -0
- package/dist/search.d.ts.map +1 -0
- package/dist/search.js +238 -0
- package/dist/search.js.map +1 -0
- package/dist/types.d.ts +176 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +66 -17
- package/src/capture.ts +308 -0
- package/src/categories.ts +54 -0
- package/src/{core/config.js → config.ts} +34 -33
- package/src/{constants.js → constants.ts} +6 -3
- package/src/db.ts +229 -0
- package/src/{index/embed.js → embed.ts} +10 -35
- package/src/files.ts +80 -0
- package/src/{capture/formatters.js → formatters.ts} +13 -11
- package/src/{core/frontmatter.js → frontmatter.ts} +27 -33
- package/src/index.ts +351 -0
- package/src/ingest-url.ts +99 -0
- package/src/main.ts +111 -0
- package/src/search.ts +285 -0
- package/src/types.ts +166 -0
- package/src/capture/file-ops.js +0 -97
- package/src/capture/import-pipeline.js +0 -46
- package/src/capture/importers.js +0 -387
- package/src/capture/index.js +0 -236
- package/src/capture/ingest-url.js +0 -252
- package/src/consolidation/index.js +0 -112
- package/src/core/categories.js +0 -72
- package/src/core/error-log.js +0 -54
- package/src/core/files.js +0 -108
- package/src/core/status.js +0 -350
- package/src/core/telemetry.js +0 -90
- package/src/index/db.js +0 -416
- package/src/index/index.js +0 -522
- package/src/index.js +0 -66
- package/src/retrieve/index.js +0 -500
- package/src/server/helpers.js +0 -44
- package/src/server/tools/clear-context.js +0 -47
- package/src/server/tools/context-status.js +0 -182
- package/src/server/tools/create-snapshot.js +0 -231
- package/src/server/tools/delete-context.js +0 -60
- package/src/server/tools/get-context.js +0 -678
- package/src/server/tools/ingest-project.js +0 -244
- package/src/server/tools/ingest-url.js +0 -88
- package/src/server/tools/list-buckets.js +0 -116
- package/src/server/tools/list-context.js +0 -163
- package/src/server/tools/save-context.js +0 -609
- package/src/server/tools/session-start.js +0 -285
- package/src/server/tools/submit-feedback.js +0 -55
- package/src/server/tools.js +0 -174
- package/src/sync/sync.js +0 -235
package/src/retrieve/index.js
DELETED
|
@@ -1,500 +0,0 @@
|
|
|
1
|
-
/**
 * Retrieve Layer — Public API
 *
 * All read-path query logic: hybrid semantic search and any future
 * query patterns (scoped, recency-weighted, etc.).
 *
 * Agent Constraint: Read-only access to DB. Never writes.
 * (Note: trackAccess below does update hit counters — a deliberate,
 * best-effort exception to the read-only rule.)
 */

// Cosine similarity above which two results are considered near-duplicates;
// the later-ranked one is suppressed in hybridSearch stage 6.
const NEAR_DUP_THRESHOLD = 0.92;

// Smoothing constant for Reciprocal Rank Fusion. Larger k flattens the
// difference between adjacent ranks; 60 is the value from the original RRF paper.
const RRF_K = 60;

// MMR trade-off weight: fraction of the score driven by query relevance,
// with (1 - MMR_LAMBDA) penalizing redundancy against already-selected results.
const MMR_LAMBDA = 0.7;
|
16
|
-
/**
 * Exponential recency decay score based on updated_at timestamp.
 * Returns e^(-decayRate * ageDays) for valid dates, clamped to 1 so that
 * future-dated entries cannot exceed the documented range, or 0.5 as a
 * neutral score when updatedAt is null/undefined/unparseable.
 *
 * @param {string|null|undefined} updatedAt - ISO timestamp
 * @param {number} decayRate - Decay rate per day (default 0.05)
 * @returns {number} Score in [0, 1]
 */
export function recencyDecayScore(updatedAt, decayRate = 0.05) {
  if (updatedAt == null) return 0.5;
  const ageDays = (Date.now() - new Date(updatedAt).getTime()) / 86400000;
  // Unparseable timestamps produce NaN; treat them like missing ones rather
  // than letting NaN propagate into downstream score arithmetic.
  if (!Number.isFinite(ageDays)) return 0.5;
  // Clamp: a future updated_at (negative age) would otherwise yield > 1,
  // violating the documented [0, 1] contract.
  return Math.min(1, Math.exp(-decayRate * ageDays));
}
|
|
30
|
-
|
|
31
|
-
/**
 * Dot product of two Float32Array vectors (cosine similarity for unit vectors).
 *
 * @param {Float32Array} a
 * @param {Float32Array} b
 * @returns {number} Sum of element-wise products over a's length.
 */
export function dotProduct(a, b) {
  const len = a.length;
  let acc = 0;
  for (let idx = 0; idx < len; idx += 1) {
    acc += a[idx] * b[idx];
  }
  return acc;
}
|
|
39
|
-
|
|
40
|
-
/**
 * Build a tiered FTS5 query that prioritises phrase match, then proximity,
 * then AND. Multi-word queries become:
 *   "word1 word2" OR NEAR("word1" "word2", 10) OR "word1" AND "word2"
 * Single-word queries remain a simple quoted term.
 * Returns null if no valid words remain after stripping FTS5 metacharacters.
 *
 * @param {string} query - Raw user query text.
 * @returns {string|null} FTS5 MATCH expression, or null when nothing usable remains.
 */
export function buildFtsQuery(query) {
  // Split on whitespace/hyphens, strip FTS5 metacharacters, drop empties.
  const terms = [];
  for (const raw of query.split(/[\s-]+/)) {
    const cleaned = raw.replace(/[*"():^~{}]/g, "");
    if (cleaned.length > 0) terms.push(cleaned);
  }

  if (terms.length === 0) return null;

  const quoted = terms.map((t) => `"${t}"`);
  if (terms.length === 1) return quoted[0];

  const phrase = `"${terms.join(" ")}"`;
  const proximity = `NEAR(${quoted.join(" ")}, 10)`;
  const conjunction = quoted.join(" AND ");
  return `${phrase} OR ${proximity} OR ${conjunction}`;
}
|
|
59
|
-
|
|
60
|
-
/**
 * Category-aware recency decay:
 *   knowledge + entity: no decay (enduring)
 *   event: steeper decay (~0.5 at 30 days)
 *
 * @param {string} createdAt - ISO timestamp of entry creation.
 * @param {string} category - Entry category; only "event" decays.
 * @param {number} decayDays - Half-life in days for event entries (default 30).
 * @returns {number} Multiplicative boost in (0, 1].
 */
export function recencyBoost(createdAt, category, decayDays = 30) {
  if (category !== "event") return 1.0;
  const ageDays = (Date.now() - new Date(createdAt).getTime()) / 86400000;
  // Unparseable created_at yields NaN age; return the neutral boost instead
  // of letting NaN wipe the entry's score downstream.
  if (!Number.isFinite(ageDays)) return 1.0;
  // Clamp negative ages (future-dated entries) so the boost never exceeds 1.
  return 1 / (1 + Math.max(0, ageDays) / decayDays);
}
|
|
70
|
-
|
|
71
|
-
/**
 * Build additional WHERE clauses for category/time filtering.
 * Clause order is fixed so positional `?` parameters stay aligned:
 * user, team, category, event-exclusion, since, until, expiry, superseded.
 *
 * @returns {{ clauses: string[], params: any[] }}
 */
export function buildFilterClauses({
  categoryFilter,
  excludeEvents = false,
  since,
  until,
  userIdFilter,
  teamIdFilter,
  includeSuperseeded = false,
}) {
  const clauses = [];
  const params = [];

  // Push a clause and its bound values together so the arrays stay in sync.
  const add = (clause, ...values) => {
    clauses.push(clause);
    params.push(...values);
  };

  // userIdFilter is checked against undefined (not truthiness) so callers
  // can deliberately filter on falsy IDs.
  if (userIdFilter !== undefined) add("e.user_id = ?", userIdFilter);
  if (teamIdFilter) add("e.team_id = ?", teamIdFilter);
  if (categoryFilter) add("e.category = ?", categoryFilter);
  // Event exclusion is redundant when an explicit category is requested.
  if (excludeEvents && !categoryFilter) add("e.category != 'event'");
  if (since) add("e.created_at >= ?", since);
  if (until) add("e.created_at <= ?", until);

  // Always hide expired entries.
  add("(e.expires_at IS NULL OR e.expires_at > datetime('now'))");
  if (!includeSuperseeded) add("e.superseded_by IS NULL");

  return { clauses, params };
}
|
|
115
|
-
|
|
116
|
-
/**
 * Reciprocal Rank Fusion: merge multiple ranked lists into a single score.
 * Each document receives 1/(k + rank) from each list it appears in, with
 * rank counted 1-based (position 0 contributes 1/(k + 1)).
 *
 * @param {Array<string[]>} rankedLists - Arrays of document IDs in rank order (best first).
 * @param {number} k - Smoothing constant (default RRF_K = 60).
 * @returns {Map<string, number>} Map of id -> RRF score.
 */
export function reciprocalRankFusion(rankedLists, k = RRF_K) {
  const fused = new Map();
  rankedLists.forEach((list) => {
    list.forEach((id, position) => {
      // position is 0-based; +1 converts to the 1-based rank RRF expects.
      const contribution = 1 / (k + position + 1);
      fused.set(id, (fused.get(id) ?? 0) + contribution);
    });
  });
  return fused;
}
|
|
134
|
-
|
|
135
|
-
/**
 * Jaccard similarity between two strings based on word sets.
 * Used as a fallback for MMR when embedding vectors are unavailable.
 * Two empty/null inputs are defined as identical (similarity 1).
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Similarity in [0, 1].
 */
export function jaccardSimilarity(a, b) {
  // Lowercase, split on non-word runs, and drop empty tokens.
  const tokenize = (text) =>
    new Set((text ?? "").toLowerCase().split(/\W+/).filter(Boolean));

  const setA = tokenize(a);
  const setB = tokenize(b);

  if (setA.size === 0 && setB.size === 0) return 1;
  if (setA.size === 0 || setB.size === 0) return 0;

  let shared = 0;
  for (const token of setA) {
    if (setB.has(token)) shared += 1;
  }
  // |A ∩ B| / |A ∪ B|, with the union computed by inclusion-exclusion.
  return shared / (setA.size + setB.size - shared);
}
|
|
152
|
-
|
|
153
|
-
/**
 * Maximal Marginal Relevance reranking.
 *
 * Selects up to n candidates that balance relevance to the query and
 * diversity from already-selected results.
 *
 * MMR_score = lambda * querySim(doc) - (1 - lambda) * max(sim(doc, selected))
 *
 * @param {Array<object>} candidates - Entries with at least {id, title, body}.
 * @param {Map<string, number>} querySimMap - Map of id -> relevance score.
 * @param {Map<string, Float32Array|null>} embeddingMap - Map of id -> embedding (null if unavailable).
 * @param {number} n - Number of results to select.
 * @param {number} lambda - Trade-off weight (default MMR_LAMBDA = 0.7).
 * @returns {Array<object>} Reranked subset of candidates (length <= n).
 */
export function maximalMarginalRelevance(
  candidates,
  querySimMap,
  embeddingMap,
  n,
  lambda = MMR_LAMBDA,
) {
  if (candidates.length === 0) return [];

  // Work on a copy so the caller's candidate array is never mutated.
  const remaining = [...candidates];
  const selected = [];
  // Parallel arrays, same index as `selected`: the vector (or null) and the
  // full entry, used for the redundancy computation below.
  const selectedVecs = [];
  const selectedEntries = [];

  // Greedy selection: each round picks the remaining candidate with the best
  // relevance-minus-redundancy score. First round has no redundancy term.
  while (selected.length < n && remaining.length > 0) {
    let bestIdx = -1;
    let bestScore = -Infinity;

    for (let i = 0; i < remaining.length; i++) {
      const candidate = remaining[i];
      const relevance = querySimMap.get(candidate.id) ?? 0;

      // Redundancy = max similarity to anything already selected.
      let maxRedundancy = 0;
      if (selectedVecs.length > 0) {
        const vec = embeddingMap.get(candidate.id);
        for (let j = 0; j < selectedVecs.length; j++) {
          let sim;
          if (vec && selectedVecs[j]) {
            // Both embeddings available: use cosine similarity (dot product
            // of unit vectors, per dotProduct's contract).
            sim = dotProduct(vec, selectedVecs[j]);
          } else {
            // Embedding missing on either side: fall back to word-set overlap
            // on title+body text.
            const selEntry = selectedEntries[j];
            sim = jaccardSimilarity(
              `${candidate.title} ${candidate.body}`,
              `${selEntry.title} ${selEntry.body}`,
            );
          }
          if (sim > maxRedundancy) maxRedundancy = sim;
        }
      }

      const score = lambda * relevance - (1 - lambda) * maxRedundancy;
      if (score > bestScore) {
        bestScore = score;
        bestIdx = i;
      }
    }

    // Defensive: with finite scores and a non-empty `remaining`, some index
    // always wins; this guards against NaN scores poisoning the comparison.
    if (bestIdx === -1) break;

    // Remove the winner from the pool and record it with its embedding
    // (null when unavailable, so the Jaccard fallback kicks in above).
    const chosen = remaining.splice(bestIdx, 1)[0];
    selected.push(chosen);
    selectedVecs.push(embeddingMap.get(chosen.id) ?? null);
    selectedEntries.push(chosen);
  }

  return selected;
}
|
|
225
|
-
|
|
226
|
-
/**
 * Hybrid search combining FTS5 text matching and vector similarity,
 * with RRF merging and MMR reranking for diversity.
 *
 * Pipeline:
 * 1. FTS5 ranked list
 * 2. Vector (semantic) ranked list
 * 3. RRF: merge the two ranked lists into a single score
 * 4. Apply recency decay to RRF scores
 * 5. MMR: rerank top candidates for diversity (uses embeddings or Jaccard fallback)
 * 6. Near-duplicate suppression on the final selection
 *
 * Either retrieval stage may fail independently (missing table, bad FTS
 * syntax); the other stage's results are still used.
 *
 * @param {import('../server/types.js').BaseCtx} ctx
 * @param {string} query
 * @param {{ kindFilter?: string|null, categoryFilter?: string|null, since?: string|null, until?: string|null, limit?: number, offset?: number }} opts
 * @returns {Promise<Array<{id, kind, category, title, body, meta, tags, source, file_path, created_at, score}>>}
 */
export async function hybridSearch(
  ctx,
  query,
  {
    kindFilter = null,
    categoryFilter = null,
    excludeEvents = false,
    since = null,
    until = null,
    limit = 20,
    offset = 0,
    decayDays = 30,
    userIdFilter,
    teamIdFilter = null,
    includeSuperseeded = false,
  } = {},
) {
  // id -> hydrated DB row, union of both retrieval stages.
  const rowMap = new Map();
  // id -> vault rowid, needed later to fetch embeddings for MMR.
  const idToRowid = new Map();
  let queryVec = null;

  const extraFilters = buildFilterClauses({
    categoryFilter,
    excludeEvents,
    since,
    until,
    userIdFilter,
    teamIdFilter,
    includeSuperseeded,
  });

  const ftsRankedIds = [];

  // Stage 1a: FTS5 — collect ranked list of IDs
  const ftsQuery = buildFtsQuery(query);
  if (ftsQuery) {
    try {
      const whereParts = ["vault_fts MATCH ?"];
      const ftsParams = [ftsQuery];

      if (kindFilter) {
        whereParts.push("e.kind = ?");
        ftsParams.push(kindFilter);
      }
      whereParts.push(...extraFilters.clauses);
      ftsParams.push(...extraFilters.params);

      const ftsSQL = `SELECT e.*, rank FROM vault_fts f JOIN vault e ON f.rowid = e.rowid WHERE ${whereParts.join(" AND ")} ORDER BY rank LIMIT 15`;
      const rows = ctx.db.prepare(ftsSQL).all(...ftsParams);

      // Strip the FTS `rank` column before storing the row.
      for (const { rank: _rank, ...row } of rows) {
        ftsRankedIds.push(row.id);
        if (!rowMap.has(row.id)) rowMap.set(row.id, row);
      }
    } catch (err) {
      // FTS5 syntax errors from odd user input are expected and ignored;
      // anything else is logged but still non-fatal (vector stage continues).
      if (!err.message?.includes("fts5: syntax error")) {
        console.error(`[retrieve] FTS search error: ${err.message}`);
      }
    }
  }

  const vecRankedIds = [];
  const vecSimMap = new Map();

  // Stage 1b: Vector similarity — collect ranked list of IDs and raw similarity scores
  try {
    const vecCount = ctx.db
      .prepare("SELECT COUNT(*) as c FROM vault_vec")
      .get().c;
    if (vecCount > 0) {
      queryVec = await ctx.embed(query);
      if (queryVec) {
        // Over-fetch when results will be filtered after the vector query
        // (user/team/kind filters are applied in JS below), so enough rows
        // survive filtering to fill the page.
        const hasPostFilter = userIdFilter !== undefined || teamIdFilter;
        const vecLimit = hasPostFilter
          ? kindFilter
            ? 60
            : 30
          : kindFilter
            ? 30
            : 15;
        const vecRows = ctx.db
          .prepare(
            `SELECT v.rowid, v.distance FROM vault_vec v WHERE embedding MATCH ? ORDER BY distance LIMIT ?`,
          )
          .all(queryVec, vecLimit);

        if (vecRows.length) {
          // Hydrate the matching vault rows in one batched IN (...) query.
          const rowids = vecRows.map((vr) => vr.rowid);
          const placeholders = rowids.map(() => "?").join(",");
          const hydrated = ctx.db
            .prepare(
              `SELECT rowid, * FROM vault WHERE rowid IN (${placeholders})`,
            )
            .all(...rowids);

          const byRowid = new Map();
          for (const row of hydrated) byRowid.set(row.rowid, row);

          // Walk vecRows (already distance-ordered) so ranking is preserved,
          // applying the same filters buildFilterClauses encodes in SQL.
          for (const vr of vecRows) {
            const row = byRowid.get(vr.rowid);
            if (!row) continue;
            if (userIdFilter !== undefined && row.user_id !== userIdFilter)
              continue;
            if (teamIdFilter && row.team_id !== teamIdFilter) continue;
            if (kindFilter && row.kind !== kindFilter) continue;
            if (categoryFilter && row.category !== categoryFilter) continue;
            if (excludeEvents && row.category === "event") continue;
            if (since && row.created_at < since) continue;
            if (until && row.created_at > until) continue;
            if (row.expires_at && new Date(row.expires_at) <= new Date())
              continue;

            const { rowid: _rowid, ...cleanRow } = row;
            idToRowid.set(cleanRow.id, Number(row.rowid));

            // sqlite-vec returns L2 distance [0, 2] for normalized vectors.
            // Convert to similarity [0, 1]: 1 - distance/2
            const vecSim = Math.max(0, 1 - vr.distance / 2);
            vecSimMap.set(cleanRow.id, vecSim);
            vecRankedIds.push(cleanRow.id);

            if (!rowMap.has(cleanRow.id)) rowMap.set(cleanRow.id, cleanRow);
          }
        }
      }
    }
  } catch (err) {
    // "no such table" means the vector index isn't set up — run text-only.
    if (!err.message?.includes("no such table")) {
      console.error(`[retrieve] Vector search error: ${err.message}`);
    }
  }

  if (rowMap.size === 0) return [];

  // Stage 2: RRF — merge FTS and vector ranked lists into a single score
  const rrfScores = reciprocalRankFusion([ftsRankedIds, vecRankedIds]);

  // Stage 3: Apply category-aware recency boost to RRF scores
  for (const [id, entry] of rowMap) {
    const boost = recencyBoost(entry.created_at, entry.category, decayDays);
    rrfScores.set(id, (rrfScores.get(id) ?? 0) * boost);
  }

  // Stage 3b: Frequency signal — log(1 + hit_count) / log(1 + max_hit_count)
  // Additive bump (weight 0.13) for frequently-retrieved entries.
  const allRows = [...rowMap.values()];
  const maxHitCount = Math.max(...allRows.map((e) => e.hit_count || 0), 0);
  if (maxHitCount > 0) {
    const logMax = Math.log(1 + maxHitCount);
    for (const entry of allRows) {
      const freqScore = Math.log(1 + (entry.hit_count || 0)) / logMax;
      rrfScores.set(
        entry.id,
        (rrfScores.get(entry.id) ?? 0) + freqScore * 0.13,
      );
    }
  }

  // Attach final score to each entry and sort by RRF score descending
  const candidates = [...rowMap.values()].map((entry) => ({
    ...entry,
    score: rrfScores.get(entry.id) ?? 0,
  }));
  candidates.sort((a, b) => b.score - a.score);

  // Stage 4: Fetch embeddings for all candidates that have a rowid
  const embeddingMap = new Map();
  if (queryVec && idToRowid.size > 0) {
    const rowidToId = new Map();
    for (const [id, rowid] of idToRowid) rowidToId.set(rowid, id);

    const rowidsToFetch = [...idToRowid.values()];
    try {
      const placeholders = rowidsToFetch.map(() => "?").join(",");
      const vecData = ctx.db
        .prepare(
          `SELECT rowid, embedding FROM vault_vec WHERE rowid IN (${placeholders})`,
        )
        .all(...rowidsToFetch);
      for (const row of vecData) {
        const id = rowidToId.get(Number(row.rowid));
        const buf = row.embedding;
        if (id && buf) {
          // Reinterpret the raw BLOB as float32s without copying.
          embeddingMap.set(
            id,
            new Float32Array(buf.buffer, buf.byteOffset, buf.byteLength / 4),
          );
        }
      }
    } catch (_) {
      // Embeddings unavailable — MMR will fall back to Jaccard similarity
    }
  }

  // Use vecSim as the query-relevance signal for MMR; fall back to RRF score
  const querySimMap = new Map();
  for (const candidate of candidates) {
    querySimMap.set(
      candidate.id,
      vecSimMap.has(candidate.id)
        ? vecSimMap.get(candidate.id)
        : candidate.score,
    );
  }

  // Stage 5: MMR — rerank for diversity using embeddings or Jaccard fallback
  // Select offset + limit so pagination slices below stay consistent.
  const mmrSelected = maximalMarginalRelevance(
    candidates,
    querySimMap,
    embeddingMap,
    offset + limit,
  );

  // Stage 6: Near-duplicate suppression (hard filter, not reorder)
  if (queryVec && embeddingMap.size > 0 && mmrSelected.length > limit) {
    const selected = [];
    const selectedVecs = [];
    for (const candidate of mmrSelected) {
      if (selected.length >= offset + limit) break;
      const vec = embeddingMap.get(candidate.id);
      if (vec && selectedVecs.length > 0) {
        let maxSim = 0;
        for (const sv of selectedVecs) {
          const sim = dotProduct(sv, vec);
          if (sim > maxSim) maxSim = sim;
        }
        // Drop candidates whose embedding is nearly identical to one kept.
        if (maxSim > NEAR_DUP_THRESHOLD) continue;
      }
      selected.push(candidate);
      if (vec) selectedVecs.push(vec);
    }
    const dedupedPage = selected.slice(offset, offset + limit);
    trackAccess(ctx.db, dedupedPage);
    return dedupedPage;
  }

  const finalPage = mmrSelected.slice(offset, offset + limit);
  trackAccess(ctx.db, finalPage);
  return finalPage;
}
|
|
482
|
-
|
|
483
|
-
/**
 * Increment hit_count and set last_accessed_at for a batch of retrieved entries.
 * Single batched UPDATE for efficiency; failures are swallowed because
 * frequency tracking is best-effort and must never break a search.
 *
 * @param {import('node:sqlite').DatabaseSync} db
 * @param {Array<{id: string}>} entries
 */
function trackAccess(db, entries) {
  if (entries.length === 0) return;
  try {
    const ids = entries.map((entry) => entry.id);
    const placeholders = ids.map(() => "?").join(",");
    const sql = `UPDATE vault SET hit_count = hit_count + 1, last_accessed_at = datetime('now') WHERE id IN (${placeholders})`;
    db.prepare(sql).run(...ids);
  } catch (_) {
    // Non-fatal: frequency tracking is best-effort
  }
}
|
package/src/server/helpers.js
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* helpers.js — Shared MCP response helpers and validation
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import pkg from "../../package.json" with { type: "json" };
|
|
6
|
-
|
|
7
|
-
/**
 * Wrap plain text in the MCP success-response envelope.
 *
 * @param {string} text - Response body.
 * @returns {{content: Array<{type: string, text: string}>}}
 */
export function ok(text) {
  const message = { type: "text", text };
  return { content: [message] };
}
|
|
10
|
-
|
|
11
|
-
/**
 * Build an MCP error response with diagnostic metadata attached.
 *
 * @param {string} text - Human-readable error message.
 * @param {string} code - Machine-readable error code (default "UNKNOWN").
 * @param {object} meta - Extra fields merged into _meta (may override diagnostics).
 * @returns {object} MCP tool response with isError set.
 */
export function err(text, code = "UNKNOWN", meta = {}) {
  // Environment diagnostics first, so caller-supplied meta keys win on clash.
  const diagnostics = {
    cv_version: pkg.version,
    node_version: process.version,
    platform: process.platform,
    arch: process.arch,
    ...meta,
  };
  return {
    content: [{ type: "text", text }],
    isError: true,
    code,
    _meta: diagnostics,
  };
}
|
|
25
|
-
|
|
26
|
-
/**
 * Guard: return an MCP error response when the vault directory is missing,
 * or null when it exists and the caller may proceed.
 *
 * @param {object} config - Must expose vaultDirExists and vaultDir.
 * @returns {object|null}
 */
export function ensureVaultExists(config) {
  if (config.vaultDirExists) return null;
  return err(
    `Vault directory not found: ${config.vaultDir}. Run context-status for diagnostics.`,
    "VAULT_NOT_FOUND",
  );
}
|
|
35
|
-
|
|
36
|
-
/**
 * Guard: return an MCP error response when `kind` is not a valid identifier
 * (lowercase letter followed by lowercase alphanumerics, `_`, or `-`),
 * or null when the kind is acceptable.
 *
 * @param {string} kind
 * @returns {object|null}
 */
export function ensureValidKind(kind) {
  if (/^[a-z][a-z0-9_-]*$/.test(kind)) return null;
  return err(
    "Required: kind (lowercase alphanumeric, e.g. 'insight', 'reference')",
    "INVALID_KIND",
  );
}
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
import { z } from "zod";
|
|
2
|
-
import { ok } from "../helpers.js";
|
|
3
|
-
|
|
4
|
-
// MCP tool identifier registered with the server.
export const name = "clear_context";

// Tool description shown to the model; doubles as usage guidance since this
// tool only emits instructions — it never touches stored vault data.
export const description =
  "Reset active in-memory session context without deleting vault entries. Call this when switching projects or topics mid-session. With `scope`, all subsequent get_context calls should filter to that tag/project. Vault data is never modified.";

// Zod shape for the tool's input; `scope` is the only (optional) argument.
export const inputSchema = {
  scope: z
    .string()
    .optional()
    .describe(
      "Optional tag or project name to focus on going forward. When provided, treat subsequent get_context calls as if filtered to this tag.",
    ),
};
|
|
17
|
-
|
|
18
|
-
/**
 * Build the clear_context response: a markdown notice that session context
 * is reset, plus optional scoping instructions. Pure — no vault access.
 *
 * @param {object} args - Tool arguments; only `scope` is read.
 * @param {import('../types.js').BaseCtx & Partial<import('../types.js').HostedCtxExtensions>} _ctx
 * @returns {object} MCP success response from ok().
 */
export function handler({ scope } = {}) {
  const header = [
    "## Context Reset",
    "",
    "Active session context has been cleared. All previous context from this session should be disregarded.",
    "",
    "Vault entries are unchanged — no data was deleted.",
  ];

  // Whitespace-only scope is treated the same as no scope.
  const trimmed = scope?.trim();
  const footer = trimmed
    ? [
        "",
        `### Active Scope: \`${trimmed}\``,
        "",
        `Going forward, treat \`get_context\` calls as scoped to the tag or project **"${trimmed}"** unless the user explicitly requests a different scope or passes their own tag filters.`,
      ]
    : [
        "",
        "No scope set. Use `get_context` normally — all vault entries are accessible.",
      ];

  return ok([...header, ...footer].join("\n"));
}
|