@aeriondyseti/vector-memory-mcp 2.3.0 → 2.4.4-dev.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aeriondyseti/vector-memory-mcp",
3
- "version": "2.3.0",
3
+ "version": "2.4.4-dev.1",
4
4
  "description": "A zero-configuration RAG memory server for MCP clients",
5
5
  "type": "module",
6
6
  "main": "server/index.ts",
@@ -9,7 +9,6 @@
9
9
  },
10
10
  "files": [
11
11
  "server",
12
- "scripts",
13
12
  "README.md",
14
13
  "LICENSE"
15
14
  ],
@@ -31,6 +30,7 @@
31
30
  "test:quick": "bun test",
32
31
  "test:coverage": "bun test --preload ./tests/preload.ts --coverage",
33
32
  "benchmark": "bun test tests/benchmark.test.ts --preload ./tests/preload.ts",
33
+ "benchmark:update": "bun run scripts/update-benchmarks.ts",
34
34
  "test:preload": "bun run tests/preload.ts",
35
35
  "smoke": "bun run scripts/smoke-test.ts",
36
36
  "warmup": "bun run scripts/warmup.ts",
@@ -47,18 +47,18 @@
47
47
  ],
48
48
  "license": "MIT",
49
49
  "dependencies": {
50
- "@huggingface/transformers": "^3.8.0",
50
+ "@huggingface/tokenizers": "^0.1.3",
51
51
  "@lancedb/lancedb": "^0.26.2",
52
52
  "@modelcontextprotocol/sdk": "^1.0.0",
53
53
  "arg": "^5.0.2",
54
- "hono": "^4.11.3"
54
+ "hono": "^4.11.3",
55
+ "onnxruntime-node": "^1.21.0"
55
56
  },
56
57
  "devDependencies": {
57
58
  "@types/bun": "latest",
58
59
  "typescript": "^5.0.0"
59
60
  },
60
61
  "trustedDependencies": [
61
- "protobufjs",
62
- "sharp"
62
+ "protobufjs"
63
63
  ]
64
64
  }
@@ -1,7 +1,7 @@
1
1
  import { Database } from "bun:sqlite";
2
2
  import { existsSync, mkdirSync } from "fs";
3
3
  import { dirname } from "path";
4
- import { removeVec0Tables, runMigrations } from "./migrations.js";
4
+ import { removeVec0Tables, runMigrations } from "./migrations";
5
5
 
6
6
  /**
7
7
  * Open (or create) a SQLite database at the given path
@@ -2,15 +2,15 @@ import type { Database } from "bun:sqlite";
2
2
  import type {
3
3
  ConversationHybridRow,
4
4
  HistoryFilters,
5
- } from "./conversation.js";
5
+ } from "./conversation";
6
6
  import {
7
7
  serializeVector,
8
8
  safeParseJsonObject,
9
9
  sanitizeFtsQuery,
10
- hybridRRF,
10
+ hybridRRFWithSignals,
11
11
  topByRRF,
12
12
  knnSearch,
13
- } from "./sqlite-utils.js";
13
+ } from "./sqlite-utils";
14
14
 
15
15
  export class ConversationRepository {
16
16
  constructor(private db: Database) {}
@@ -105,13 +105,102 @@ export class ConversationRepository {
105
105
  tx();
106
106
  }
107
107
 
108
/**
 * Replace every stored chunk of one session with `rows`, inside a single
 * transaction so readers never observe a half-replaced session.
 *
 * Existing rows are located by `session_id` in `conversation_history`; their
 * companions in `conversation_history_vec` and `conversation_history_fts`
 * are removed through a subquery instead of an expanded `IN (?, ?, ...)`
 * list, so the number of bound parameters stays constant no matter how many
 * chunks the session already has.
 *
 * @param sessionId Session whose existing chunks are discarded.
 * @param rows Replacement chunks. Each row is written to the main table, the
 *   vector table (vector passed through `serializeVector`) and the FTS table.
 * @returns Resolves once the transaction has committed.
 */
async replaceSession(
  sessionId: string,
  rows: Array<{
    id: string;
    vector: number[];
    content: string;
    metadata: string;
    created_at: number;
    session_id: string;
    role: string;
    message_index_start: number;
    message_index_end: number;
    project: string;
  }>
): Promise<void> {
  // Session-wide cleanup. The side tables must be cleared BEFORE the main
  // table, because the subquery resolves ids through conversation_history.
  const deleteSessionVec = this.db.prepare(
    `DELETE FROM conversation_history_vec
     WHERE id IN (SELECT id FROM conversation_history WHERE session_id = ?)`
  );
  const deleteSessionFts = this.db.prepare(
    `DELETE FROM conversation_history_fts
     WHERE id IN (SELECT id FROM conversation_history WHERE session_id = ?)`
  );
  const deleteSessionMain = this.db.prepare(
    `DELETE FROM conversation_history WHERE session_id = ?`
  );

  // Per-row statements for the replacement chunks.
  const insertMain = this.db.prepare(
    `INSERT OR REPLACE INTO conversation_history
     (id, content, metadata, created_at, session_id, role, message_index_start, message_index_end, project)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  const deleteVec = this.db.prepare(
    `DELETE FROM conversation_history_vec WHERE id = ?`
  );
  const insertVec = this.db.prepare(
    `INSERT INTO conversation_history_vec (id, vector) VALUES (?, ?)`
  );
  const deleteFts = this.db.prepare(
    `DELETE FROM conversation_history_fts WHERE id = ?`
  );
  const insertFts = this.db.prepare(
    `INSERT INTO conversation_history_fts (id, content) VALUES (?, ?)`
  );

  const tx = this.db.transaction(() => {
    // Delete old chunks first.
    deleteSessionVec.run(sessionId);
    deleteSessionFts.run(sessionId);
    deleteSessionMain.run(sessionId);

    // Insert new chunks. The per-id deletes keep the side tables free of
    // duplicates when a row id already exists outside this session
    // (the main table relies on INSERT OR REPLACE for the same case).
    for (const row of rows) {
      insertMain.run(
        row.id,
        row.content,
        row.metadata,
        row.created_at,
        row.session_id,
        row.role,
        row.message_index_start,
        row.message_index_end,
        row.project
      );
      deleteVec.run(row.id);
      insertVec.run(row.id, serializeVector(row.vector));
      deleteFts.run(row.id);
      insertFts.run(row.id, row.content);
    }
  });

  tx();
}
187
+
188
+ /**
189
+ * Hybrid search combining vector KNN and FTS5, fused with Reciprocal Rank Fusion.
190
+ *
191
+ * NOTE: Filters (session, role, project, date) are applied AFTER candidate selection
192
+ * and RRF scoring, not pushed into the KNN/FTS queries. This is an intentional
193
+ * performance tradeoff — KNN is brute-force JS-side (no SQL pre-filter possible),
194
+ * and filtering post-RRF avoids duplicating filter logic across both retrieval paths.
195
+ * The consequence is that filtered queries may return fewer than `limit` results.
196
+ */
108
197
  async findHybrid(
109
198
  embedding: number[],
110
199
  query: string,
111
200
  limit: number,
112
201
  filters?: HistoryFilters
113
202
  ): Promise<ConversationHybridRow[]> {
114
- const candidateCount = limit * 3;
203
+ const candidateCount = limit * 5;
115
204
 
116
205
  // Vector KNN search (brute-force cosine similarity in JS)
117
206
  const vecResults = knnSearch(this.db, "conversation_history_vec", embedding, candidateCount);
@@ -127,8 +216,10 @@ export class ConversationRepository {
127
216
  )
128
217
  .all(ftsQuery, candidateCount) as Array<{ id: string }>;
129
218
 
130
- // Compute RRF scores and get top ids
131
- const rrfScores = hybridRRF(vecResults, ftsResults);
219
+ // Compute RRF scores with search signals for confidence scoring
220
+ const signalsMap = hybridRRFWithSignals(vecResults, ftsResults);
221
+ const rrfScores = new Map<string, number>();
222
+ for (const [id, s] of signalsMap) rrfScores.set(id, s.rrfScore);
132
223
  const topIds = topByRRF(rrfScores, limit);
133
224
 
134
225
  if (topIds.length === 0) return [];
@@ -185,17 +276,23 @@ export class ConversationRepository {
185
276
  project: string;
186
277
  }>;
187
278
 
188
- // Build a lookup for ordering by RRF score
189
- const scoreMap = new Map(topIds.map((id) => [id, rrfScores.get(id)!]));
190
-
191
279
  return fullRows
192
- .map((row) => ({
193
- id: row.id,
194
- content: row.content,
195
- metadata: safeParseJsonObject(row.metadata),
196
- createdAt: new Date(row.created_at),
197
- rrfScore: scoreMap.get(row.id) ?? 0,
198
- }))
280
+ .map((row) => {
281
+ const signals = signalsMap.get(row.id)!;
282
+ return {
283
+ id: row.id,
284
+ content: row.content,
285
+ metadata: safeParseJsonObject(row.metadata),
286
+ createdAt: new Date(row.created_at),
287
+ rrfScore: signals.rrfScore,
288
+ signals: {
289
+ cosineSimilarity: signals.cosineSimilarity,
290
+ ftsMatch: signals.ftsMatch,
291
+ knnRank: signals.knnRank,
292
+ ftsRank: signals.ftsRank,
293
+ },
294
+ };
295
+ })
199
296
  .sort((a, b) => b.rrfScore - a.rrfScore);
200
297
  }
201
298
  }
@@ -1,7 +1,7 @@
1
1
  import { createHash } from "crypto";
2
2
  import { readFile, writeFile, mkdir } from "fs/promises";
3
3
  import { dirname, join } from "path";
4
- import type { ConversationRepository } from "./conversation.repository.js";
4
+ import type { ConversationRepository } from "./conversation.repository";
5
5
  import type {
6
6
  ConversationChunk,
7
7
  ConversationHybridRow,
@@ -10,12 +10,12 @@ import type {
10
10
  ParsedMessage,
11
11
  SessionFileInfo,
12
12
  SessionIndexDetail,
13
- } from "./conversation.js";
14
- import type { ConversationHistoryConfig } from "../config/index.js";
15
- import { resolveSessionLogPath } from "../config/index.js";
16
- import type { EmbeddingsService } from "./embeddings.service.js";
17
- import type { SessionLogParser } from "./parsers/types.js";
18
- import { ClaudeCodeSessionParser } from "./parsers/claude-code.parser.js";
13
+ } from "./conversation";
14
+ import type { ConversationHistoryConfig } from "../config/index";
15
+ import { resolveSessionLogPath } from "../config/index";
16
+ import type { EmbeddingsService } from "./embeddings.service";
17
+ import type { SessionLogParser } from "./parsers/types";
18
+ import { ClaudeCodeSessionParser } from "./parsers/claude-code.parser";
19
19
 
20
20
  /**
21
21
  * Generate a deterministic chunk ID from session ID and message indices.
@@ -78,12 +78,7 @@ export function chunkMessages(
78
78
  messageIndexEnd: lastMsg.messageIndex,
79
79
  project: firstMsg.project,
80
80
  metadata: {
81
- session_id: firstMsg.sessionId,
82
81
  timestamp: firstMsg.timestamp.toISOString(),
83
- role,
84
- message_index_start: firstMsg.messageIndex,
85
- message_index_end: lastMsg.messageIndex,
86
- project: firstMsg.project,
87
82
  git_branch: firstMsg.gitBranch,
88
83
  is_subagent: firstMsg.isSubagent,
89
84
  agent_id: firstMsg.agentId,
@@ -273,20 +268,24 @@ export class ConversationHistoryService {
273
268
  this.config.chunkOverlap
274
269
  );
275
270
 
276
- // Delete existing chunks for re-indexing
277
- await this.repository.deleteBySessionId(file.sessionId);
278
-
279
- // Embed all chunks
271
+ // Embed all chunks FIRST (pure computation, no DB side effects)
280
272
  const embeddings = await this.embeddings.embedBatch(
281
273
  chunks.map((c) => c.content)
282
274
  );
283
275
 
284
- // Insert all chunks
276
+ // Build rows
285
277
  const rows = chunks.map((chunk, i) => ({
286
278
  id: chunk.id,
287
279
  vector: embeddings[i],
288
280
  content: chunk.content,
289
- metadata: JSON.stringify(chunk.metadata),
281
+ metadata: JSON.stringify({
282
+ ...chunk.metadata,
283
+ session_id: chunk.sessionId,
284
+ role: chunk.role,
285
+ message_index_start: chunk.messageIndexStart,
286
+ message_index_end: chunk.messageIndexEnd,
287
+ project: chunk.project,
288
+ }),
290
289
  created_at: chunk.timestamp.getTime(),
291
290
  session_id: chunk.sessionId,
292
291
  role: chunk.role,
@@ -295,7 +294,8 @@ export class ConversationHistoryService {
295
294
  project: chunk.project,
296
295
  }));
297
296
 
298
- await this.repository.insertBatch(rows);
297
+ // Atomically replace old chunks with new ones
298
+ await this.repository.replaceSession(file.sessionId, rows);
299
299
 
300
300
  // Update index state
301
301
  const session: IndexedSession = {
@@ -14,12 +14,7 @@ export interface ParsedMessage {
14
14
 
15
15
  /** Metadata stored per conversation chunk in the database */
16
16
  export interface ConversationChunkMetadata {
17
- session_id: string;
18
17
  timestamp: string;
19
- role: string;
20
- message_index_start: number;
21
- message_index_end: number;
22
- project: string;
23
18
  git_branch?: string;
24
19
  is_subagent: boolean;
25
20
  agent_id?: string;
@@ -52,6 +47,8 @@ export interface IndexedSession {
52
47
  lastMessageAt: Date;
53
48
  }
54
49
 
50
+ import type { SearchSignals } from "./memory";
51
+
55
52
  /** Raw row from conversation_history table with RRF score */
56
53
  export interface ConversationHybridRow {
57
54
  id: string;
@@ -59,6 +56,7 @@ export interface ConversationHybridRow {
59
56
  metadata: Record<string, unknown>;
60
57
  createdAt: Date;
61
58
  rrfScore: number;
59
+ signals: SearchSignals;
62
60
  }
63
61
 
64
62
  /** Unified search result with source provenance */
@@ -70,6 +68,8 @@ export interface SearchResult {
70
68
  updatedAt: Date;
71
69
  source: "memory" | "conversation_history";
72
70
  score: number;
71
+ /** Absolute relevance confidence (0.0-1.0). Based on cosine similarity + retrieval agreement. */
72
+ confidence: number;
73
73
  // Memory-specific fields
74
74
  supersededBy: string | null;
75
75
  usefulness?: number;
@@ -114,6 +114,8 @@ export interface HistoryFilters {
114
114
 
115
115
  /** Options for the integrated search across both sources */
116
116
  export interface SearchOptions {
117
+ limit?: number;
118
+ includeDeleted?: boolean;
117
119
  includeHistory?: boolean;
118
120
  historyOnly?: boolean;
119
121
  historyWeight?: number;
@@ -1,9 +1,17 @@
1
- import { pipeline, type FeatureExtractionPipeline } from "@huggingface/transformers";
1
+ import * as ort from "onnxruntime-node";
2
+ import { Tokenizer } from "@huggingface/tokenizers";
3
+ import { join, dirname } from "path";
4
+ import { mkdir } from "fs/promises";
5
+ import { existsSync } from "fs";
6
+
7
+ const HF_CDN = "https://huggingface.co";
8
+ const MAX_SEQ_LENGTH = 512;
2
9
 
3
10
  export class EmbeddingsService {
4
11
  private modelName: string;
5
- private extractor: FeatureExtractionPipeline | null = null;
6
- private initPromise: Promise<FeatureExtractionPipeline> | null = null;
12
+ private session: ort.InferenceSession | null = null;
13
+ private tokenizer: Tokenizer | null = null;
14
+ private initPromise: Promise<void> | null = null;
7
15
  private _dimension: number;
8
16
 
9
17
  constructor(modelName: string, dimension: number) {
@@ -15,27 +23,79 @@ export class EmbeddingsService {
15
23
  return this._dimension;
16
24
  }
17
25
 
18
- private async getExtractor(): Promise<FeatureExtractionPipeline> {
19
- if (this.extractor) {
20
- return this.extractor;
21
- }
26
/**
 * Whether the service can embed without further loading.
 *
 * Both the ONNX session and the tokenizer must be present: `embed` needs
 * the tokenizer as well, so a loaded session alone is not enough.
 */
get isReady(): boolean {
  return this.session !== null && this.tokenizer !== null;
}
22
29
 
30
/**
 * Trigger model/tokenizer loading ahead of the first `embed` call.
 *
 * @returns Resolves when `initialize` has finished.
 */
async warmup(): Promise<void> {
  return this.initialize();
}
33
+
34
/**
 * Ensure the ONNX session and the tokenizer are loaded.
 *
 * Concurrent callers share a single in-flight `_init()` promise. The fast
 * path requires BOTH fields to be set, so a caller arriving while loading
 * is still in progress waits on the shared promise instead of returning
 * with a null tokenizer. If loading fails, the cached promise is cleared so
 * a later call can retry rather than re-observing the same rejection forever.
 *
 * @throws Whatever `_init()` rejects with (download or model-load errors).
 */
private async initialize(): Promise<void> {
  if (this.session && this.tokenizer) return;

  if (!this.initPromise) {
    this.initPromise = this._init().catch((err: unknown) => {
      // Drop the failed attempt so the next caller starts a fresh one.
      this.initPromise = null;
      throw err;
    });
  }

  await this.initPromise;
}
41
+
42
/**
 * Directory holding the cached files for `this.modelName`:
 * `<parent of the Bun.main directory>/.cache/models/<modelName>`.
 */
private get cacheDir(): string {
  const entryDir = dirname(Bun.main);
  return join(entryDir, "..", ".cache", "models", this.modelName);
}
46
+
47
/**
 * Return the local cache path of a model file, downloading it first when it
 * is not cached yet.
 *
 * The payload is written to a temporary sibling file and renamed into place
 * only after the write succeeds. An interrupted write therefore never
 * leaves a truncated file at the final path, where the `existsSync` check
 * would treat it as a valid cache entry on every later run.
 *
 * @param fileName Path of the file relative to the model repository root
 *   (for example `onnx/model.onnx`).
 * @returns Absolute or relative path of the cached file, as built from `cacheDir`.
 * @throws Error when the HTTP response is not OK, or when writing/renaming fails.
 */
private async downloadIfMissing(fileName: string): Promise<string> {
  const filePath = join(this.cacheDir, fileName);
  if (existsSync(filePath)) return filePath;

  const url = `${HF_CDN}/${this.modelName}/resolve/main/${fileName}`;
  await mkdir(dirname(filePath), { recursive: true });
  const response = await fetch(url);
  if (!response.ok) throw new Error(`Failed to download ${url}: ${response.status}`);
  const buffer = await response.arrayBuffer();

  // Local import: the file's top-level import from "fs/promises" only brings in mkdir.
  const { rename, rm } = await import("fs/promises");
  const tempPath = `${filePath}.${process.pid}.${Date.now()}.part`;
  try {
    await Bun.write(tempPath, buffer);
    await rename(tempPath, filePath);
  } catch (err: unknown) {
    // Best-effort cleanup of the partial file; the original error is what matters.
    await rm(tempPath, { force: true });
    throw err;
  }
  return filePath;
}
59
+
60
/**
 * Fetch the model artifacts (if not cached) and build the ONNX session and
 * the tokenizer.
 *
 * Both objects are created in locals and published to `this` together at
 * the very end, with no `await` between the two assignments. Code that
 * checks `this.session` can therefore never observe a session without its
 * tokenizer, and a failure part-way through leaves the service untouched.
 *
 * @throws Propagates download, session-creation and JSON-parsing errors.
 */
private async _init(): Promise<void> {
  const modelPath = await this.downloadIfMissing("onnx/model.onnx");
  const tokenizerJsonPath = await this.downloadIfMissing("tokenizer.json");
  const tokenizerConfigPath = await this.downloadIfMissing("tokenizer_config.json");

  const session = await ort.InferenceSession.create(modelPath, {
    executionProviders: ["cpu"],
  });

  const tokenizerJson = await Bun.file(tokenizerJsonPath).json();
  const tokenizerConfig = await Bun.file(tokenizerConfigPath).json();
  const tokenizer = new Tokenizer(tokenizerJson, tokenizerConfig);

  // Publish last, tokenizer first, so `session !== null` implies a usable tokenizer.
  this.tokenizer = tokenizer;
  this.session = session;
}
34
73
 
35
74
  async embed(text: string): Promise<number[]> {
36
- const extractor = await this.getExtractor();
37
- const output = await extractor(text, { pooling: "mean", normalize: true });
38
- return Array.from(output.data as Float32Array);
75
+ await this.initialize();
76
+
77
+ const encoded = this.tokenizer!.encode(text);
78
+
79
+ // Truncate to model's max sequence length
80
+ const seqLen = Math.min(encoded.ids.length, MAX_SEQ_LENGTH);
81
+ const ids = encoded.ids.slice(0, seqLen);
82
+ const mask = encoded.attention_mask.slice(0, seqLen);
83
+
84
+ const inputIds = BigInt64Array.from(ids.map(BigInt));
85
+ const attentionMask = BigInt64Array.from(mask.map(BigInt));
86
+ const tokenTypeIds = new BigInt64Array(seqLen); // zeros for single-sequence input
87
+
88
+ const feeds: Record<string, ort.Tensor> = {
89
+ input_ids: new ort.Tensor("int64", inputIds, [1, seqLen]),
90
+ attention_mask: new ort.Tensor("int64", attentionMask, [1, seqLen]),
91
+ token_type_ids: new ort.Tensor("int64", tokenTypeIds, [1, seqLen]),
92
+ };
93
+
94
+ const output = await this.session!.run(feeds);
95
+ const lastHidden = output["last_hidden_state"];
96
+
97
+ const pooled = this.meanPool(lastHidden.data as Float32Array, mask, seqLen);
98
+ return this.normalize(pooled);
39
99
  }
40
100
 
41
101
  async embedBatch(texts: string[]): Promise<number[][]> {
@@ -45,4 +105,35 @@ export class EmbeddingsService {
45
105
  }
46
106
  return results;
47
107
  }
108
+
109
/**
 * Average the per-token vectors of the positions whose mask entry is truthy.
 *
 * `data` is read as a flat buffer in which token `t`, dimension `d` lives at
 * index `t * dim + d`, where `dim` is the configured embedding dimension.
 *
 * @param data Flat model output; must hold at least `seqLen * dim` values.
 * @param mask Attention mask; falsy entries are excluded from the average.
 * @param seqLen Number of token positions to consider.
 * @returns A vector of length `dim`. When no position is selected by the
 *   mask the zero vector is returned instead of dividing by zero (which
 *   would yield a vector of NaN).
 * @throws Error when `data` is shorter than `seqLen * dim`.
 */
private meanPool(data: Float32Array, mask: number[], seqLen: number): number[] {
  const dim = this._dimension;
  const expectedLen = seqLen * dim;
  if (data.length < expectedLen) {
    throw new Error(
      `ONNX output size ${data.length} < expected ${expectedLen} (seqLen=${seqLen}, dim=${dim}). Model/dimension mismatch?`,
    );
  }

  const pooled = new Array<number>(dim).fill(0);
  let maskSum = 0;
  for (let t = 0; t < seqLen; t++) {
    if (!mask[t]) continue;
    maskSum += 1;
    const offset = t * dim;
    for (let d = 0; d < dim; d++) {
      pooled[d] += data[offset + d];
    }
  }

  // Nothing was accumulated: `pooled` is still all zeros, and 0 / 0 would be NaN.
  if (maskSum === 0) return pooled;

  for (let d = 0; d < dim; d++) {
    pooled[d] /= maskSum;
  }
  return pooled;
}
132
+
133
/**
 * Scale a vector to unit Euclidean length.
 *
 * @param vec Input vector; it is not mutated.
 * @returns A new array holding `vec / ||vec||`. A vector whose norm is zero
 *   is returned as an unchanged copy, because dividing by zero would turn
 *   every component into NaN.
 */
private normalize(vec: number[]): number[] {
  let sumOfSquares = 0;
  for (const v of vec) sumOfSquares += v * v;
  const norm = Math.sqrt(sumOfSquares);
  if (norm === 0) return vec.slice();
  return vec.map((v) => v / norm);
}
48
139
  }
@@ -4,15 +4,17 @@ import {
4
4
  deserializeVector,
5
5
  safeParseJsonObject,
6
6
  sanitizeFtsQuery,
7
- hybridRRF,
7
+ hybridRRFWithSignals,
8
8
  topByRRF,
9
9
  knnSearch,
10
- } from "./sqlite-utils.js";
10
+ batchedQuery,
11
+ SQLITE_BATCH_SIZE,
12
+ } from "./sqlite-utils";
11
13
  import {
12
14
  type Memory,
13
15
  type HybridRow,
14
16
  DELETED_TOMBSTONE,
15
- } from "./memory.js";
17
+ } from "./memory";
16
18
 
17
19
  export class MemoryRepository {
18
20
  constructor(private db: Database) {}
@@ -144,14 +146,16 @@ export class MemoryRepository {
144
146
  async findByIds(ids: string[]): Promise<Memory[]> {
145
147
  if (ids.length === 0) return [];
146
148
 
147
- const placeholders = ids.map(() => "?").join(", ");
148
- const rows = this.db
149
- .prepare(`SELECT * FROM memories WHERE id IN (${placeholders})`)
150
- .all(...ids) as Array<Record<string, unknown>>;
149
+ return batchedQuery(this.db, ids, (batch) => {
150
+ const placeholders = batch.map(() => "?").join(", ");
151
+ const rows = this.db
152
+ .prepare(`SELECT * FROM memories WHERE id IN (${placeholders})`)
153
+ .all(...batch) as Array<Record<string, unknown>>;
151
154
 
152
- return rows.map((row) => {
153
- const embedding = this.getEmbedding(row.id as string);
154
- return this.rowToMemory(row, embedding);
155
+ return rows.map((row) => {
156
+ const embedding = this.getEmbedding(row.id as string);
157
+ return this.rowToMemory(row, embedding);
158
+ });
155
159
  });
156
160
  }
157
161
 
@@ -165,6 +169,28 @@ export class MemoryRepository {
165
169
  return result.changes > 0;
166
170
  }
167
171
 
172
/**
 * Bump `access_count` by one and set `last_accessed` for every listed memory.
 *
 * Ids are processed in slices of at most `SQLITE_BATCH_SIZE`, one UPDATE
 * statement per slice, so the number of bound parameters per statement
 * stays bounded.
 *
 * @param ids Memory ids to touch; an empty list is a no-op.
 * @param now Timestamp to record; stored as epoch milliseconds (`getTime()`).
 */
bulkUpdateAccess(ids: string[], now: Date): void {
  if (ids.length === 0) return;

  const accessedAt = now.getTime();
  for (let offset = 0; offset < ids.length; offset += SQLITE_BATCH_SIZE) {
    const chunk = ids.slice(offset, offset + SQLITE_BATCH_SIZE);
    const marks = chunk.map(() => "?").join(", ");
    const update = this.db.prepare(
      `UPDATE memories SET access_count = access_count + 1, last_accessed = ? WHERE id IN (${marks})`
    );
    update.run(accessedAt, ...chunk);
  }
}
193
+
168
194
  /**
169
195
  * Hybrid search combining vector KNN and FTS5, fused with Reciprocal Rank Fusion.
170
196
  */
@@ -173,7 +199,7 @@ export class MemoryRepository {
173
199
  query: string,
174
200
  limit: number,
175
201
  ): Promise<HybridRow[]> {
176
- const candidateLimit = limit * 3;
202
+ const candidateLimit = limit * 5;
177
203
 
178
204
  // Vector KNN search (brute-force cosine similarity in JS)
179
205
  const vectorResults = knnSearch(this.db, "memories_vec", embedding, candidateLimit);
@@ -188,8 +214,10 @@ export class MemoryRepository {
188
214
  .all(ftsQuery, candidateLimit) as Array<{ id: string }>)
189
215
  : [];
190
216
 
191
- // Compute RRF scores and pick top ids
192
- const rrfScores = hybridRRF(vectorResults, ftsResults);
217
+ // Compute RRF scores with search signals for confidence scoring
218
+ const signalsMap = hybridRRFWithSignals(vectorResults, ftsResults);
219
+ const rrfScores = new Map<string, number>();
220
+ for (const [id, s] of signalsMap) rrfScores.set(id, s.rrfScore);
193
221
  const topIds = topByRRF(rrfScores, limit);
194
222
 
195
223
  if (topIds.length === 0) return [];
@@ -216,9 +244,16 @@ export class MemoryRepository {
216
244
 
217
245
  const memEmbedding = this.getEmbedding(id);
218
246
  const memory = this.rowToMemory(row, memEmbedding);
247
+ const signals = signalsMap.get(id)!;
219
248
  results.push({
220
249
  ...memory,
221
- rrfScore: rrfScores.get(id) ?? 0,
250
+ rrfScore: signals.rrfScore,
251
+ signals: {
252
+ cosineSimilarity: signals.cosineSimilarity,
253
+ ftsMatch: signals.ftsMatch,
254
+ knnRank: signals.knnRank,
255
+ ftsRank: signals.ftsRank,
256
+ },
222
257
  });
223
258
  }
224
259