@aeriondyseti/vector-memory-mcp 2.3.0 → 2.4.4-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +6 -6
- package/server/core/connection.ts +1 -1
- package/server/core/conversation.repository.ts +113 -16
- package/server/core/conversation.service.ts +19 -19
- package/server/core/conversation.ts +7 -5
- package/server/core/embeddings.service.ts +108 -17
- package/server/core/memory.repository.ts +49 -14
- package/server/core/memory.service.ts +47 -42
- package/server/core/memory.ts +40 -1
- package/server/core/migration.service.ts +3 -3
- package/server/core/migrations.ts +60 -20
- package/server/core/parsers/claude-code.parser.ts +3 -3
- package/server/core/parsers/types.ts +1 -1
- package/server/core/sqlite-utils.ts +67 -2
- package/server/index.ts +13 -15
- package/server/transports/http/mcp-transport.ts +5 -5
- package/server/transports/http/server.ts +19 -6
- package/server/transports/mcp/handlers.ts +47 -23
- package/server/transports/mcp/server.ts +5 -5
- package/scripts/lancedb-extract.ts +0 -181
- package/scripts/smoke-test.ts +0 -699
- package/scripts/sync-version.ts +0 -35
- package/scripts/test-runner.ts +0 -76
- package/scripts/warmup.ts +0 -72
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aeriondyseti/vector-memory-mcp",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.4.4-dev.1",
|
|
4
4
|
"description": "A zero-configuration RAG memory server for MCP clients",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "server/index.ts",
|
|
@@ -9,7 +9,6 @@
|
|
|
9
9
|
},
|
|
10
10
|
"files": [
|
|
11
11
|
"server",
|
|
12
|
-
"scripts",
|
|
13
12
|
"README.md",
|
|
14
13
|
"LICENSE"
|
|
15
14
|
],
|
|
@@ -31,6 +30,7 @@
|
|
|
31
30
|
"test:quick": "bun test",
|
|
32
31
|
"test:coverage": "bun test --preload ./tests/preload.ts --coverage",
|
|
33
32
|
"benchmark": "bun test tests/benchmark.test.ts --preload ./tests/preload.ts",
|
|
33
|
+
"benchmark:update": "bun run scripts/update-benchmarks.ts",
|
|
34
34
|
"test:preload": "bun run tests/preload.ts",
|
|
35
35
|
"smoke": "bun run scripts/smoke-test.ts",
|
|
36
36
|
"warmup": "bun run scripts/warmup.ts",
|
|
@@ -47,18 +47,18 @@
|
|
|
47
47
|
],
|
|
48
48
|
"license": "MIT",
|
|
49
49
|
"dependencies": {
|
|
50
|
-
"@huggingface/
|
|
50
|
+
"@huggingface/tokenizers": "^0.1.3",
|
|
51
51
|
"@lancedb/lancedb": "^0.26.2",
|
|
52
52
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
53
53
|
"arg": "^5.0.2",
|
|
54
|
-
"hono": "^4.11.3"
|
|
54
|
+
"hono": "^4.11.3",
|
|
55
|
+
"onnxruntime-node": "^1.21.0"
|
|
55
56
|
},
|
|
56
57
|
"devDependencies": {
|
|
57
58
|
"@types/bun": "latest",
|
|
58
59
|
"typescript": "^5.0.0"
|
|
59
60
|
},
|
|
60
61
|
"trustedDependencies": [
|
|
61
|
-
"protobufjs"
|
|
62
|
-
"sharp"
|
|
62
|
+
"protobufjs"
|
|
63
63
|
]
|
|
64
64
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { Database } from "bun:sqlite";
|
|
2
2
|
import { existsSync, mkdirSync } from "fs";
|
|
3
3
|
import { dirname } from "path";
|
|
4
|
-
import { removeVec0Tables, runMigrations } from "./migrations
|
|
4
|
+
import { removeVec0Tables, runMigrations } from "./migrations";
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
7
|
* Open (or create) a SQLite database at the given path
|
|
@@ -2,15 +2,15 @@ import type { Database } from "bun:sqlite";
|
|
|
2
2
|
import type {
|
|
3
3
|
ConversationHybridRow,
|
|
4
4
|
HistoryFilters,
|
|
5
|
-
} from "./conversation
|
|
5
|
+
} from "./conversation";
|
|
6
6
|
import {
|
|
7
7
|
serializeVector,
|
|
8
8
|
safeParseJsonObject,
|
|
9
9
|
sanitizeFtsQuery,
|
|
10
|
-
|
|
10
|
+
hybridRRFWithSignals,
|
|
11
11
|
topByRRF,
|
|
12
12
|
knnSearch,
|
|
13
|
-
} from "./sqlite-utils
|
|
13
|
+
} from "./sqlite-utils";
|
|
14
14
|
|
|
15
15
|
export class ConversationRepository {
|
|
16
16
|
constructor(private db: Database) {}
|
|
@@ -105,13 +105,102 @@ export class ConversationRepository {
|
|
|
105
105
|
tx();
|
|
106
106
|
}
|
|
107
107
|
|
|
108
|
+
/**
 * Replaces every stored chunk of a session with `rows` inside one transaction.
 *
 * The transaction first removes the session's existing entries from the vector,
 * FTS and main tables, then writes each new row to all three tables. Because the
 * work runs through `db.transaction`, a failure part-way through leaves the
 * previously indexed chunks in place.
 *
 * The stale vector/FTS entries are selected with a subquery on `session_id`
 * rather than an expanded `IN (?, ?, ...)` list, so the number of bound
 * parameters stays constant no matter how many chunks the session had
 * (SQLite caps the number of bound variables per statement).
 *
 * @param sessionId Session whose existing chunks are discarded.
 * @param rows      Replacement chunks; `metadata` is stored as given (a string).
 */
async replaceSession(
  sessionId: string,
  rows: Array<{
    id: string;
    vector: number[];
    content: string;
    metadata: string;
    created_at: number;
    session_id: string;
    role: string;
    message_index_start: number;
    message_index_end: number;
    project: string;
  }>
): Promise<void> {
  const staleIds = `SELECT id FROM conversation_history WHERE session_id = ?`;

  const deleteSessionVec = this.db.prepare(
    `DELETE FROM conversation_history_vec WHERE id IN (${staleIds})`
  );
  const deleteSessionFts = this.db.prepare(
    `DELETE FROM conversation_history_fts WHERE id IN (${staleIds})`
  );
  const deleteSessionMain = this.db.prepare(
    `DELETE FROM conversation_history WHERE session_id = ?`
  );

  const insertMain = this.db.prepare(
    `INSERT OR REPLACE INTO conversation_history
       (id, content, metadata, created_at, session_id, role, message_index_start, message_index_end, project)
     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`
  );
  const deleteVec = this.db.prepare(
    `DELETE FROM conversation_history_vec WHERE id = ?`
  );
  const insertVec = this.db.prepare(
    `INSERT INTO conversation_history_vec (id, vector) VALUES (?, ?)`
  );
  const deleteFts = this.db.prepare(
    `DELETE FROM conversation_history_fts WHERE id = ?`
  );
  const insertFts = this.db.prepare(
    `INSERT INTO conversation_history_fts (id, content) VALUES (?, ?)`
  );

  const tx = this.db.transaction(() => {
    // Side tables first: their subquery reads the main-table rows that the
    // last statement removes.
    deleteSessionVec.run(sessionId);
    deleteSessionFts.run(sessionId);
    deleteSessionMain.run(sessionId);

    for (const row of rows) {
      insertMain.run(
        row.id,
        row.content,
        row.metadata,
        row.created_at,
        row.session_id,
        row.role,
        row.message_index_start,
        row.message_index_end,
        row.project
      );
      // INSERT OR REPLACE may have overwritten a row with the same id that was
      // not part of this session's old rows; clear its side-table entries
      // before re-inserting so no duplicates accumulate.
      deleteVec.run(row.id);
      insertVec.run(row.id, serializeVector(row.vector));
      deleteFts.run(row.id);
      insertFts.run(row.id, row.content);
    }
  });

  tx();
}
|
|
187
|
+
|
|
188
|
+
/**
|
|
189
|
+
* Hybrid search combining vector KNN and FTS5, fused with Reciprocal Rank Fusion.
|
|
190
|
+
*
|
|
191
|
+
* NOTE: Filters (session, role, project, date) are applied AFTER candidate selection
|
|
192
|
+
* and RRF scoring, not pushed into the KNN/FTS queries. This is an intentional
|
|
193
|
+
* performance tradeoff — KNN is brute-force JS-side (no SQL pre-filter possible),
|
|
194
|
+
* and filtering post-RRF avoids duplicating filter logic across both retrieval paths.
|
|
195
|
+
* The consequence is that filtered queries may return fewer than `limit` results.
|
|
196
|
+
*/
|
|
108
197
|
async findHybrid(
|
|
109
198
|
embedding: number[],
|
|
110
199
|
query: string,
|
|
111
200
|
limit: number,
|
|
112
201
|
filters?: HistoryFilters
|
|
113
202
|
): Promise<ConversationHybridRow[]> {
|
|
114
|
-
const candidateCount = limit *
|
|
203
|
+
const candidateCount = limit * 5;
|
|
115
204
|
|
|
116
205
|
// Vector KNN search (brute-force cosine similarity in JS)
|
|
117
206
|
const vecResults = knnSearch(this.db, "conversation_history_vec", embedding, candidateCount);
|
|
@@ -127,8 +216,10 @@ export class ConversationRepository {
|
|
|
127
216
|
)
|
|
128
217
|
.all(ftsQuery, candidateCount) as Array<{ id: string }>;
|
|
129
218
|
|
|
130
|
-
// Compute RRF scores
|
|
131
|
-
const
|
|
219
|
+
// Compute RRF scores with search signals for confidence scoring
|
|
220
|
+
const signalsMap = hybridRRFWithSignals(vecResults, ftsResults);
|
|
221
|
+
const rrfScores = new Map<string, number>();
|
|
222
|
+
for (const [id, s] of signalsMap) rrfScores.set(id, s.rrfScore);
|
|
132
223
|
const topIds = topByRRF(rrfScores, limit);
|
|
133
224
|
|
|
134
225
|
if (topIds.length === 0) return [];
|
|
@@ -185,17 +276,23 @@ export class ConversationRepository {
|
|
|
185
276
|
project: string;
|
|
186
277
|
}>;
|
|
187
278
|
|
|
188
|
-
// Build a lookup for ordering by RRF score
|
|
189
|
-
const scoreMap = new Map(topIds.map((id) => [id, rrfScores.get(id)!]));
|
|
190
|
-
|
|
191
279
|
return fullRows
|
|
192
|
-
.map((row) =>
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
280
|
+
.map((row) => {
|
|
281
|
+
const signals = signalsMap.get(row.id)!;
|
|
282
|
+
return {
|
|
283
|
+
id: row.id,
|
|
284
|
+
content: row.content,
|
|
285
|
+
metadata: safeParseJsonObject(row.metadata),
|
|
286
|
+
createdAt: new Date(row.created_at),
|
|
287
|
+
rrfScore: signals.rrfScore,
|
|
288
|
+
signals: {
|
|
289
|
+
cosineSimilarity: signals.cosineSimilarity,
|
|
290
|
+
ftsMatch: signals.ftsMatch,
|
|
291
|
+
knnRank: signals.knnRank,
|
|
292
|
+
ftsRank: signals.ftsRank,
|
|
293
|
+
},
|
|
294
|
+
};
|
|
295
|
+
})
|
|
199
296
|
.sort((a, b) => b.rrfScore - a.rrfScore);
|
|
200
297
|
}
|
|
201
298
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { createHash } from "crypto";
|
|
2
2
|
import { readFile, writeFile, mkdir } from "fs/promises";
|
|
3
3
|
import { dirname, join } from "path";
|
|
4
|
-
import type { ConversationRepository } from "./conversation.repository
|
|
4
|
+
import type { ConversationRepository } from "./conversation.repository";
|
|
5
5
|
import type {
|
|
6
6
|
ConversationChunk,
|
|
7
7
|
ConversationHybridRow,
|
|
@@ -10,12 +10,12 @@ import type {
|
|
|
10
10
|
ParsedMessage,
|
|
11
11
|
SessionFileInfo,
|
|
12
12
|
SessionIndexDetail,
|
|
13
|
-
} from "./conversation
|
|
14
|
-
import type { ConversationHistoryConfig } from "../config/index
|
|
15
|
-
import { resolveSessionLogPath } from "../config/index
|
|
16
|
-
import type { EmbeddingsService } from "./embeddings.service
|
|
17
|
-
import type { SessionLogParser } from "./parsers/types
|
|
18
|
-
import { ClaudeCodeSessionParser } from "./parsers/claude-code.parser
|
|
13
|
+
} from "./conversation";
|
|
14
|
+
import type { ConversationHistoryConfig } from "../config/index";
|
|
15
|
+
import { resolveSessionLogPath } from "../config/index";
|
|
16
|
+
import type { EmbeddingsService } from "./embeddings.service";
|
|
17
|
+
import type { SessionLogParser } from "./parsers/types";
|
|
18
|
+
import { ClaudeCodeSessionParser } from "./parsers/claude-code.parser";
|
|
19
19
|
|
|
20
20
|
/**
|
|
21
21
|
* Generate a deterministic chunk ID from session ID and message indices.
|
|
@@ -78,12 +78,7 @@ export function chunkMessages(
|
|
|
78
78
|
messageIndexEnd: lastMsg.messageIndex,
|
|
79
79
|
project: firstMsg.project,
|
|
80
80
|
metadata: {
|
|
81
|
-
session_id: firstMsg.sessionId,
|
|
82
81
|
timestamp: firstMsg.timestamp.toISOString(),
|
|
83
|
-
role,
|
|
84
|
-
message_index_start: firstMsg.messageIndex,
|
|
85
|
-
message_index_end: lastMsg.messageIndex,
|
|
86
|
-
project: firstMsg.project,
|
|
87
82
|
git_branch: firstMsg.gitBranch,
|
|
88
83
|
is_subagent: firstMsg.isSubagent,
|
|
89
84
|
agent_id: firstMsg.agentId,
|
|
@@ -273,20 +268,24 @@ export class ConversationHistoryService {
|
|
|
273
268
|
this.config.chunkOverlap
|
|
274
269
|
);
|
|
275
270
|
|
|
276
|
-
//
|
|
277
|
-
await this.repository.deleteBySessionId(file.sessionId);
|
|
278
|
-
|
|
279
|
-
// Embed all chunks
|
|
271
|
+
// Embed all chunks FIRST (pure computation, no DB side effects)
|
|
280
272
|
const embeddings = await this.embeddings.embedBatch(
|
|
281
273
|
chunks.map((c) => c.content)
|
|
282
274
|
);
|
|
283
275
|
|
|
284
|
-
//
|
|
276
|
+
// Build rows
|
|
285
277
|
const rows = chunks.map((chunk, i) => ({
|
|
286
278
|
id: chunk.id,
|
|
287
279
|
vector: embeddings[i],
|
|
288
280
|
content: chunk.content,
|
|
289
|
-
metadata: JSON.stringify(
|
|
281
|
+
metadata: JSON.stringify({
|
|
282
|
+
...chunk.metadata,
|
|
283
|
+
session_id: chunk.sessionId,
|
|
284
|
+
role: chunk.role,
|
|
285
|
+
message_index_start: chunk.messageIndexStart,
|
|
286
|
+
message_index_end: chunk.messageIndexEnd,
|
|
287
|
+
project: chunk.project,
|
|
288
|
+
}),
|
|
290
289
|
created_at: chunk.timestamp.getTime(),
|
|
291
290
|
session_id: chunk.sessionId,
|
|
292
291
|
role: chunk.role,
|
|
@@ -295,7 +294,8 @@ export class ConversationHistoryService {
|
|
|
295
294
|
project: chunk.project,
|
|
296
295
|
}));
|
|
297
296
|
|
|
298
|
-
|
|
297
|
+
// Atomically replace old chunks with new ones
|
|
298
|
+
await this.repository.replaceSession(file.sessionId, rows);
|
|
299
299
|
|
|
300
300
|
// Update index state
|
|
301
301
|
const session: IndexedSession = {
|
|
@@ -14,12 +14,7 @@ export interface ParsedMessage {
|
|
|
14
14
|
|
|
15
15
|
/** Metadata stored per conversation chunk in the database */
|
|
16
16
|
export interface ConversationChunkMetadata {
|
|
17
|
-
session_id: string;
|
|
18
17
|
timestamp: string;
|
|
19
|
-
role: string;
|
|
20
|
-
message_index_start: number;
|
|
21
|
-
message_index_end: number;
|
|
22
|
-
project: string;
|
|
23
18
|
git_branch?: string;
|
|
24
19
|
is_subagent: boolean;
|
|
25
20
|
agent_id?: string;
|
|
@@ -52,6 +47,8 @@ export interface IndexedSession {
|
|
|
52
47
|
lastMessageAt: Date;
|
|
53
48
|
}
|
|
54
49
|
|
|
50
|
+
import type { SearchSignals } from "./memory";
|
|
51
|
+
|
|
55
52
|
/** Raw row from conversation_history table with RRF score */
|
|
56
53
|
export interface ConversationHybridRow {
|
|
57
54
|
id: string;
|
|
@@ -59,6 +56,7 @@ export interface ConversationHybridRow {
|
|
|
59
56
|
metadata: Record<string, unknown>;
|
|
60
57
|
createdAt: Date;
|
|
61
58
|
rrfScore: number;
|
|
59
|
+
signals: SearchSignals;
|
|
62
60
|
}
|
|
63
61
|
|
|
64
62
|
/** Unified search result with source provenance */
|
|
@@ -70,6 +68,8 @@ export interface SearchResult {
|
|
|
70
68
|
updatedAt: Date;
|
|
71
69
|
source: "memory" | "conversation_history";
|
|
72
70
|
score: number;
|
|
71
|
+
/** Absolute relevance confidence (0.0-1.0). Based on cosine similarity + retrieval agreement. */
|
|
72
|
+
confidence: number;
|
|
73
73
|
// Memory-specific fields
|
|
74
74
|
supersededBy: string | null;
|
|
75
75
|
usefulness?: number;
|
|
@@ -114,6 +114,8 @@ export interface HistoryFilters {
|
|
|
114
114
|
|
|
115
115
|
/** Options for the integrated search across both sources */
|
|
116
116
|
export interface SearchOptions {
|
|
117
|
+
limit?: number;
|
|
118
|
+
includeDeleted?: boolean;
|
|
117
119
|
includeHistory?: boolean;
|
|
118
120
|
historyOnly?: boolean;
|
|
119
121
|
historyWeight?: number;
|
|
@@ -1,9 +1,17 @@
|
|
|
1
|
-
import
|
|
1
|
+
import * as ort from "onnxruntime-node";
|
|
2
|
+
import { Tokenizer } from "@huggingface/tokenizers";
|
|
3
|
+
import { join, dirname } from "path";
|
|
4
|
+
import { mkdir } from "fs/promises";
|
|
5
|
+
import { existsSync } from "fs";
|
|
6
|
+
|
|
7
|
+
const HF_CDN = "https://huggingface.co";
|
|
8
|
+
const MAX_SEQ_LENGTH = 512;
|
|
2
9
|
|
|
3
10
|
export class EmbeddingsService {
|
|
4
11
|
private modelName: string;
|
|
5
|
-
private
|
|
6
|
-
private
|
|
12
|
+
private session: ort.InferenceSession | null = null;
|
|
13
|
+
private tokenizer: Tokenizer | null = null;
|
|
14
|
+
private initPromise: Promise<void> | null = null;
|
|
7
15
|
private _dimension: number;
|
|
8
16
|
|
|
9
17
|
constructor(modelName: string, dimension: number) {
|
|
@@ -15,27 +23,79 @@ export class EmbeddingsService {
|
|
|
15
23
|
return this._dimension;
|
|
16
24
|
}
|
|
17
25
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
}
|
|
26
|
+
/** True once an ONNX inference session has been assigned to this service. */
get isReady(): boolean {
  const loaded = this.session;
  return loaded !== null;
}
|
|
22
29
|
|
|
30
|
+
/**
 * Triggers initialization ahead of the first embedding request.
 * Resolves when `initialize()` resolves and rejects with its error otherwise.
 */
async warmup(): Promise<void> {
  const ready = this.initialize();
  await ready;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Ensures the model session and tokenizer are loaded, running `_init()` at most
 * once at a time.
 *
 * Concurrent callers share the same in-flight promise. If the attempt fails the
 * cached promise is cleared, so a later call retries instead of replaying the
 * stored rejection forever (e.g. after a transient download failure).
 *
 * The fast path requires BOTH `session` and `tokenizer`: checking `session`
 * alone would let a caller through while the tokenizer is still null.
 */
private async initialize(): Promise<void> {
  if (this.session !== null && this.tokenizer !== null) return;

  if (!this.initPromise) {
    this.initPromise = this._init().catch((err: unknown) => {
      // Drop the failed attempt so the next call starts a fresh one.
      this.initPromise = null;
      throw err;
    });
  }
  await this.initPromise;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Directory holding the cached files for this model:
 * `<parent of the entry script's directory>/.cache/models/<modelName>`.
 */
private get cacheDir(): string {
  const entryDir = dirname(Bun.main);
  return join(entryDir, "..", ".cache", "models", this.modelName);
}
|
|
46
|
+
|
|
47
|
+
/**
 * Returns the local cache path of `fileName` for this model, downloading it from
 * `${HF_CDN}/<modelName>/resolve/main/<fileName>` when it is not cached yet.
 *
 * The payload is written to a temporary sibling file and then renamed into
 * place. Writing directly to the final path would leave a truncated file behind
 * if the write is interrupted, and the `existsSync` check would then accept that
 * broken file on every later run.
 *
 * @param fileName Path of the file relative to the model repository root.
 * @returns Absolute-or-relative path (as produced by `join`) of the cached file.
 * @throws Error when the HTTP response status is not OK, or when writing fails.
 */
private async downloadIfMissing(fileName: string): Promise<string> {
  const filePath = join(this.cacheDir, fileName);
  if (existsSync(filePath)) return filePath;

  const url = `${HF_CDN}/${this.modelName}/resolve/main/${fileName}`;
  await mkdir(dirname(filePath), { recursive: true });
  const response = await fetch(url);
  if (!response.ok) throw new Error(`Failed to download ${url}: ${response.status}`);
  const buffer = await response.arrayBuffer();

  // Imported locally so this method does not depend on a file-level import change.
  const { rename, rm } = await import("fs/promises");
  const tempPath = `${filePath}.${process.pid}.tmp`;
  try {
    await Bun.write(tempPath, buffer);
    // Same directory as the target, so the rename does not cross filesystems.
    await rename(tempPath, filePath);
  } catch (err: unknown) {
    // Best-effort cleanup; `force` ignores a temp file that was never created.
    await rm(tempPath, { force: true });
    throw err;
  }
  return filePath;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Downloads (if needed) and loads the ONNX model and its tokenizer.
 *
 * Both objects are built in locals and assigned to the instance together at the
 * very end, with no `await` in between. This way `session`/`isReady` never
 * report a loaded model while `tokenizer` is still null, and a failure while
 * loading the tokenizer does not leave a half-initialized service behind.
 * The tokenizer is constructed before the inference session so that a tokenizer
 * error cannot strand an already-created session.
 *
 * @throws Whatever the download, JSON parsing, tokenizer construction or
 *         session creation throws.
 */
private async _init(): Promise<void> {
  const modelPath = await this.downloadIfMissing("onnx/model.onnx");
  const tokenizerJsonPath = await this.downloadIfMissing("tokenizer.json");
  const tokenizerConfigPath = await this.downloadIfMissing("tokenizer_config.json");

  const tokenizerJson = await Bun.file(tokenizerJsonPath).json();
  const tokenizerConfig = await Bun.file(tokenizerConfigPath).json();
  const tokenizer = new Tokenizer(tokenizerJson, tokenizerConfig);

  const session = await ort.InferenceSession.create(modelPath, {
    executionProviders: ["cpu"],
  });

  // Publish both at once (synchronously) so observers see all-or-nothing.
  this.tokenizer = tokenizer;
  this.session = session;
}
|
|
34
73
|
|
|
35
74
|
/**
 * Computes the embedding of `text`: tokenize, truncate to `MAX_SEQ_LENGTH`
 * tokens, run the ONNX model, mean-pool `last_hidden_state` over the attention
 * mask and normalize the pooled vector.
 *
 * `token_type_ids` is only fed when the loaded model declares that input;
 * passing an input the model does not declare makes the runtime reject the call.
 *
 * @param text Text to embed.
 * @returns The pooled, normalized embedding.
 * @throws Error when the service could not be initialized or the model does not
 *         return a `last_hidden_state` output.
 */
async embed(text: string): Promise<number[]> {
  await this.initialize();

  const session = this.session;
  const tokenizer = this.tokenizer;
  if (session === null || tokenizer === null) {
    throw new Error(`Embedding model "${this.modelName}" is not initialized`);
  }

  const encoded = tokenizer.encode(text);

  // Truncate to model's max sequence length
  const seqLen = Math.min(encoded.ids.length, MAX_SEQ_LENGTH);
  const ids = encoded.ids.slice(0, seqLen);
  const mask = encoded.attention_mask.slice(0, seqLen);

  const inputIds = BigInt64Array.from(ids.map(BigInt));
  const attentionMask = BigInt64Array.from(mask.map(BigInt));

  const feeds: Record<string, ort.Tensor> = {
    input_ids: new ort.Tensor("int64", inputIds, [1, seqLen]),
    attention_mask: new ort.Tensor("int64", attentionMask, [1, seqLen]),
  };
  if (session.inputNames.includes("token_type_ids")) {
    // All zeros: the input is a single sequence.
    feeds.token_type_ids = new ort.Tensor("int64", new BigInt64Array(seqLen), [1, seqLen]);
  }

  const output = await session.run(feeds);
  const lastHidden = output["last_hidden_state"];
  if (!lastHidden) {
    throw new Error(
      `Model "${this.modelName}" returned no "last_hidden_state" output (got: ${Object.keys(output).join(", ")})`,
    );
  }

  const pooled = this.meanPool(lastHidden.data as Float32Array, mask, seqLen);
  return this.normalize(pooled);
}
|
|
40
100
|
|
|
41
101
|
async embedBatch(texts: string[]): Promise<number[][]> {
|
|
@@ -45,4 +105,35 @@ export class EmbeddingsService {
|
|
|
45
105
|
}
|
|
46
106
|
return results;
|
|
47
107
|
}
|
|
108
|
+
|
|
109
|
+
/**
 * Averages the token vectors whose attention-mask entry is truthy.
 *
 * `data` is read as `seqLen` consecutive vectors of `this._dimension` floats
 * (token `t`, component `d` at index `t * dim + d`).
 *
 * @param data   Flattened model output.
 * @param mask   Attention mask; a falsy entry excludes that token.
 * @param seqLen Number of tokens to consider.
 * @returns The mean vector of length `this._dimension`. When no token is
 *          selected (empty input or all-zero mask) the zero vector is returned
 *          instead of dividing by zero, which would produce NaN components.
 * @throws Error when `data` holds fewer than `seqLen * dimension` values.
 */
private meanPool(data: Float32Array, mask: number[], seqLen: number): number[] {
  const dim = this._dimension;
  const expectedLen = seqLen * dim;
  if (data.length < expectedLen) {
    throw new Error(
      `ONNX output size ${data.length} < expected ${expectedLen} (seqLen=${seqLen}, dim=${dim}). Model/dimension mismatch?`,
    );
  }

  const pooled = new Array<number>(dim).fill(0);
  let maskSum = 0;
  for (let t = 0; t < seqLen; t++) {
    if (!mask[t]) continue;
    maskSum += 1;
    const offset = t * dim;
    for (let d = 0; d < dim; d++) {
      pooled[d] += data[offset + d];
    }
  }

  // Nothing was accumulated: keep the zeros rather than computing 0 / 0.
  if (maskSum === 0) return pooled;

  for (let d = 0; d < dim; d++) {
    pooled[d] /= maskSum;
  }
  return pooled;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Scales `vec` to unit Euclidean length.
 *
 * @param vec Input vector; it is not modified.
 * @returns A new array equal to `vec / ||vec||`. When the norm is zero or not
 *          finite, an unscaled copy is returned instead, because dividing would
 *          turn every component into NaN (or 0 / Infinity artefacts).
 */
private normalize(vec: number[]): number[] {
  let sumOfSquares = 0;
  for (const v of vec) sumOfSquares += v * v;
  const norm = Math.sqrt(sumOfSquares);

  if (norm === 0 || !Number.isFinite(norm)) return [...vec];
  return vec.map((v) => v / norm);
}
|
|
48
139
|
}
|
|
@@ -4,15 +4,17 @@ import {
|
|
|
4
4
|
deserializeVector,
|
|
5
5
|
safeParseJsonObject,
|
|
6
6
|
sanitizeFtsQuery,
|
|
7
|
-
|
|
7
|
+
hybridRRFWithSignals,
|
|
8
8
|
topByRRF,
|
|
9
9
|
knnSearch,
|
|
10
|
-
|
|
10
|
+
batchedQuery,
|
|
11
|
+
SQLITE_BATCH_SIZE,
|
|
12
|
+
} from "./sqlite-utils";
|
|
11
13
|
import {
|
|
12
14
|
type Memory,
|
|
13
15
|
type HybridRow,
|
|
14
16
|
DELETED_TOMBSTONE,
|
|
15
|
-
} from "./memory
|
|
17
|
+
} from "./memory";
|
|
16
18
|
|
|
17
19
|
export class MemoryRepository {
|
|
18
20
|
constructor(private db: Database) {}
|
|
@@ -144,14 +146,16 @@ export class MemoryRepository {
|
|
|
144
146
|
/**
 * Loads the memories whose ids are listed in `ids`.
 *
 * The lookup is delegated to `batchedQuery`, which is handed the id list and a
 * callback that runs one `IN (...)` query per batch (presumably to keep each
 * statement within SQLite's parameter limit — confirm against `sqlite-utils`).
 * Each matching row is combined with its stored embedding via `rowToMemory`.
 *
 * @param ids Memory ids to fetch; an empty list short-circuits to `[]`.
 */
async findByIds(ids: string[]): Promise<Memory[]> {
  if (ids.length === 0) {
    return [];
  }

  return batchedQuery(this.db, ids, (chunk) => {
    const marks = chunk.map(() => "?").join(", ");
    const statement = this.db.prepare(`SELECT * FROM memories WHERE id IN (${marks})`);
    const found = statement.all(...chunk) as Array<Record<string, unknown>>;

    return found.map((record) => {
      const vector = this.getEmbedding(record.id as string);
      return this.rowToMemory(record, vector);
    });
  });
}
|
|
157
161
|
|
|
@@ -165,6 +169,28 @@ export class MemoryRepository {
|
|
|
165
169
|
return result.changes > 0;
|
|
166
170
|
}
|
|
167
171
|
|
|
172
|
+
/**
 * Bumps `access_count` by one and sets `last_accessed` for every listed memory.
 *
 * The ids are processed in slices of `SQLITE_BATCH_SIZE`, one UPDATE statement
 * per slice, so no single statement binds more than that many ids.
 *
 * @param ids Memory ids to touch; nothing happens for an empty list.
 * @param now Access time; stored as its epoch-millisecond value.
 */
bulkUpdateAccess(ids: string[], now: Date): void {
  if (ids.length === 0) return;
  const ts = now.getTime();

  let offset = 0;
  while (offset < ids.length) {
    const chunk = ids.slice(offset, offset + SQLITE_BATCH_SIZE);
    const marks = chunk.map(() => "?").join(", ");
    const sql = `UPDATE memories SET access_count = access_count + 1, last_accessed = ? WHERE id IN (${marks})`;
    this.db.prepare(sql).run(ts, ...chunk);
    offset += SQLITE_BATCH_SIZE;
  }
}
|
|
193
|
+
|
|
168
194
|
/**
|
|
169
195
|
* Hybrid search combining vector KNN and FTS5, fused with Reciprocal Rank Fusion.
|
|
170
196
|
*/
|
|
@@ -173,7 +199,7 @@ export class MemoryRepository {
|
|
|
173
199
|
query: string,
|
|
174
200
|
limit: number,
|
|
175
201
|
): Promise<HybridRow[]> {
|
|
176
|
-
const candidateLimit = limit *
|
|
202
|
+
const candidateLimit = limit * 5;
|
|
177
203
|
|
|
178
204
|
// Vector KNN search (brute-force cosine similarity in JS)
|
|
179
205
|
const vectorResults = knnSearch(this.db, "memories_vec", embedding, candidateLimit);
|
|
@@ -188,8 +214,10 @@ export class MemoryRepository {
|
|
|
188
214
|
.all(ftsQuery, candidateLimit) as Array<{ id: string }>)
|
|
189
215
|
: [];
|
|
190
216
|
|
|
191
|
-
// Compute RRF scores
|
|
192
|
-
const
|
|
217
|
+
// Compute RRF scores with search signals for confidence scoring
|
|
218
|
+
const signalsMap = hybridRRFWithSignals(vectorResults, ftsResults);
|
|
219
|
+
const rrfScores = new Map<string, number>();
|
|
220
|
+
for (const [id, s] of signalsMap) rrfScores.set(id, s.rrfScore);
|
|
193
221
|
const topIds = topByRRF(rrfScores, limit);
|
|
194
222
|
|
|
195
223
|
if (topIds.length === 0) return [];
|
|
@@ -216,9 +244,16 @@ export class MemoryRepository {
|
|
|
216
244
|
|
|
217
245
|
const memEmbedding = this.getEmbedding(id);
|
|
218
246
|
const memory = this.rowToMemory(row, memEmbedding);
|
|
247
|
+
const signals = signalsMap.get(id)!;
|
|
219
248
|
results.push({
|
|
220
249
|
...memory,
|
|
221
|
-
rrfScore:
|
|
250
|
+
rrfScore: signals.rrfScore,
|
|
251
|
+
signals: {
|
|
252
|
+
cosineSimilarity: signals.cosineSimilarity,
|
|
253
|
+
ftsMatch: signals.ftsMatch,
|
|
254
|
+
knnRank: signals.knnRank,
|
|
255
|
+
ftsRank: signals.ftsRank,
|
|
256
|
+
},
|
|
222
257
|
});
|
|
223
258
|
}
|
|
224
259
|
|