@usewhisper/mcp-server 0.2.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -23
- package/dist/autosubscribe-GHO6YR5A.js +4068 -0
- package/dist/chunk-52VJYCZ7.js +455 -0
- package/dist/chunk-5KBZQHDL.js +189 -0
- package/dist/chunk-7SN3CKDK.js +1076 -0
- package/dist/chunk-EI5CE3EY.js +616 -0
- package/dist/chunk-JO3ORBZD.js +616 -0
- package/dist/chunk-LMEYV4JD.js +368 -0
- package/dist/chunk-MEFLJ4PV.js +8385 -0
- package/dist/chunk-PPGYJJED.js +271 -0
- package/dist/chunk-T7KMSTWP.js +399 -0
- package/dist/chunk-TWEIYHI6.js +399 -0
- package/dist/consolidation-2GCKI4RE.js +220 -0
- package/dist/consolidation-4JOPW6BG.js +220 -0
- package/dist/context-sharing-4ITCNKG4.js +307 -0
- package/dist/context-sharing-GYKLXHZA.js +307 -0
- package/dist/context-sharing-Y6LTZZOF.js +307 -0
- package/dist/cost-optimization-7DVSTL6R.js +307 -0
- package/dist/ingest-7T5FAZNC.js +15 -0
- package/dist/ingest-EBNIE7XB.js +15 -0
- package/dist/ingest-FSHT5BCS.js +15 -0
- package/dist/oracle-3RLQF3DP.js +259 -0
- package/dist/oracle-FKRTQUUG.js +282 -0
- package/dist/search-EG6TYWWW.js +13 -0
- package/dist/search-I22QQA7T.js +13 -0
- package/dist/search-T7H5G6DW.js +13 -0
- package/dist/server.js +1124 -1094
- package/package.json +2 -6
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
import {
|
|
2
|
+
calculateTemporalRelevance,
|
|
3
|
+
parseTemporalQuery
|
|
4
|
+
} from "./chunk-5KBZQHDL.js";
|
|
5
|
+
import {
|
|
6
|
+
db,
|
|
7
|
+
embedSingle
|
|
8
|
+
} from "./chunk-MEFLJ4PV.js";
|
|
9
|
+
|
|
10
|
+
// ../src/engine/cache.ts
import crypto from "crypto";

// In-process semantic cache: key -> { embedding, results, expiry (ms epoch) }.
// Note this is per-process memory, not the Redis-style store the keyPrefix
// below suggests — TODO confirm whether a shared backend was intended.
var semanticCache = /* @__PURE__ */ new Map();
// Hard cap on entries; one victim is evicted per insert once exceeded.
var MAX_SEMANTIC_CACHE_SIZE = 500;
// Minimum cosine similarity for a cached entry to count as a hit.
var SEMANTIC_THRESHOLD = 0.92;
// Cache behavior knobs; ttl is in seconds.
var DEFAULT_CONFIG = {
  ttl: 3600,
  // 1 hour
  enabled: true,
  keyPrefix: "whisper:context:"
};
// Process-lifetime hit/miss counters (never reset here).
var cacheHits = 0;
var cacheMisses = 0;
|
|
23
|
+
/** Increment the module-level cache hit counter. */
function recordCacheHit() {
  cacheHits += 1;
}
|
|
26
|
+
/** Increment the module-level cache miss counter. */
function recordCacheMiss() {
  cacheMisses += 1;
}
|
|
29
|
+
/**
 * Cosine similarity between two equal-length numeric vectors.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Similarity in [-1, 1]; 0 when the lengths differ or
 *   either vector has zero magnitude (the previous version returned NaN for
 *   zero vectors, which silently broke every comparison against them).
 */
function cosineSimilarity(a, b) {
  if (a.length !== b.length) return 0;
  let dotProduct = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  // Guard the zero-vector case: division by zero would yield NaN, and NaN
  // fails every >= threshold test without surfacing an error.
  if (denom === 0) return 0;
  return dotProduct / denom;
}
|
|
41
|
+
/**
 * Scan the semantic cache for an entry whose stored embedding is similar
 * enough (>= SEMANTIC_THRESHOLD) to the query embedding.
 *
 * Expired entries encountered during the scan are evicted as a side effect.
 *
 * @param {number[]} queryEmbedding - Embedding of the incoming query.
 * @returns {Promise<{results: any, similarity: number} | null>} Cached
 *   results plus their similarity on a hit; null on a miss or when caching
 *   is disabled.
 */
async function getFromSemanticCache(queryEmbedding) {
  if (!DEFAULT_CONFIG.enabled) return null;
  const now = Date.now();
  let best = null;
  for (const [key, entry] of semanticCache.entries()) {
    // Lazy eviction: drop entries whose TTL has elapsed.
    if (entry.expiry < now) {
      semanticCache.delete(key);
      continue;
    }
    const similarity = cosineSimilarity(queryEmbedding, entry.embedding);
    if (best === null || similarity > best.similarity) {
      best = { key, ...entry, similarity };
    }
  }
  if (best !== null && best.similarity >= SEMANTIC_THRESHOLD) {
    recordCacheHit();
    return { results: best.results, similarity: best.similarity };
  }
  recordCacheMiss();
  return null;
}
|
|
64
|
+
/**
 * Store results in the semantic cache, keyed by a short MD5 of the
 * embedding's first 10 dimensions, then evict the soonest-to-expire entry
 * if the cache has grown past its size cap.
 *
 * @param {number[]} queryEmbedding - Embedding stored alongside the results.
 * @param {any} results - Search results to cache.
 */
async function setInSemanticCache(queryEmbedding, results) {
  if (!DEFAULT_CONFIG.enabled) return;
  const digest = crypto
    .createHash("md5")
    .update(JSON.stringify(queryEmbedding.slice(0, 10)))
    .digest("hex");
  const key = `sem:${digest.substring(0, 8)}`;
  semanticCache.set(key, {
    embedding: queryEmbedding,
    results,
    expiry: Date.now() + DEFAULT_CONFIG.ttl * 1e3
  });
  if (semanticCache.size > MAX_SEMANTIC_CACHE_SIZE) {
    // Evict whichever entry is closest to expiring.
    let victimKey = null;
    let victimExpiry = Infinity;
    for (const [k, v] of semanticCache.entries()) {
      if (v.expiry < victimExpiry) {
        victimExpiry = v.expiry;
        victimKey = k;
      }
    }
    if (victimKey) {
      semanticCache.delete(victimKey);
    }
  }
}
|
|
87
|
+
|
|
88
|
+
// ../src/engine/memory/search.ts
// Similarity at or above which a result is treated as good enough to skip
// relation enrichment and temporal reranking entirely.
var EARLY_EXIT_SIMILARITY = 0.92;
|
|
90
|
+
/**
 * Semantic memory search pipeline: embed the query, consult the semantic
 * cache, vector-search pgvector, optionally enrich with one-hop related
 * memories, rerank by blended similarity/temporal score, attach source
 * chunks, and cache the final results.
 *
 * @param {object} params
 * @param {string} params.query - Natural-language query text.
 * @param {Date} params.questionDate - Reference date for temporal scoring.
 * @param {string} [params.userId] - Optional user scope filter.
 * @param {string} params.projectId - Project scope filter.
 * @param {string} [params.orgId] - Optional org scope filter.
 * @param {string} [params.sessionId] - Optional session scope filter.
 * @param {number} [params.topK=10] - Number of results to return.
 * @param {boolean} [params.includeInactive=false] - Include deactivated memories.
 * @param {string[]} [params.memoryTypes] - Restrict to these memory types.
 * @param {object} [params.temporalFilter] - Pre-parsed temporal constraint;
 *   when omitted it is derived from the query text.
 * @returns {Promise<any[]>} Up to topK scored results.
 */
async function searchMemories(params) {
  const {
    query,
    questionDate,
    userId,
    projectId,
    orgId,
    sessionId,
    topK = 10,
    includeInactive = false,
    memoryTypes
  } = params;
  const queryEmbedding = await embedSingle(query);
  // A sufficiently similar earlier query lets us skip the search entirely.
  const cached = await getFromSemanticCache(queryEmbedding);
  if (cached && cached.similarity >= EARLY_EXIT_SIMILARITY) {
    console.log(`\u26A1 Semantic cache hit (similarity: ${cached.similarity.toFixed(3)})`);
    return cached.results.slice(0, topK);
  }
  const temporal = params.temporalFilter || await parseTemporalQuery(query, questionDate);
  const semanticResults = await vectorSearchMemories({
    embedding: queryEmbedding,
    userId,
    projectId,
    orgId,
    sessionId,
    temporal,
    includeInactive,
    memoryTypes,
    limit: topK * 3
    // Get more for reranking
  });
  if (semanticResults.length === 0) {
    return [];
  }
  // Near-perfect top hit: skip enrichment/reranking, cache and return.
  if (semanticResults.length > 0 && semanticResults[0].similarity >= EARLY_EXIT_SIMILARITY) {
    console.log(`\u26A1 Early exit at ${semanticResults[0].similarity.toFixed(3)}`);
    const topMemories2 = semanticResults.slice(0, topK);
    await setInSemanticCache(queryEmbedding, topMemories2);
    return topMemories2;
  }
  const enriched = await enrichWithRelations(semanticResults, topK * 2);
  // Memories without a documentDate get a neutral 0.5 temporal score.
  const scored = enriched.map((memory) => ({
    ...memory,
    temporalScore: memory.documentDate ? calculateTemporalRelevance(memory.documentDate, questionDate) : 0.5
  }));
  // Blend: 70% vector similarity, 30% temporal relevance.
  const combined = scored.map((m) => ({
    ...m,
    finalScore: m.similarity * 0.7 + m.temporalScore * 0.3
  }));
  combined.sort((a, b) => b.finalScore - a.finalScore);
  const topMemories = combined.slice(0, topK);
  const results = await injectSourceChunks(topMemories);
  await setInSemanticCache(queryEmbedding, results);
  return results;
}
|
|
145
|
+
/**
 * Vector-similarity search over the memories table using pgvector's cosine
 * distance operator (<=>), constrained by scope, validity, type, and
 * temporal filters.
 *
 * Fixes over the previous version:
 * - Every externally supplied string is now single-quote escaped before
 *   interpolation into the raw SQL. Previously only memoryTypes was escaped;
 *   projectId, orgId, userId and sessionId were injectable.
 * - The dead `whereConditions` Prisma-style array (built but never used)
 *   has been removed.
 * - `limit` is coerced to a number before interpolation.
 *
 * @param {object} params
 * @param {number[]} params.embedding - Query embedding.
 * @param {string} [params.userId] - Optional user scope.
 * @param {string} params.projectId - Required project scope.
 * @param {string} [params.orgId] - Optional organization scope.
 * @param {string} [params.sessionId] - Optional session scope.
 * @param {object} params.temporal - Parsed temporal constraint.
 * @param {boolean} params.includeInactive - Include deactivated memories.
 * @param {string[]} [params.memoryTypes] - Restrict to these memory types.
 * @param {number} params.limit - Maximum rows returned.
 * @returns {Promise<any[]>} Rows with a computed `similarity` column,
 *   ordered by ascending cosine distance (best match first).
 */
async function vectorSearchMemories(params) {
  const {
    embedding,
    userId,
    projectId,
    orgId,
    sessionId,
    temporal,
    includeInactive,
    memoryTypes,
    limit
  } = params;
  // Escape a value for inclusion in a single-quoted SQL literal.
  const sqlLit = (v) => `'${String(v).replace(/'/g, "''")}'`;
  const embeddingStr = `[${embedding.join(",")}]`;
  let whereClause = `"projectId" = ${sqlLit(projectId)}`;
  if (orgId) whereClause += ` AND "orgId" = ${sqlLit(orgId)}`;
  if (userId) whereClause += ` AND "userId" = ${sqlLit(userId)}`;
  if (sessionId) whereClause += ` AND "sessionId" = ${sqlLit(sessionId)}`;
  if (!includeInactive) whereClause += ` AND "isActive" = true`;
  // Only memories still valid now (open-ended or future validUntil).
  whereClause += ` AND ("validUntil" IS NULL OR "validUntil" > NOW())`;
  if (memoryTypes && memoryTypes.length > 0) {
    const typesStr = memoryTypes.map(sqlLit).join(",");
    whereClause += ` AND "memoryType" IN (${typesStr})`;
  }
  if (temporal.hasTemporalConstraint && temporal.dateRange) {
    whereClause += ` AND "documentDate" >= '${temporal.dateRange.start.toISOString()}' AND "documentDate" <= '${temporal.dateRange.end.toISOString()}'`;
  }
  const results = await db.$queryRawUnsafe(`
    SELECT
      id,
      content,
      "memoryType" as "memoryType",
      confidence,
      version,
      "documentDate" as "documentDate",
      "eventDate" as "eventDate",
      "validFrom" as "validFrom",
      "validUntil" as "validUntil",
      "sourceChunkId" as "sourceChunkId",
      metadata,
      1 - (embedding <=> '${embeddingStr}'::vector) as similarity
    FROM memories
    WHERE ${whereClause}
    ORDER BY embedding <=> '${embeddingStr}'::vector
    LIMIT ${Number(limit)}
  `);
  return results;
}
|
|
226
|
+
/**
 * Augment a result set with memories linked via memory_relations (one hop,
 * outgoing edges only). Related memories are appended with a fixed 0.6
 * similarity so direct hits keep ranking precedence.
 *
 * Fixes over the previous version:
 * - Memory ids are single-quote escaped before interpolation into the raw
 *   SQL id lists (previously raw concatenation).
 * - The second query's LIMIT is clamped at 0 — a negative LIMIT is a
 *   Postgres error when `memories.length` exceeds `maxTotal`.
 *
 * @param {any[]} memories - Direct search hits (each must carry `id`).
 * @param {number} maxTotal - Cap on combined direct + related results.
 * @returns {Promise<any[]>} Original memories plus appended related entries.
 */
async function enrichWithRelations(memories, maxTotal) {
  if (memories.length === 0) {
    return [];
  }
  // Escape an id for inclusion in a single-quoted SQL literal.
  const sqlLit = (v) => `'${String(v).replace(/'/g, "''")}'`;
  const memoryIds = memories.map((m) => m.id);
  const memoryIdsList = memoryIds.map(sqlLit).join(",");
  const relationsQuery = await db.$queryRawUnsafe(`
    SELECT
      r.id as relation_id,
      r."fromMemoryId",
      r."toMemoryId",
      r."relationType",
      m.id,
      m.content,
      m."memoryType",
      m.confidence,
      m.version,
      m."documentDate",
      m."eventDate",
      m."validFrom",
      m."validUntil",
      m."sourceChunkId",
      m.metadata
    FROM "memory_relations" r
    LEFT JOIN memories m ON m.id = r."toMemoryId"
    WHERE r."fromMemoryId" IN (${memoryIdsList})
    AND m."isActive" = true
    LIMIT 100
  `);
  if (!relationsQuery || relationsQuery.length === 0) {
    return memories;
  }
  // Collect ids of related memories not already present, and remember which
  // source memory points at them (and via what relation type).
  const relatedIds = /* @__PURE__ */ new Set();
  const relationMap = /* @__PURE__ */ new Map();
  for (const row of relationsQuery) {
    if (row.toMemoryId && !memoryIds.includes(row.toMemoryId)) {
      relatedIds.add(row.toMemoryId);
      if (!relationMap.has(row.fromMemoryId)) {
        relationMap.set(row.fromMemoryId, []);
      }
      relationMap.get(row.fromMemoryId)?.push({
        memoryId: row.toMemoryId,
        relationType: row.relationType,
        content: row.content
      });
    }
  }
  if (relatedIds.size === 0) {
    return memories;
  }
  const relatedIdsList = Array.from(relatedIds).map(sqlLit).join(",");
  const relatedMemories = await db.$queryRawUnsafe(`
    SELECT
      id,
      content,
      "memoryType" as "memoryType",
      confidence,
      version,
      "documentDate" as "documentDate",
      "eventDate" as "eventDate",
      "validFrom" as "validFrom",
      "validUntil" as "validUntil",
      "sourceChunkId" as "sourceChunkId",
      metadata
    FROM memories
    WHERE id IN (${relatedIdsList})
    AND "isActive" = true
    LIMIT ${Math.max(maxTotal - memories.length, 0)}
  `);
  const relatedWithScores = relatedMemories.map((m) => ({
    ...m,
    // Fixed score below typical direct-hit similarity.
    similarity: 0.6,
    isRelated: true,
    relations: relationMap.get(m.id) || []
  }));
  return [...memories, ...relatedWithScores];
}
|
|
303
|
+
/**
 * Shape raw memory rows into the public search-result form and attach each
 * memory's source chunk (when present), fetched in a single batched query.
 *
 * Improvement: the nested `memory` payload was previously duplicated
 * verbatim in both return paths; it is now built by one local helper, and
 * the repeated `chunkMap.get()` lookups are hoisted.
 *
 * @param {any[]} memories - Scored memory rows (may carry an isRelated flag).
 * @returns {Promise<any[]>} Array of { memory, chunk?, similarity, relations? }.
 */
async function injectSourceChunks(memories) {
  // Build the nested `memory` payload shared by both return paths.
  const toMemoryDto = (m) => ({
    id: m.id,
    content: m.content,
    memoryType: m.memoryType,
    entityMentions: m.entityMentions || [],
    confidence: m.confidence,
    version: m.version,
    temporal: {
      documentDate: m.documentDate,
      eventDate: m.eventDate,
      validFrom: m.validFrom,
      validUntil: m.validUntil
    }
  });
  const chunkIds = memories.map((m) => m.sourceChunkId).filter((id) => id !== null);
  if (chunkIds.length === 0) {
    // No source chunks referenced at all: skip the DB round trip.
    return memories.map((m) => ({
      memory: toMemoryDto(m),
      similarity: m.similarity
    }));
  }
  const chunks = await db.chunk.findMany({
    where: {
      id: { in: chunkIds }
    },
    select: {
      id: true,
      content: true,
      metadata: true
    }
  });
  const chunkMap = new Map(chunks.map((c) => [c.id, c]));
  return memories.map((m) => {
    const chunk = m.sourceChunkId ? chunkMap.get(m.sourceChunkId) : void 0;
    return {
      memory: toMemoryDto(m),
      chunk: chunk ? {
        id: chunk.id,
        content: chunk.content,
        metadata: chunk.metadata
      } : void 0,
      similarity: m.similarity,
      // NOTE(review): related memories get an empty relations array here
      // while direct hits get undefined — preserved from the original;
      // confirm this asymmetry is intentional.
      relations: m.isRelated ? [] : void 0
    };
  });
}
|
|
359
|
+
/**
 * Fetch the most recent active memories for one session within a project.
 *
 * @param {object} params
 * @param {string} params.sessionId - Session scope.
 * @param {string} params.projectId - Project scope.
 * @param {number} [params.limit=50] - Max rows, newest first.
 * @param {Date} [params.sinceDate] - Only memories created at/after this time.
 * @returns {Promise<any[]>}
 */
async function getSessionMemories(params) {
  const { sessionId, projectId, limit = 50, sinceDate } = params;
  const where = {
    sessionId,
    projectId,
    isActive: true,
    // Apply the creation-time floor only when a cutoff was supplied.
    ...(sinceDate ? { createdAt: { gte: sinceDate } } : {})
  };
  return db.memory.findMany({
    where,
    orderBy: { createdAt: "desc" },
    take: limit
  });
}
|
|
377
|
+
/**
 * Fetch a user's active USER-scoped memories for a project, most important
 * first, capped at 100 rows regardless of the requested limit.
 *
 * @param {object} params
 * @param {string} params.userId - User scope.
 * @param {string} params.projectId - Project scope.
 * @param {string[]} [params.memoryTypes] - Optional type filter.
 * @param {number} [params.limit=50] - Requested row count (capped at 100).
 * @returns {Promise<any[]>}
 */
async function getUserProfile(params) {
  const { userId, projectId, memoryTypes, limit = 50 } = params;
  const where = {
    userId,
    projectId,
    isActive: true,
    scope: "USER",
    // Narrow to the requested types only when a filter was supplied.
    ...(memoryTypes ? { memoryType: { in: memoryTypes } } : {})
  };
  return db.memory.findMany({
    where,
    orderBy: { importance: "desc" },
    take: Math.min(limit, 100)
  });
}
|
|
394
|
+
|
|
395
|
+
// Public API of this chunk.
export {
  searchMemories,
  getSessionMemories,
  getUserProfile
};
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
import {
|
|
2
|
+
db,
|
|
3
|
+
embedSingle
|
|
4
|
+
} from "./chunk-MEFLJ4PV.js";
|
|
5
|
+
import "./chunk-QGM4M3NI.js";
|
|
6
|
+
|
|
7
|
+
// ../src/engine/memory/consolidation.ts
import OpenAI from "openai";

// Shared OpenAI client for merge prompts. Falling back to an empty-string
// key defers the failure from import time to the first API call.
var openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY || ""
});
|
|
12
|
+
/**
 * Greedy single-pass duplicate detection: for each yet-unclaimed memory
 * (highest importance first), batch-compare it against every later memory
 * and cluster those at or above the similarity threshold.
 *
 * @param {object} params
 * @param {string} params.projectId - Project scope.
 * @param {string} [params.userId] - Optional user scope.
 * @param {number} [params.similarityThreshold=0.95] - Duplicate cutoff.
 * @param {number} [params.limit=50] - Requested scan size, clamped to [10, 100].
 * @returns {Promise<any[]>} Clusters of
 *   { representative, duplicates, similarity (mean of duplicate scores) }.
 */
async function findDuplicateMemories(params) {
  const {
    projectId,
    userId,
    similarityThreshold = 0.95,
    limit = 50
  } = params;
  // Clamp the scan window to [10, 100] memories.
  const scanCount = Math.min(Math.max(limit, 10), 100);
  const memories = await db.memory.findMany({
    where: {
      projectId,
      userId,
      isActive: true,
      validUntil: null
    },
    orderBy: { importance: "desc" },
    take: scanCount
  });
  const clusters = [];
  const claimed = /* @__PURE__ */ new Set();
  for (const [idx, anchor] of memories.entries()) {
    if (claimed.has(anchor.id)) continue;
    // Compare against every memory after the anchor in one DB round trip.
    const candidates = memories.slice(idx + 1);
    const similarities = await calculateBatchSimilarity(
      anchor.id,
      candidates.map((c) => c.id)
    );
    const duplicates = [];
    candidates.forEach((candidate, j) => {
      if (claimed.has(candidate.id)) return;
      const score = similarities[j];
      if (score >= similarityThreshold) {
        duplicates.push({ ...candidate, similarity: score });
        claimed.add(candidate.id);
      }
    });
    if (duplicates.length > 0) {
      const total = duplicates.reduce((sum, d) => sum + d.similarity, 0);
      clusters.push({
        representative: anchor,
        duplicates,
        similarity: total / duplicates.length
      });
      claimed.add(anchor.id);
    }
  }
  return clusters;
}
|
|
58
|
+
/**
 * Cosine similarities between one memory's embedding and a batch of others,
 * computed in a single SQL round trip.
 *
 * Bug fix: the previous version built "$2 OR $3 ..."-style placeholder text
 * and interpolated it into the db.$queryRaw tagged template. Prisma binds
 * tagged-template interpolations as parameters, so the placeholder string
 * became one bound value and the query was malformed (the sibling
 * `placeholders` string was never used at all). The query is now issued via
 * $queryRawUnsafe with escaped literals and an IN list, which is equivalent
 * to the intended chain of OR conditions.
 *
 * @param {string} memoryId - Anchor memory id.
 * @param {string[]} otherIds - Ids to compare against; order is preserved.
 * @returns {Promise<number[]>} One similarity per entry of otherIds
 *   (0 when a row was not returned for that id).
 */
async function calculateBatchSimilarity(memoryId, otherIds) {
  if (otherIds.length === 0) return [];
  // Escape an id for inclusion in a single-quoted SQL literal.
  const sqlLit = (v) => `'${String(v).replace(/'/g, "''")}'`;
  const idList = otherIds.map(sqlLit).join(",");
  const result = await db.$queryRawUnsafe(`
    SELECT
      1 - (m1.embedding <=> m2.embedding) as similarity,
      m2.id as id
    FROM memories m1, memories m2
    WHERE m1.id = ${sqlLit(memoryId)} AND m2.id IN (${idList})
  `);
  const similarityMap = new Map(result.map((r) => [r.id, r.similarity]));
  return otherIds.map((id) => similarityMap.get(id) || 0);
}
|
|
72
|
+
/**
 * Merge one duplicate cluster into a single new memory via an LLM call,
 * then deactivate every source memory and point it at the merged record.
 *
 * @param {object} cluster - { representative, duplicates, similarity } as
 *   produced by findDuplicateMemories.
 * @returns {Promise<string>} Id of the newly created merged memory.
 * @throws {Error} "Failed to merge memories" when the model returns no
 *   content; JSON.parse may also throw on malformed model output.
 */
async function mergeDuplicateMemories(cluster) {
  const memories = [cluster.representative, ...cluster.duplicates];
  const prompt = `You are merging duplicate memories into a single, comprehensive memory.

**Memories to merge:**
${memories.map(
    (m, i) => `${i + 1}. "${m.content}" (confidence: ${m.confidence}, date: ${m.documentDate?.toISOString() || "unknown"})`
  ).join("\n")}

**Instructions:**
1. Combine all unique information from these memories
2. Resolve any contradictions by keeping the most recent or most confident information
3. Extract all unique entity mentions
4. Use the highest confidence score
5. Keep the most recent document date

Return JSON:
{
"merged_content": "comprehensive merged memory",
"entity_mentions": ["list", "of", "entities"],
"confidence": 0.0-1.0,
"reasoning": "brief explanation of how you merged"
}`;
  const response = await openai.chat.completions.create({
    model: "gpt-4o",
    max_tokens: 2048,
    temperature: 0,
    messages: [{ role: "user", content: prompt }],
    response_format: { type: "json_object" }
  });
  const text = response.choices[0]?.message?.content?.trim();
  if (!text) {
    throw new Error("Failed to merge memories");
  }
  // Tolerate fenced ```json blocks even though json_object mode is requested.
  const jsonMatch = text.match(/```json\n?([\s\S]*?)\n?```/) || text.match(/\{[\s\S]*\}/);
  const jsonStr = jsonMatch ? jsonMatch[1] || jsonMatch[0] : text;
  const result = JSON.parse(jsonStr);
  const embedding = await embedSingle(result.merged_content);
  // The merged record inherits its scope fields from the representative.
  const mergedMemory = await db.memory.create({
    data: {
      projectId: cluster.representative.projectId,
      orgId: cluster.representative.orgId,
      userId: cluster.representative.userId,
      sessionId: cluster.representative.sessionId,
      memoryType: cluster.representative.memoryType,
      content: result.merged_content,
      embedding,
      entityMentions: result.entity_mentions || [],
      confidence: result.confidence || cluster.representative.confidence,
      documentDate: cluster.representative.documentDate,
      eventDate: cluster.representative.eventDate,
      validFrom: /* @__PURE__ */ new Date(),
      // Merged memory is at least as important as its most important source.
      importance: Math.max(...memories.map((m) => m.importance || 0.5)),
      metadata: {
        mergedFrom: memories.map((m) => m.id),
        mergeReasoning: result.reasoning,
        mergedAt: (/* @__PURE__ */ new Date()).toISOString()
      }
    }
  });
  // Deactivate every source memory (representative included) and link it to
  // the merged record. Updates are sequential; not wrapped in a transaction.
  for (const memory of memories) {
    await db.memory.update({
      where: { id: memory.id },
      data: {
        isActive: false,
        validUntil: /* @__PURE__ */ new Date(),
        supersededBy: mergedMemory.id
      }
    });
  }
  return mergedMemory.id;
}
|
|
144
|
+
/**
 * Find duplicate-memory clusters in a project and merge each one. With
 * dryRun, only reports what would be merged without writing anything.
 * Failures on individual clusters are logged and skipped (best effort).
 *
 * @param {object} params
 * @param {string} params.projectId - Project to consolidate.
 * @param {string} [params.userId] - Optional user scope.
 * @param {number} [params.similarityThreshold=0.95] - Duplicate cutoff.
 * @param {boolean} [params.dryRun=false] - Report only, no writes.
 * @returns {Promise<{clustersFound: number, memoriesMerged: number,
 *   memoriesDeactivated: number}>} Summary counts.
 */
async function consolidateMemories(params) {
  const { projectId, userId, similarityThreshold = 0.95, dryRun = false } = params;
  console.log(`\u{1F50D} Finding duplicate memories in project ${projectId}...`);
  const clusters = await findDuplicateMemories({
    projectId,
    userId,
    similarityThreshold
  });
  console.log(`\u{1F4CA} Found ${clusters.length} memory clusters`);
  if (dryRun) {
    // Report what would be merged without touching the database.
    for (const cluster of clusters) {
      console.log(`
Cluster (similarity: ${cluster.similarity.toFixed(2)}):`);
      console.log(`  Representative: "${cluster.representative.content}"`);
      console.log(`  Duplicates: ${cluster.duplicates.length}`);
      cluster.duplicates.forEach((d) => {
        console.log(`    - "${d.content}"`);
      });
    }
    return {
      clustersFound: clusters.length,
      memoriesMerged: 0,
      memoriesDeactivated: 0
    };
  }
  let memoriesMerged = 0;
  let memoriesDeactivated = 0;
  for (const cluster of clusters) {
    try {
      console.log(`\u{1F517} Merging cluster with ${cluster.duplicates.length + 1} memories...`);
      await mergeDuplicateMemories(cluster);
      memoriesMerged++;
      // The representative is deactivated too, hence the +1.
      memoriesDeactivated += cluster.duplicates.length + 1;
      console.log(`\u2705 Merged successfully`);
    } catch (error) {
      // Best effort: a failed cluster does not abort the rest.
      console.error(`\u274C Failed to merge cluster:`, error);
    }
  }
  console.log(
    `
\u2705 Consolidation complete: ${memoriesMerged} clusters merged, ${memoriesDeactivated} memories deactivated`
  );
  return {
    clustersFound: clusters.length,
    memoriesMerged,
    memoriesDeactivated
  };
}
|
|
192
|
+
/**
 * Run memory consolidation over every project in an organization, using a
 * slightly lower similarity threshold (0.92) than on-demand runs. A failure
 * in one project is logged and does not stop the others.
 *
 * @param {string} orgId - Organization whose projects get consolidated.
 */
async function scheduledConsolidation(orgId) {
  console.log(`\u{1F504} Running scheduled consolidation for org ${orgId}...`);
  const projects = await db.project.findMany({ where: { orgId } });
  for (const project of projects) {
    try {
      const outcome = await consolidateMemories({
        projectId: project.id,
        similarityThreshold: 0.92
        // Slightly lower for scheduled runs
      });
      if (outcome.memoriesMerged > 0) {
        console.log(
          `\u{1F4CA} Project ${project.name}: merged ${outcome.memoriesMerged} clusters`
        );
      }
    } catch (error) {
      console.error(`Failed to consolidate project ${project.name}:`, error);
    }
  }
  console.log("\u2705 Scheduled consolidation complete");
}
|
|
215
|
+
// Public API of this chunk.
export {
  consolidateMemories,
  findDuplicateMemories,
  mergeDuplicateMemories,
  scheduledConsolidation
};
|