@usewhisper/mcp-server 0.2.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,616 @@
1
+ import {
2
+ extractEventDate
3
+ } from "./chunk-5KBZQHDL.js";
4
+ import {
5
+ db,
6
+ embedSingle
7
+ } from "./chunk-MEFLJ4PV.js";
8
+
// ../src/engine/memory/extractor.ts
import OpenAI from "openai";

// Module-level client shared by all extraction calls. The empty-string
// fallback keeps module evaluation from throwing when the key is unset;
// requests simply fail later with an auth error instead.
var openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY || ""
});

// Instructions given to the model for turning a conversation chunk into
// atomic, pronoun-free memory records. Referenced by extractMemories().
var EXTRACTION_PROMPT = `You are an expert memory extraction system. Your job is to extract atomic, unambiguous memories from conversation chunks.

**Critical Rules:**
1. Each memory must be a SINGLE fact/preference/event/relationship
2. Resolve ALL pronouns (he/she/it/they/them) to actual names using context
3. Resolve ALL ambiguous references ("the company", "that project") to specific entities
4. Extract temporal information when events occurred (not when mentioned)
5. Be conservative - only extract high-confidence memories

**Memory Types:**
- factual: Objective facts ("John works at Google")
- preference: User preferences ("Sarah prefers dark mode")
- event: Events with timestamps ("Team met on Jan 15, 2024")
- relationship: Relationships ("Alex reports to Maria")
- opinion: Subjective views ("User thinks Python is easier than Rust")
- goal: Future intentions ("User wants to learn machine learning")
- instruction: Persistent instructions ("Always use formal tone with clients")

**Disambiguation:**
- Replace "he" \u2192 actual name using context
- Replace "she" \u2192 actual name using context
- Replace "it" \u2192 specific thing using context
- Replace "the company" \u2192 company name
- Replace "that project" \u2192 project name

**Example:**
Input: "He said he prefers using React. The project will launch next week."
Context: Previous message: "Alex joined the team yesterday. He's working on the dashboard project."

Bad Output:
- "He prefers React" \u274C (ambiguous)
- "The project launches next week" \u274C (what project?)

Good Output:
- content: "Alex prefers using React for development"
type: preference
entities: ["Alex", "React"]
eventDate: null

- content: "Dashboard project launch scheduled for [specific date if mentioned]"
type: event
entities: ["Dashboard project"]
eventDate: [calculated date] or null if not specific`;
/**
 * Extract atomic memories from a conversation chunk via the OpenAI API.
 *
 * @param {string} chunk - The text to analyze.
 * @param {object} context - Extraction context; must provide documentDate
 *   (Date) and may provide previousMessages / entityContext for
 *   buildContextString.
 * @returns {Promise<Array>} Normalized memory candidates; [] on any failure
 *   (errors are logged, never thrown).
 */
async function extractMemories(chunk, context) {
  const contextStr = buildContextString(context);
  const prompt = `${EXTRACTION_PROMPT}

${contextStr}

**Current chunk to analyze:**
${chunk}

**Document Date (when this was said):** ${context.documentDate.toISOString()}

Extract memories and return a JSON array. For each memory:
{
  "content": "clear, unambiguous statement with no pronouns",
  "memoryType": "factual|preference|event|relationship|opinion|goal|instruction",
  "entityMentions": ["list", "of", "entities", "mentioned"],
  "eventDate": "ISO date string or null",
  "confidence": 0.0-1.0,
  "reasoning": "brief explanation of extraction"
}

Return ONLY the JSON array, no other text.`;
  try {
    const response = await openai.chat.completions.create({
      model: "gpt-4o",
      max_tokens: 4096,
      temperature: 0,
      // Deterministic for extraction
      messages: [
        {
          role: "user",
          content: prompt
        }
      ],
      // NOTE(review): json_object mode forces the model to emit a JSON
      // *object*, while the prompt asks for a bare array. parseMemoryArray
      // below unwraps a single-array wrapper object to cope with that.
      response_format: { type: "json_object" }
    });
    const text = response.choices[0]?.message?.content?.trim();
    if (!text) {
      throw new Error("No text response from OpenAI");
    }
    const rawMemories = parseMemoryArray(text);
    if (!Array.isArray(rawMemories)) {
      console.error("Expected array of memories, got:", rawMemories);
      return [];
    }
    return rawMemories.map((m) => ({
      content: m.content,
      memoryType: m.memoryType,
      entityMentions: m.entityMentions || [],
      // eventDate may be an unparseable string; Date() then yields an
      // Invalid Date — downstream consumers should tolerate that.
      eventDate: m.eventDate ? new Date(m.eventDate) : null,
      // ?? instead of ||: a model-reported confidence of 0 must stay 0
      // (and be rejected by validateMemory), not be promoted to 0.7.
      confidence: m.confidence ?? 0.7,
      reasoning: m.reasoning
    }));
  } catch (error) {
    console.error("Memory extraction failed:", error);
    return [];
  }
}

// Parse a model response into an array of raw memory objects. Handles
// fenced ```json blocks, bare arrays, and — because json_object mode
// forces an object — a wrapper object whose single array-valued property
// holds the memories. Throws on invalid JSON (caught by the caller).
function parseMemoryArray(text) {
  const jsonMatch = text.match(/```json\n?([\s\S]*?)\n?```/) || text.match(/\[[\s\S]*\]/);
  const jsonStr = jsonMatch ? jsonMatch[1] || jsonMatch[0] : text;
  const parsed = JSON.parse(jsonStr);
  if (Array.isArray(parsed)) {
    return parsed;
  }
  if (parsed && typeof parsed === "object") {
    const arrays = Object.values(parsed).filter(Array.isArray);
    if (arrays.length === 1) {
      return arrays[0];
    }
  }
  return parsed;
}
/**
 * Render the extraction context as a markdown snippet for the prompt:
 * the last five prior messages plus any known pronoun-to-entity bindings.
 * Returns a "none available" placeholder when there is nothing to show.
 */
function buildContextString(context) {
  const sections = [];
  const { previousMessages, entityContext } = context;
  if (previousMessages?.length > 0) {
    sections.push(
      "**Context from previous messages:**",
      previousMessages.slice(-5).join("\n")
    );
  }
  if (entityContext?.size > 0) {
    sections.push("\n**Known entities:**");
    for (const [pronoun, name] of entityContext) {
      sections.push(`- "${pronoun}" refers to ${name}`);
    }
  }
  return sections.length > 0 ? sections.join("\n") : "**Context:** None available";
}
/**
 * Build a pronoun -> entity map from recent memories for disambiguation.
 * Any entity that looks like a proper name (capitalized words) is bound to
 * "he", "she" and "they" alike; the most recently seen name wins for all
 * three pronouns.
 */
function buildEntityContext(recentMemories) {
  const PROPER_NAME = /^[A-Z][a-z]+(?:\s[A-Z][a-z]+)*$/;
  const entityMap = new Map();
  for (const { entityMentions } of recentMemories) {
    for (const entity of entityMentions) {
      if (!PROPER_NAME.test(entity)) {
        continue;
      }
      for (const pronoun of ["he", "she", "they"]) {
        entityMap.set(pronoun, entity);
      }
    }
  }
  return entityMap;
}
/**
 * Quality gate for extracted memories. Rejects candidates that are
 * low-confidence (< 0.6), trivially short (< 10 chars), or that still
 * contain unresolved pronouns / vague references the extractor was
 * supposed to disambiguate.
 */
function validateMemory(memory) {
  const { content, confidence } = memory;
  // Reject low-confidence or trivially short extractions outright.
  if (confidence < 0.6 || content.length < 10) {
    return false;
  }
  if (/\b(he|she|it|they|them|his|her|their)\b/i.test(content)) {
    console.warn("Memory contains unresolved pronouns:", content);
    return false;
  }
  if (/\b(the company|that project|this thing|the system)\b/i.test(content)) {
    console.warn("Memory contains vague references:", content);
    return false;
  }
  return true;
}
// ../src/engine/memory/relations.ts
import OpenAI2 from "openai";

// Dedicated client for relation detection; same empty-key fallback as the
// extractor so module load never throws.
var openai2 = new OpenAI2({
  apiKey: process.env.OPENAI_API_KEY || ""
});

// Instructions given to the model for classifying how a new memory relates
// to existing ones. Referenced by detectRelations().
var RELATION_DETECTION_PROMPT = `You are an expert at detecting relationships between memories in a knowledge graph.

**Relation Types:**

1. **updates** - New memory supersedes/replaces old memory (state mutation)
Example:
- Old: "User's favorite color is blue"
- New: "User's favorite color is green"
- Relation: updates (green replaces blue)

2. **extends** - New memory adds detail to existing memory without contradiction (refinement)
Example:
- Old: "John works at Google"
- New: "John works at Google as a Senior Engineer"
- Relation: extends (adds job title)

3. **derives** - New memory is inferred from existing memory/memories (inference)
Example:
- Memory 1: "User prefers dark mode"
- Memory 2: "User prefers high contrast"
- New: "User likely has vision preferences for accessibility"
- Relation: derives (inferred from both)

4. **contradicts** - New memory conflicts with existing memory (conflict detection)
Example:
- Old: "Meeting scheduled for 3pm"
- New: "Meeting scheduled for 4pm"
- Relation: contradicts (should trigger update)

5. **supports** - New memory provides evidence/support for existing memory
Example:
- Memory 1: "User is interested in ML"
- New: "User enrolled in ML course"
- Relation: supports (confirms interest)

**Important:**
- Only detect relations when there's a clear, meaningful connection
- Be conservative - if unsure, don't create a relation
- "updates" should invalidate the old memory (set validUntil)
- "extends" keeps the old memory valid but adds information
- "contradicts" should flag for review/resolution`;
/**
 * Ask the model how a new memory relates to relevant existing memories.
 *
 * @param {object} newMemory - { content, memoryType, entityMentions }.
 * @param {Array} existingMemories - Candidate memories (must carry id,
 *   content, memoryType, entityMentions, documentDate).
 * @returns {Promise<Array>} Relations with confidence >= 0.7, each mapped
 *   to a concrete toMemoryId; [] on any failure (errors are logged).
 */
async function detectRelations(newMemory, existingMemories) {
  if (existingMemories.length === 0) {
    return [];
  }
  const relevantMemories = filterRelevantMemories(newMemory, existingMemories);
  if (relevantMemories.length === 0) {
    return [];
  }
  const prompt = `${RELATION_DETECTION_PROMPT}

**New memory:**
"${newMemory.content}"
Type: ${newMemory.memoryType}
Entities: ${newMemory.entityMentions.join(", ")}

**Existing memories to check against:**
${relevantMemories.map((m, i) => `${i}. "${m.content}" (Type: ${m.memoryType}, Date: ${m.documentDate?.toISOString() || "unknown"})`).join("\n")}

Analyze if the new memory relates to any existing memories.

Return a JSON array of relations:
[{
  "toMemoryIndex": 0,
  "relationType": "updates|extends|derives|contradicts|supports",
  "confidence": 0.0-1.0,
  "reasoning": "brief explanation why this relation exists"
}]

Return ONLY the JSON array. If no relations found, return [].`;
  try {
    const response = await openai2.chat.completions.create({
      model: "gpt-4o",
      max_tokens: 2048,
      temperature: 0,
      messages: [{ role: "user", content: prompt }],
      response_format: { type: "json_object" }
    });
    const text = response.choices[0]?.message?.content?.trim();
    if (!text) {
      return [];
    }
    const jsonMatch = text.match(/```json\n?([\s\S]*?)\n?```/) || text.match(/\[[\s\S]*\]/);
    const jsonStr = jsonMatch ? jsonMatch[1] || jsonMatch[0] : text;
    const relations = JSON.parse(jsonStr);
    if (!Array.isArray(relations)) {
      return [];
    }
    // Validate toMemoryIndex BEFORE dereferencing: previously a single
    // hallucinated/out-of-range index threw inside this try-block and
    // silently discarded every valid relation in the batch.
    return relations
      .filter(
        (r) =>
          r.confidence >= 0.7 &&
          Number.isInteger(r.toMemoryIndex) &&
          r.toMemoryIndex >= 0 &&
          r.toMemoryIndex < relevantMemories.length
      )
      .map((r) => ({
        toMemoryId: relevantMemories[r.toMemoryIndex].id,
        relationType: r.relationType,
        confidence: r.confidence,
        reasoning: r.reasoning
      }));
  } catch (error) {
    console.error("Relation detection failed:", error);
    return [];
  }
}
/**
 * Narrow the candidate set for relation detection: keep memories that
 * share an entity with the new memory, or whose content overlaps on at
 * least two keywords.
 *
 * Perf fix: the new memory's keywords and entity set are loop-invariant,
 * so compute them once instead of on every filter iteration; Set lookups
 * replace repeated O(n) Array.includes scans.
 */
function filterRelevantMemories(newMemory, existingMemories) {
  const newWords = extractKeywords(newMemory.content);
  const newEntitySet = new Set(newMemory.entityMentions);
  return existingMemories.filter((existing) => {
    if (existing.entityMentions.some((entity) => newEntitySet.has(entity))) {
      return true;
    }
    const existingWords = new Set(extractKeywords(existing.content));
    let overlap = 0;
    for (const word of newWords) {
      if (existingWords.has(word)) {
        overlap++;
      }
    }
    return overlap >= 2;
  });
}
/**
 * Pull up to ten topical keywords from a memory's text: lowercase words
 * longer than three characters that are not common function words.
 */
function extractKeywords(text) {
  // Common function words that carry no topical signal.
  const stopWords = new Set([
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "must", "can", "to", "of", "in", "for",
    "on", "at", "by", "from", "with", "about"
  ]);
  const keywords = [];
  for (const word of text.toLowerCase().split(/\W+/)) {
    if (keywords.length === 10) {
      break;
    }
    if (word.length > 3 && !stopWords.has(word)) {
      keywords.push(word);
    }
  }
  return keywords;
}
/**
 * Whether a relation type supersedes the prior memory: only "updates" and
 * "contradicts" close out the old memory's validity window.
 */
function shouldInvalidateMemory(relationType) {
  return ["updates", "contradicts"].includes(relationType);
}
327
+
// ../src/engine/memory/ingest.ts

/**
 * Ingest a conversation session: extract memories from the latest message,
 * detect relations against recent memories, persist each memory with its
 * embedding via raw SQL (pgvector column), and invalidate superseded
 * memories. Per-memory and per-relation failures are collected into
 * result.errors rather than aborting the whole ingestion.
 *
 * @param {object} params - { sessionId, projectId, orgId, userId, messages }.
 * @returns {Promise<object>} Counters plus an errors array; never throws.
 */
async function ingestSession(params) {
  const { sessionId, projectId, orgId, userId, messages } = params;
  const result = {
    memoriesCreated: 0,
    relationsCreated: 0,
    memoriesInvalidated: 0,
    errors: []
  };
  if (messages.length === 0) {
    return result;
  }
  try {
    // Everything before the last message is context; the last message is
    // the document being extracted, dated by its own timestamp.
    const context = {
      sessionId,
      userId: userId || "unknown",
      projectId,
      orgId,
      documentDate: messages[messages.length - 1].timestamp,
      previousMessages: messages.slice(0, -1).map((m) => `${m.role}: ${m.content}`)
    };
    // Pronoun-resolution hints come from the 20 most recent active
    // memories of this session.
    const recentMemories = await db.memory.findMany({
      where: {
        sessionId,
        projectId,
        isActive: true
      },
      orderBy: {
        createdAt: "desc"
      },
      take: 20,
      select: {
        content: true,
        entityMentions: true
      }
    });
    context.entityContext = buildEntityContext(recentMemories);
    const latestMessage = messages[messages.length - 1].content;
    const extractedMemories = await extractMemories(latestMessage, context);
    const validMemories = extractedMemories.filter(validateMemory);
    if (validMemories.length === 0) {
      return result;
    }
    // Check against last 100 memories
    const existingMemories = await db.memory.findMany({
      where: {
        projectId,
        userId,
        isActive: true
      },
      orderBy: {
        createdAt: "desc"
      },
      take: 100,
      select: {
        id: true,
        content: true,
        memoryType: true,
        entityMentions: true,
        documentDate: true
      }
    });
    for (const extracted of validMemories) {
      try {
        const relations = await detectRelations(
          {
            content: extracted.content,
            memoryType: extracted.memoryType,
            entityMentions: extracted.entityMentions
          },
          existingMemories
        );
        // Prefer the date the extractor found; otherwise derive one from
        // the memory text relative to the document date.
        const eventDate = extracted.eventDate || await extractEventDate(
          extracted.content,
          context.documentDate
        );
        const embedding = await embedSingle(extracted.content);
        const embeddingStr = `[${embedding.join(",")}]`;
        // Raw SQL: the embedding must be cast to ::vector, which the
        // regular Prisma client cannot express.
        const [memory] = await db.$queryRaw`
          INSERT INTO "memories" (
            id, "projectId", "orgId", "userId", "sessionId", "memoryType",
            content, embedding, entity_mentions, confidence, "documentDate",
            "eventDate", "validFrom", metadata, "createdAt", "updatedAt"
          )
          VALUES (
            gen_random_uuid(), ${projectId}, ${orgId || null}, ${userId || null},
            ${sessionId}, ${extracted.memoryType}, ${extracted.content},
            ${embeddingStr}::vector, ${JSON.stringify(extracted.entityMentions)}::jsonb, ${extracted.confidence},
            ${context.documentDate}, ${eventDate}, NOW(),
            ${JSON.stringify({
              reasoning: extracted.reasoning,
              extractedFrom: "session_ingestion"
            })}::jsonb, NOW(), NOW()
          )
          RETURNING id, "projectId", "orgId", "userId", "sessionId", "memoryType",
            content, confidence, "documentDate",
            "eventDate", "validFrom", metadata, "createdAt", "updatedAt"
        `;
        result.memoriesCreated++;
        for (const relation of relations) {
          try {
            await db.memoryRelation.create({
              data: {
                fromMemoryId: memory.id,
                toMemoryId: relation.toMemoryId,
                relationType: relation.relationType,
                confidence: relation.confidence,
                reasoning: relation.reasoning
              }
            });
            result.relationsCreated++;
            if (shouldInvalidateMemory(relation.relationType)) {
              // Close the superseded memory's validity window and point it
              // at its replacement.
              await db.memory.update({
                where: { id: relation.toMemoryId },
                data: {
                  validUntil: new Date(),
                  supersededBy: memory.id
                }
              });
              // Carry the version counter forward onto the new memory.
              const oldMemory = await db.memory.findUnique({
                where: { id: relation.toMemoryId },
                select: { version: true }
              });
              if (oldMemory) {
                await db.memory.update({
                  where: { id: memory.id },
                  data: { version: oldMemory.version + 1 }
                });
              }
              result.memoriesInvalidated++;
            }
          } catch (error) {
            result.errors.push(`Failed to create relation: ${error}`);
          }
        }
      } catch (error) {
        result.errors.push(`Failed to process memory: ${error}`);
      }
    }
    return result;
  } catch (error) {
    result.errors.push(`Ingestion failed: ${error}`);
    return result;
  }
}
/**
 * Ingest a single document chunk: extract memories and persist each one
 * with document-level scope. Unlike session ingestion, no relation
 * detection is performed and the standard Prisma client is used.
 *
 * @param {object} params - { chunkId, chunkContent, projectId, orgId,
 *   documentDate, metadata }.
 * @returns {Promise<object>} Counters plus an errors array; never throws.
 */
async function ingestChunk(params) {
  const { chunkId, chunkContent, projectId, orgId, documentDate, metadata } = params;
  const result = {
    memoriesCreated: 0,
    relationsCreated: 0,
    memoriesInvalidated: 0,
    errors: []
  };
  try {
    // Synthetic session id so chunk memories share the session schema.
    const context = {
      sessionId: `chunk_${chunkId}`,
      userId: "system",
      projectId,
      orgId,
      documentDate
    };
    const extractedMemories = await extractMemories(chunkContent, context);
    const validMemories = extractedMemories.filter(validateMemory);
    for (const extracted of validMemories) {
      // Prefer the date the extractor found; otherwise derive one from
      // the memory text relative to the document date.
      const eventDate = extracted.eventDate || await extractEventDate(
        extracted.content,
        documentDate
      );
      const embedding = await embedSingle(extracted.content);
      await db.memory.create({
        data: {
          projectId,
          orgId,
          memoryType: extracted.memoryType,
          content: extracted.content,
          embedding,
          entityMentions: extracted.entityMentions,
          confidence: extracted.confidence,
          documentDate,
          eventDate,
          validFrom: new Date(),
          sourceChunkId: chunkId,
          scope: "DOCUMENT",
          // Document-level scope
          metadata: {
            ...metadata,
            reasoning: extracted.reasoning
          }
        }
      });
      result.memoriesCreated++;
    }
    return result;
  } catch (error) {
    result.errors.push(`Chunk ingestion failed: ${error}`);
    return result;
  }
}
/**
 * Ingest many chunks, ten at a time in parallel, aggregating the
 * per-chunk counters and errors into a single result.
 *
 * @param {object} params - { chunks, projectId, orgId, documentDate }.
 * @returns {Promise<object>} Aggregated counters plus all errors.
 */
async function ingestChunksBatch(params) {
  const { chunks, projectId, orgId, documentDate } = params;
  const aggregateResult = {
    memoriesCreated: 0,
    relationsCreated: 0,
    memoriesInvalidated: 0,
    errors: []
  };
  const batchSize = 10;
  for (let offset = 0; offset < chunks.length; offset += batchSize) {
    // Each window of up to 10 chunks is ingested concurrently; windows
    // run sequentially to bound API/DB load.
    const window = chunks.slice(offset, offset + batchSize);
    const results = await Promise.all(
      window.map((chunk) =>
        ingestChunk({
          chunkId: chunk.id,
          chunkContent: chunk.content,
          projectId,
          orgId,
          documentDate,
          metadata: chunk.metadata
        })
      )
    );
    for (const { memoriesCreated, relationsCreated, memoriesInvalidated, errors } of results) {
      aggregateResult.memoriesCreated += memoriesCreated;
      aggregateResult.relationsCreated += relationsCreated;
      aggregateResult.memoriesInvalidated += memoriesInvalidated;
      aggregateResult.errors.push(...errors);
    }
  }
  return aggregateResult;
}
/**
 * Manually supersede a memory with new content. Creates a new versioned
 * memory row inheriting everything except content/embedding, closes the
 * old row's validity window, and records an explicit "updates" relation.
 *
 * @param {object} params - { memoryId, newContent, reasoning }.
 * @returns {Promise<{newMemoryId: string, oldMemoryId: string}>}
 * @throws {Error} When memoryId does not exist.
 */
async function updateMemory(params) {
  const { memoryId, newContent, reasoning } = params;
  const oldMemory = await db.memory.findUnique({
    where: { id: memoryId }
  });
  if (!oldMemory) {
    throw new Error("Memory not found");
  }
  const embedding = await embedSingle(newContent);
  // New row carries the old row's identity fields; version bumps by one.
  const newMemory = await db.memory.create({
    data: {
      projectId: oldMemory.projectId,
      orgId: oldMemory.orgId,
      userId: oldMemory.userId,
      sessionId: oldMemory.sessionId,
      memoryType: oldMemory.memoryType,
      content: newContent,
      embedding,
      entityMentions: oldMemory.entityMentions,
      confidence: oldMemory.confidence,
      documentDate: oldMemory.documentDate,
      eventDate: oldMemory.eventDate,
      validFrom: new Date(),
      version: oldMemory.version + 1,
      scope: oldMemory.scope,
      metadata: {
        ...oldMemory.metadata,
        updateReasoning: reasoning
      }
    }
  });
  // Retire the old row and point it at its replacement.
  await db.memory.update({
    where: { id: memoryId },
    data: {
      validUntil: new Date(),
      supersededBy: newMemory.id
    }
  });
  await db.memoryRelation.create({
    data: {
      fromMemoryId: newMemory.id,
      toMemoryId: memoryId,
      relationType: "updates",
      confidence: 1,
      reasoning: reasoning || "Manual update"
    }
  });
  return {
    newMemoryId: newMemory.id,
    oldMemoryId: memoryId
  };
}
610
+
611
+ export {
612
+ ingestSession,
613
+ ingestChunk,
614
+ ingestChunksBatch,
615
+ updateMemory
616
+ };