@199-bio/engram 0.8.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,444 @@
1
+ /**
2
+ * Consolidation Plan
3
+ *
4
+ * Implements a Standard Operating Procedure for safe consolidation of large backlogs.
5
+ * Prevents damage through:
6
+ * - Assessment and recovery mode detection
7
+ * - Prioritization (recent + high importance first)
8
+ * - Rate limiting with delays between API calls
9
+ * - Budget tracking and cost caps
10
+ * - Checkpointing for resume capability
11
+ * - Validation with soft rollback triggers
12
+ */
13
+
14
+ import { randomUUID } from "crypto";
15
+ import { EngramDatabase, Memory, Episode, ConsolidationCheckpoint } from "../storage/database.js";
16
+
17
+ // Token pricing (Opus 4.5 with extended thinking)
18
+ const PRICING = {
19
+ opus: {
20
+ input: 15 / 1_000_000, // $15 per 1M input tokens
21
+ output: 75 / 1_000_000, // $75 per 1M output tokens
22
+ thinking: 15 / 1_000_000, // $15 per 1M thinking tokens (same as input)
23
+ },
24
+ haiku: {
25
+ input: 0.80 / 1_000_000, // $0.80 per 1M input tokens
26
+ output: 4.00 / 1_000_000, // $4.00 per 1M output tokens
27
+ },
28
+ };
29
+
30
+ // Estimated tokens per operation (conservative estimates)
31
+ const TOKEN_ESTIMATES = {
32
+ episodeBatch: {
33
+ input: 2000, // Conversation text + system prompt
34
+ output: 1000, // Extracted memories JSON
35
+ },
36
+ memoryBatch: {
37
+ input: 3000, // Memories + system prompt
38
+ output: 2000, // Digest + contradictions
39
+ thinking: 10000, // Extended thinking budget
40
+ },
41
+ entityProfile: {
42
+ input: 4000,
43
+ output: 3000,
44
+ thinking: 16000,
45
+ },
46
+ };
47
+
48
/**
 * Snapshot of the unconsolidated backlog together with a budget-aware
 * execution plan. Produced by ConsolidationPlan.assessBacklog().
 */
export interface BacklogAssessment {
  /** Memories awaiting consolidation. */
  unconsolidatedMemories: number;
  /** Episodes awaiting consolidation. */
  unconsolidatedEpisodes: number;
  /** True when either backlog exceeds the `recovery_mode_threshold` config. */
  isRecoveryMode: boolean;
  /** Total episode + memory batches needed to clear the whole backlog. */
  estimatedBatches: number;
  /** Estimated API cost (USD) across the planned phases. */
  estimatedCost: number;
  /** Configured daily spending cap (USD, `daily_budget_usd`). */
  dailyBudget: number;
  /** Amount already spent today (USD). */
  dailySpent: number;
  /** max(0, dailyBudget - dailySpent). */
  budgetRemaining: number;
  /** True when estimatedCost fits within budgetRemaining. */
  canProceed: boolean;
  /** Batch cap for this run (reduced to at most 3 in recovery mode). */
  recommendedBatches: number;
  /** Per-phase breakdown of item counts, batches, cost, and time. */
  phases: PhasePlan[];
}
61
+
62
/** Planned work for one consolidation phase. */
export interface PhasePlan {
  /** Phase identifier; "decay" and "cleanup" are local-only (no API calls). */
  phase: "episodes" | "memories" | "decay" | "cleanup";
  /** Number of backlog items this phase will process. */
  itemCount: number;
  /** Number of API batches required for this phase. */
  batchCount: number;
  /** Estimated API cost for this phase (USD). */
  estimatedCost: number;
  /** Rough wall-clock estimate including inter-call delays. */
  estimatedTimeMs: number;
}
69
+
70
/**
 * Live progress for a consolidation run, derived from the run's persisted
 * checkpoint plus in-memory error tracking.
 */
export interface ConsolidationProgress {
  /** Unique id of the run (matches the checkpoint's run_id). */
  runId: string;
  /** Current phase; defaults to "episodes" when no checkpoint exists yet. */
  phase: ConsolidationCheckpoint["phase"];
  batchesCompleted: number;
  batchesTotal: number;
  memoriesProcessed: number;
  episodesProcessed: number;
  digestsCreated: number;
  contradictionsFound: number;
  /** Total tokens consumed so far. */
  tokensUsed: number;
  /** Estimated spend so far (USD). */
  estimatedCost: number;
  /** Errors recorded in-memory for this run (not persisted in checkpoint). */
  errors: string[];
  startedAt: Date;
  /** Milliseconds since the checkpoint was started (0 if no checkpoint). */
  elapsedMs: number;
}
85
+
86
/**
 * Result of evaluating one soft-rollback condition.
 * NOTE(review): the "contradiction_rate" variant is declared here but is not
 * currently produced by ConsolidationPlan.checkRollbackTriggers() — confirm
 * whether it is emitted elsewhere or reserved for future use.
 */
export interface RollbackTrigger {
  type: "error_rate" | "empty_digests" | "contradiction_rate" | "budget_exceeded";
  /** Configured threshold the metric is compared against. */
  threshold: number;
  /** Current observed value of the metric. */
  current: number;
  /** True when `current` exceeds `threshold`. */
  triggered: boolean;
  /** Human-readable description of the comparison. */
  message: string;
}
93
+
94
+ export class ConsolidationPlan {
95
+ private db: EngramDatabase;
96
+ private runId: string;
97
+ private errors: string[] = [];
98
+ private emptyDigests: number = 0;
99
+ private totalDigests: number = 0;
100
+ private apiCalls: number = 0;
101
+ private apiErrors: number = 0;
102
+
103
+ constructor(db: EngramDatabase) {
104
+ this.db = db;
105
+ this.runId = randomUUID();
106
+ }
107
+
108
+ /**
109
+ * Assess the current backlog and create a consolidation plan
110
+ */
111
+ assessBacklog(): BacklogAssessment {
112
+ const unconsolidatedMem = this.db.getUnconsolidatedMemories(undefined, 10000);
113
+ const unconsolidatedEp = this.db.getUnconsolidatedEpisodes(10000);
114
+
115
+ const recoveryThreshold = this.db.getConfigNumber("recovery_mode_threshold", 100);
116
+ const isRecoveryMode = unconsolidatedMem.length > recoveryThreshold ||
117
+ unconsolidatedEp.length > recoveryThreshold;
118
+
119
+ const dailyBudget = this.db.getConfigNumber("daily_budget_usd", 5.0);
120
+ const dailySpent = this.db.getDailySpending();
121
+ const budgetRemaining = Math.max(0, dailyBudget - dailySpent);
122
+
123
+ const maxBatchesPerRun = this.db.getConfigNumber("max_batches_per_run", 5);
124
+
125
+ // Calculate phase plans
126
+ const episodeBatches = Math.ceil(unconsolidatedEp.length / 20);
127
+ const memoryBatches = Math.ceil(unconsolidatedMem.length / 15);
128
+ const totalBatches = episodeBatches + memoryBatches;
129
+
130
+ const phases: PhasePlan[] = [];
131
+ const delayMs = this.db.getConfigNumber("delay_between_calls_ms", 2000);
132
+
133
+ // Episode phase (Haiku - cheap)
134
+ if (unconsolidatedEp.length >= 4) {
135
+ const batchCount = Math.min(episodeBatches, maxBatchesPerRun);
136
+ const cost = batchCount * this.estimateEpisodeBatchCost();
137
+ phases.push({
138
+ phase: "episodes",
139
+ itemCount: Math.min(unconsolidatedEp.length, batchCount * 20),
140
+ batchCount,
141
+ estimatedCost: cost,
142
+ estimatedTimeMs: batchCount * (2000 + delayMs), // ~2s per Haiku call + delay
143
+ });
144
+ }
145
+
146
+ // Memory phase (Opus with thinking - expensive)
147
+ if (unconsolidatedMem.length >= 5) {
148
+ const batchCount = Math.min(memoryBatches, maxBatchesPerRun);
149
+ const cost = batchCount * this.estimateMemoryBatchCost();
150
+ phases.push({
151
+ phase: "memories",
152
+ itemCount: Math.min(unconsolidatedMem.length, batchCount * 15),
153
+ batchCount,
154
+ estimatedCost: cost,
155
+ estimatedTimeMs: batchCount * (15000 + delayMs), // ~15s per Opus call + delay
156
+ });
157
+ }
158
+
159
+ // Decay and cleanup phases (no API calls)
160
+ phases.push({ phase: "decay", itemCount: 0, batchCount: 0, estimatedCost: 0, estimatedTimeMs: 100 });
161
+ phases.push({ phase: "cleanup", itemCount: 0, batchCount: 0, estimatedCost: 0, estimatedTimeMs: 100 });
162
+
163
+ const estimatedCost = phases.reduce((sum, p) => sum + p.estimatedCost, 0);
164
+ const canProceed = estimatedCost <= budgetRemaining;
165
+
166
+ // In recovery mode, be more conservative
167
+ const recommendedBatches = isRecoveryMode
168
+ ? Math.min(3, maxBatchesPerRun)
169
+ : maxBatchesPerRun;
170
+
171
+ return {
172
+ unconsolidatedMemories: unconsolidatedMem.length,
173
+ unconsolidatedEpisodes: unconsolidatedEp.length,
174
+ isRecoveryMode,
175
+ estimatedBatches: totalBatches,
176
+ estimatedCost,
177
+ dailyBudget,
178
+ dailySpent,
179
+ budgetRemaining,
180
+ canProceed,
181
+ recommendedBatches,
182
+ phases,
183
+ };
184
+ }
185
+
186
+ /**
187
+ * Get prioritized memories for consolidation
188
+ * Priority: recent + high importance first, then older chronologically
189
+ */
190
+ getPrioritizedMemories(limit: number): Memory[] {
191
+ const allMemories = this.db.getUnconsolidatedMemories(undefined, 10000);
192
+
193
+ // Score each memory
194
+ const now = Date.now();
195
+ const dayMs = 24 * 60 * 60 * 1000;
196
+
197
+ const scored = allMemories.map(m => {
198
+ const ageHours = (now - m.timestamp.getTime()) / (60 * 60 * 1000);
199
+ const ageDays = ageHours / 24;
200
+
201
+ // Recency score: 1.0 for today, decays over 7 days
202
+ const recencyScore = Math.max(0, 1 - (ageDays / 7));
203
+
204
+ // Importance score: 0-1
205
+ const importanceScore = m.importance;
206
+
207
+ // Emotional weight: 0-1
208
+ const emotionalScore = m.emotional_weight;
209
+
210
+ // Access frequency bonus
211
+ const accessBonus = Math.min(0.2, m.access_count * 0.05);
212
+
213
+ // Combined priority (weights: recency 40%, importance 30%, emotional 20%, access 10%)
214
+ const priority = (recencyScore * 0.4) +
215
+ (importanceScore * 0.3) +
216
+ (emotionalScore * 0.2) +
217
+ (accessBonus * 0.1);
218
+
219
+ return { memory: m, priority };
220
+ });
221
+
222
+ // Sort by priority (highest first)
223
+ scored.sort((a, b) => b.priority - a.priority);
224
+
225
+ return scored.slice(0, limit).map(s => s.memory);
226
+ }
227
+
228
+ /**
229
+ * Get prioritized episodes for consolidation
230
+ */
231
+ getPrioritizedEpisodes(limit: number): Episode[] {
232
+ const episodes = this.db.getUnconsolidatedEpisodes(limit);
233
+
234
+ // Sort by timestamp (oldest first for episodes - process in order)
235
+ episodes.sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime());
236
+
237
+ return episodes;
238
+ }
239
+
240
+ /**
241
+ * Create checkpoint for this run
242
+ */
243
+ createCheckpoint(phase: ConsolidationCheckpoint["phase"], batchesTotal: number): ConsolidationCheckpoint {
244
+ return this.db.createCheckpoint(this.runId, phase, batchesTotal);
245
+ }
246
+
247
+ /**
248
+ * Update checkpoint progress
249
+ */
250
+ updateProgress(updates: Partial<{
251
+ phase: ConsolidationCheckpoint["phase"];
252
+ batchesCompleted: number;
253
+ batchesTotal: number;
254
+ memoriesProcessed: number;
255
+ episodesProcessed: number;
256
+ digestsCreated: number;
257
+ contradictionsFound: number;
258
+ tokensUsed: number;
259
+ estimatedCost: number;
260
+ }>): void {
261
+ this.db.updateCheckpoint(this.runId, {
262
+ phase: updates.phase,
263
+ batches_completed: updates.batchesCompleted,
264
+ batches_total: updates.batchesTotal,
265
+ memories_processed: updates.memoriesProcessed,
266
+ episodes_processed: updates.episodesProcessed,
267
+ digests_created: updates.digestsCreated,
268
+ contradictions_found: updates.contradictionsFound,
269
+ tokens_used: updates.tokensUsed,
270
+ estimated_cost_usd: updates.estimatedCost,
271
+ });
272
+ }
273
+
274
+ /**
275
+ * Mark run as complete
276
+ */
277
+ complete(): void {
278
+ this.db.completeCheckpoint(this.runId);
279
+ }
280
+
281
+ /**
282
+ * Mark run as failed
283
+ */
284
+ fail(error: string): void {
285
+ this.db.updateCheckpoint(this.runId, { error });
286
+ }
287
+
288
+ /**
289
+ * Get current progress
290
+ */
291
+ getProgress(): ConsolidationProgress {
292
+ const checkpoint = this.db.getCheckpoint(this.runId);
293
+
294
+ return {
295
+ runId: this.runId,
296
+ phase: checkpoint?.phase || "episodes",
297
+ batchesCompleted: checkpoint?.batches_completed || 0,
298
+ batchesTotal: checkpoint?.batches_total || 0,
299
+ memoriesProcessed: checkpoint?.memories_processed || 0,
300
+ episodesProcessed: checkpoint?.episodes_processed || 0,
301
+ digestsCreated: checkpoint?.digests_created || 0,
302
+ contradictionsFound: checkpoint?.contradictions_found || 0,
303
+ tokensUsed: checkpoint?.tokens_used || 0,
304
+ estimatedCost: checkpoint?.estimated_cost_usd || 0,
305
+ errors: this.errors,
306
+ startedAt: checkpoint?.started_at || new Date(),
307
+ elapsedMs: checkpoint ? Date.now() - checkpoint.started_at.getTime() : 0,
308
+ };
309
+ }
310
+
311
+ /**
312
+ * Check if we should resume a previous incomplete run
313
+ */
314
+ checkForResume(): ConsolidationCheckpoint | null {
315
+ return this.db.getIncompleteCheckpoint();
316
+ }
317
+
318
+ /**
319
+ * Resume from a previous checkpoint
320
+ */
321
+ resumeFrom(checkpoint: ConsolidationCheckpoint): void {
322
+ this.runId = checkpoint.run_id;
323
+ }
324
+
325
+ /**
326
+ * Record an API call result for tracking
327
+ */
328
+ recordApiCall(success: boolean, tokensUsed?: number): void {
329
+ this.apiCalls++;
330
+ if (!success) {
331
+ this.apiErrors++;
332
+ }
333
+ }
334
+
335
+ /**
336
+ * Record a digest creation result
337
+ */
338
+ recordDigest(isEmpty: boolean): void {
339
+ this.totalDigests++;
340
+ if (isEmpty) {
341
+ this.emptyDigests++;
342
+ }
343
+ }
344
+
345
+ /**
346
+ * Record an error
347
+ */
348
+ recordError(error: string): void {
349
+ this.errors.push(error);
350
+ }
351
+
352
+ /**
353
+ * Check rollback triggers and return any that fired
354
+ */
355
+ checkRollbackTriggers(): RollbackTrigger[] {
356
+ const triggers: RollbackTrigger[] = [];
357
+
358
+ // Error rate threshold
359
+ const errorRateThreshold = this.db.getConfigNumber("error_rate_threshold", 0.3);
360
+ if (this.apiCalls >= 3) {
361
+ const errorRate = this.apiErrors / this.apiCalls;
362
+ triggers.push({
363
+ type: "error_rate",
364
+ threshold: errorRateThreshold,
365
+ current: errorRate,
366
+ triggered: errorRate > errorRateThreshold,
367
+ message: `API error rate ${(errorRate * 100).toFixed(1)}% exceeds ${(errorRateThreshold * 100).toFixed(0)}%`,
368
+ });
369
+ }
370
+
371
+ // Empty digest threshold
372
+ const emptyDigestThreshold = this.db.getConfigNumber("empty_digest_threshold", 0.2);
373
+ if (this.totalDigests >= 3) {
374
+ const emptyRate = this.emptyDigests / this.totalDigests;
375
+ triggers.push({
376
+ type: "empty_digests",
377
+ threshold: emptyDigestThreshold,
378
+ current: emptyRate,
379
+ triggered: emptyRate > emptyDigestThreshold,
380
+ message: `Empty digest rate ${(emptyRate * 100).toFixed(1)}% exceeds ${(emptyDigestThreshold * 100).toFixed(0)}%`,
381
+ });
382
+ }
383
+
384
+ // Budget exceeded
385
+ const dailyBudget = this.db.getConfigNumber("daily_budget_usd", 5.0);
386
+ const dailySpent = this.db.getDailySpending();
387
+ triggers.push({
388
+ type: "budget_exceeded",
389
+ threshold: dailyBudget,
390
+ current: dailySpent,
391
+ triggered: dailySpent > dailyBudget,
392
+ message: `Daily spending $${dailySpent.toFixed(2)} exceeds budget $${dailyBudget.toFixed(2)}`,
393
+ });
394
+
395
+ return triggers;
396
+ }
397
+
398
+ /**
399
+ * Delay between API calls (rate limiting)
400
+ */
401
+ async delay(): Promise<void> {
402
+ const delayMs = this.db.getConfigNumber("delay_between_calls_ms", 2000);
403
+ await new Promise(resolve => setTimeout(resolve, delayMs));
404
+ }
405
+
406
+ /**
407
+ * Estimate cost for an episode batch (Haiku)
408
+ */
409
+ private estimateEpisodeBatchCost(): number {
410
+ const { input, output } = TOKEN_ESTIMATES.episodeBatch;
411
+ return (input * PRICING.haiku.input) + (output * PRICING.haiku.output);
412
+ }
413
+
414
+ /**
415
+ * Estimate cost for a memory batch (Opus with thinking)
416
+ */
417
+ private estimateMemoryBatchCost(): number {
418
+ const { input, output, thinking } = TOKEN_ESTIMATES.memoryBatch;
419
+ return (input * PRICING.opus.input) +
420
+ (output * PRICING.opus.output) +
421
+ (thinking * PRICING.opus.thinking);
422
+ }
423
+
424
+ /**
425
+ * Calculate actual cost from token usage
426
+ */
427
+ calculateCost(model: "opus" | "haiku", inputTokens: number, outputTokens: number, thinkingTokens?: number): number {
428
+ const pricing = PRICING[model];
429
+ let cost = (inputTokens * pricing.input) + (outputTokens * pricing.output);
430
+
431
+ if (model === "opus" && thinkingTokens) {
432
+ cost += thinkingTokens * PRICING.opus.thinking;
433
+ }
434
+
435
+ return cost;
436
+ }
437
+
438
+ /**
439
+ * Get the run ID
440
+ */
441
+ getRunId(): string {
442
+ return this.runId;
443
+ }
444
+ }
package/src/index.ts CHANGED
@@ -16,6 +16,9 @@ import path from "path";
16
16
  import os from "os";
17
17
  import fs from "fs";
18
18
 
19
+ import { getTransportMode, getHttpPort } from "./transport/index.js";
20
+ import { startHttpServer } from "./transport/http.js";
21
+
19
22
  import { EngramDatabase } from "./storage/database.js";
20
23
  import { KnowledgeGraph } from "./graph/knowledge-graph.js";
21
24
  import { createRetriever } from "./retrieval/colbert.js";
@@ -112,17 +115,20 @@ process.on("SIGINT", () => {
112
115
  process.on("exit", cleanup);
113
116
 
114
117
  // Detect when parent process (Claude) dies by monitoring stdin
115
- process.stdin.on("end", () => {
116
- console.error("[Engram] stdin closed, parent process likely died. Shutting down...");
117
- cleanup();
118
- process.exit(0);
119
- });
120
-
121
- process.stdin.on("close", () => {
122
- console.error("[Engram] stdin closed, shutting down...");
123
- cleanup();
124
- process.exit(0);
125
- });
118
+ // Only needed in stdio mode
119
+ if (getTransportMode() === "stdio") {
120
+ process.stdin.on("end", () => {
121
+ console.error("[Engram] stdin closed, parent process likely died. Shutting down...");
122
+ cleanup();
123
+ process.exit(0);
124
+ });
125
+
126
+ process.stdin.on("close", () => {
127
+ console.error("[Engram] stdin closed, shutting down...");
128
+ cleanup();
129
+ process.exit(0);
130
+ });
131
+ }
126
132
 
127
133
  // ============ Initialize Components ============
128
134
 
@@ -496,7 +502,29 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
496
502
  includeGraph: include_graph,
497
503
  });
498
504
 
505
+ // Format digests (synthesized context - these provide broad understanding)
506
+ const digestsFormatted = response.digests.map((d) => ({
507
+ type: "digest" as const,
508
+ id: d.digest.id,
509
+ level: d.digest.level, // 1=session, 2=topic, 3=entity
510
+ topic: d.digest.topic,
511
+ content: d.digest.content,
512
+ source_count: d.digest.source_count,
513
+ period: {
514
+ start: d.digest.period_start.toISOString(),
515
+ end: d.digest.period_end.toISOString(),
516
+ },
517
+ relevance_score: d.score.toFixed(4),
518
+ // Key evidence - specific memories supporting this synthesis
519
+ key_memories: d.key_memories.map((m) => ({
520
+ id: m.id,
521
+ content: m.content,
522
+ timestamp: m.timestamp.toISOString(),
523
+ })),
524
+ }));
525
+
499
526
  const formatted = response.results.map((r) => ({
527
+ type: "memory" as const,
500
528
  id: r.memory.id,
501
529
  content: r.memory.content,
502
530
  source: r.memory.source,
@@ -511,6 +539,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
511
539
 
512
540
  // Format connected memories (Hebbian associations)
513
541
  const connectedFormatted = response.connected_memories.map((c) => ({
542
+ type: "connected" as const,
514
543
  id: c.memory.id,
515
544
  content: c.memory.content,
516
545
  connected_to: c.connected_to,
@@ -524,10 +553,16 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
524
553
  text: JSON.stringify({
525
554
  recall_id: response.recall_id, // For memory_feedback
526
555
  query,
556
+ // Digests first - they provide synthesized context
557
+ digests: digestsFormatted,
558
+ digests_count: digestsFormatted.length,
559
+ // Then individual memories for specific evidence
527
560
  results: formatted,
528
561
  count: formatted.length,
529
562
  connected_memories: connectedFormatted,
530
- hint: formatted.length > 0 ? "Call memory_feedback with useful_memory_ids after answering" : undefined,
563
+ hint: formatted.length > 0 || digestsFormatted.length > 0
564
+ ? "Call memory_feedback with useful_memory_ids after answering"
565
+ : undefined,
531
566
  }, null, 2),
532
567
  },
533
568
  ],
@@ -847,16 +882,27 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
847
882
  // ============ Main ============
848
883
 
849
884
  async function main() {
850
- // Kill any zombie instances before starting
851
- cleanupZombies();
852
- writePidFile();
885
+ const transportMode = getTransportMode();
853
886
 
854
- await initialize();
887
+ // Zombie cleanup only needed in stdio mode (local usage)
888
+ if (transportMode === "stdio") {
889
+ cleanupZombies();
890
+ writePidFile();
891
+ }
855
892
 
856
- const transport = new StdioServerTransport();
857
- await server.connect(transport);
893
+ await initialize();
858
894
 
859
- console.error(`[Engram] MCP server running on stdio (PID ${process.pid})`);
895
+ if (transportMode === "http") {
896
+ // HTTP mode - for Railway/remote deployment
897
+ const port = getHttpPort();
898
+ await startHttpServer({ port, server });
899
+ console.error(`[Engram] MCP server running in HTTP mode (PID ${process.pid})`);
900
+ } else {
901
+ // Stdio mode (default) - for local Claude Desktop/Cursor
902
+ const transport = new StdioServerTransport();
903
+ await server.connect(transport);
904
+ console.error(`[Engram] MCP server running on stdio (PID ${process.pid})`);
905
+ }
860
906
  }
861
907
 
862
908
  main().catch((error) => {
@@ -4,7 +4,7 @@
4
4
  * Enhanced with temporal decay and salience scoring
5
5
  */
6
6
 
7
- import { EngramDatabase, Memory } from "../storage/database.js";
7
+ import { EngramDatabase, Memory, Digest } from "../storage/database.js";
8
8
  import { KnowledgeGraph } from "../graph/knowledge-graph.js";
9
9
  import { ColBERTRetriever, SimpleRetriever, SearchResult, Document } from "./colbert.js";
10
10
 
@@ -20,8 +20,15 @@ export interface HybridSearchResult {
20
20
  };
21
21
  }
22
22
 
23
+ export interface DigestSearchResult {
24
+ digest: Digest;
25
+ score: number;
26
+ key_memories: Memory[]; // 2-3 source memories that best support this digest
27
+ }
28
+
23
29
  export interface HybridSearchResponse {
24
30
  results: HybridSearchResult[];
31
+ digests: DigestSearchResult[]; // Relevant synthesized context
25
32
  recall_id: string; // For LLM feedback
26
33
  connected_memories: Array<{
27
34
  memory: Memory;
@@ -179,11 +186,15 @@ export class HybridSearch {
179
186
  if (allCandidateIds.size === 0) {
180
187
  return {
181
188
  results: [],
189
+ digests: [],
182
190
  recall_id: recallId,
183
191
  connected_memories: [],
184
192
  };
185
193
  }
186
194
 
195
+ // Search digests via BM25 (top 3 relevant digests)
196
+ const digestResults = this.searchDigests(query, 3);
197
+
187
198
  // Create rankings for RRF
188
199
  const rankings: Map<string, { bm25?: number; semantic?: number; graph?: number; connected?: number }> = new Map();
189
200
 
@@ -335,13 +346,62 @@ export class HybridSearch {
335
346
  }
336
347
  }
337
348
 
349
+ // TOKEN EFFICIENCY: If digests are returned, reduce memory count
350
+ // Digest provides context (synthesis), memories provide evidence (specifics)
351
+ // Return fewer memories when we have good digest coverage
352
+ let finalResults = results;
353
+ if (digestResults.length > 0) {
354
+ // Get IDs of memories already covered by digests as key_memories
355
+ const coveredByDigests = new Set<string>();
356
+ digestResults.forEach(d => d.key_memories.forEach(m => coveredByDigests.add(m.id)));
357
+
358
+ // Keep memories not already shown as key_memories in digests
359
+ // Also limit to fewer since digests provide the context
360
+ const maxMemoriesWithDigests = Math.max(2, Math.floor(limit / 2));
361
+ finalResults = results
362
+ .filter(r => !coveredByDigests.has(r.memory.id))
363
+ .slice(0, maxMemoriesWithDigests);
364
+ }
365
+
338
366
  return {
339
- results,
367
+ results: finalResults,
368
+ digests: digestResults,
340
369
  recall_id: recallId,
341
370
  connected_memories: connectedMemories,
342
371
  };
343
372
  }
344
373
 
374
+ /**
375
+ * Search digests via BM25 and return with key source memories
376
+ * Returns top N digests with 2-3 representative source memories each
377
+ */
378
+ private searchDigests(query: string, limit: number): DigestSearchResult[] {
379
+ try {
380
+ const digestHits = this.db.searchDigestsBM25(query, limit);
381
+
382
+ return digestHits.map(hit => {
383
+ // Get source memories for this digest, take top 3 most relevant
384
+ const sources = this.db.getDigestSources(hit.id);
385
+ // Sort by importance and recency, take best 3
386
+ const keyMemories = sources
387
+ .sort((a, b) => {
388
+ const scoreA = (a.importance || 0.5) + (a.access_count || 0) * 0.1;
389
+ const scoreB = (b.importance || 0.5) + (b.access_count || 0) * 0.1;
390
+ return scoreB - scoreA;
391
+ })
392
+ .slice(0, 3);
393
+
394
+ return {
395
+ digest: hit,
396
+ score: Math.abs(hit.score), // BM25 returns negative scores
397
+ key_memories: keyMemories,
398
+ };
399
+ });
400
+ } catch {
401
+ return [];
402
+ }
403
+ }
404
+
345
405
  /**
346
406
  * Expanded search when LLM needs more memories
347
407
  * Relaxes constraints and follows weaker connections
@@ -355,7 +415,7 @@ export class HybridSearch {
355
415
  // Get the original retrieval log
356
416
  const log = this.db.getRetrievalLog(recallId);
357
417
  if (!log) {
358
- return { results: [], recall_id: recallId, connected_memories: [] };
418
+ return { results: [], digests: [], recall_id: recallId, connected_memories: [] };
359
419
  }
360
420
 
361
421
  // Search again with relaxed parameters