@grec0/memory-bank-mcp 0.1.10 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -119,6 +119,16 @@ Memory Bank is configured through environment variables. You can set them in you
119
119
  | `MEMORYBANK_REASONING_EFFORT` | `medium` | Reasoning level: `low`, `medium`, `high` |
120
120
  | `MEMORYBANK_AUTO_UPDATE_DOCS` | `false` | Auto-regenerate docs when indexing code |
121
121
 
122
+ #### Map-Reduce Auto-Summarization (v0.1.11+)
123
+
124
+ For large projects that exceed the LLM context window, Memory Bank automatically uses **Map-Reduce summarization**:
125
+
126
+ 1. **Map Phase**: Splits chunks into batches (up to ~80K tokens each) and summarizes each batch
127
+ 2. **Reduce Phase**: Combines batch summaries into a coherent final summary
128
+ 3. **Recursive**: If combined summaries still exceed threshold, recurses up to 3 levels
129
+
130
+ This happens automatically when the prepared content exceeds 100K tokens. No configuration needed.
131
+
122
132
  ### Configuration in Cursor IDE
123
133
 
124
134
  Edit your MCP configuration file:
@@ -7,6 +7,68 @@ import OpenAI from "openai";
7
7
  import * as fs from "fs";
8
8
  import * as path from "path";
9
9
  import * as crypto from "crypto";
10
+ import { countTokens } from "./chunker.js";
11
+ // ============================================
12
+ // Map-Reduce Configuration for Large Documents
13
+ // ============================================
14
+ // When content exceeds context window, use hierarchical summarization
15
+ // Uses tokens (via gpt-tokenizer) for accurate batching
16
/**
 * Model context windows (tokens) - GPT-5.x family
 * Source: https://platform.openai.com/docs/models
 *
 * The "default" entry is the fallback used for any model name that is not
 * listed here.
 */
const MODEL_CONTEXT_WINDOWS = {
    "gpt-5.2": 400000, // 400K context
    "gpt-5-mini": 400000, // 400K context
    "gpt-5-nano": 400000, // 400K context
    "gpt-5.1-codex": 400000,
    "gpt-5": 400000,
    "default": 128000, // Fallback for unknown models
};
/** Maximum tokens per batch for map phase (safe margin under context window) */
const MAX_TOKENS_PER_BATCH = 80000; // ~80K tokens per batch
/** Target summary length per batch in tokens */
const MAX_SUMMARY_TOKENS = 2000;
/** Threshold to trigger map-reduce summarization (tokens) */
const MAP_REDUCE_TOKEN_THRESHOLD = 100000; // Trigger if input > 100K tokens
/** Maximum recursion depth for hierarchical summarization */
const MAX_RECURSION_DEPTH = 3;
/**
 * Gets the context window size (in tokens) for a model.
 *
 * Only the table's own keys count as known models. A plain `table[model]`
 * lookup would also resolve inherited members such as "constructor",
 * "toString" or "__proto__" and return a non-numeric value for them.
 *
 * @param {string} model - Model name, e.g. "gpt-5-mini".
 * @returns {number} The listed context window, or the "default" entry when
 *   the model is not listed (or is not a string).
 */
function getModelContextWindow(model) {
    const isListedModel = typeof model === "string" &&
        Object.prototype.hasOwnProperty.call(MODEL_CONTEXT_WINDOWS, model);
    return isListedModel
        ? MODEL_CONTEXT_WINDOWS[model]
        : MODEL_CONTEXT_WINDOWS["default"];
}
42
/**
 * Map-phase prompt: asks the model to condense one batch of code chunks.
 * The literal "{chunks}" marker is where the formatted batch text is
 * substituted before the prompt is sent.
 */
const BATCH_SUMMARY_PROMPT = [
    "You are a code analysis assistant. Summarize the following code chunks concisely.",
    "",
    "Focus on extracting:",
    "1. **Main Components**: Classes, functions, modules and their purposes",
    "2. **Patterns**: Design patterns, architectural decisions",
    "3. **Dependencies**: Key imports and external dependencies",
    "4. **Data Flow**: How data moves through the code",
    "",
    "Be concise but comprehensive. Maximum 1000 words.",
    "",
    "Code chunks to summarize:",
    "{chunks}",
    "",
    "Provide a structured summary in markdown format.",
].join("\n");
/**
 * Reduce-phase prompt: asks the model to merge several batch summaries into
 * one. The literal "{summaries}" marker is where the joined summaries are
 * substituted before the prompt is sent.
 */
const REDUCE_SUMMARY_PROMPT = [
    "Combine the following code summaries into a single comprehensive summary.",
    "",
    "Merge similar information, remove redundancies, and create a cohesive overview.",
    "Maintain the structure: Components, Patterns, Dependencies, Data Flow.",
    "",
    "Summaries to combine:",
    "{summaries}",
    "",
    "Provide a unified markdown summary.",
].join("\n");
10
72
  /**
11
73
  * Document definitions with prompts
12
74
  */
@@ -260,8 +322,9 @@ export class ProjectKnowledgeService {
260
322
  }
261
323
  /**
262
324
  * Prepares chunks for inclusion in a prompt
325
+ * Uses Map-Reduce summarization if content exceeds context window threshold
263
326
  */
264
- prepareChunksForPrompt(chunks, maxChunks) {
327
+ async prepareChunksForPrompt(chunks, maxChunks) {
265
328
  // Sort by relevance (prioritize certain file types)
266
329
  const priorityFiles = ["package.json", "readme", "index", "main", "app"];
267
330
  const sorted = [...chunks].sort((a, b) => {
@@ -280,11 +343,31 @@ export class ProjectKnowledgeService {
280
343
  // Take top chunks
281
344
  const selected = sorted.slice(0, maxChunks);
282
345
  // Format for prompt
283
- return selected.map(chunk => {
346
+ const formatted = selected.map(chunk => {
284
347
  return `--- File: ${chunk.file_path} (${chunk.language}) [${chunk.chunk_type}${chunk.name ? `: ${chunk.name}` : ""}] ---
285
348
  ${chunk.content}
286
349
  ---`;
287
350
  }).join("\n\n");
351
+ // Count tokens using gpt-tokenizer (accurate for GPT-5.x)
352
+ const tokenCount = countTokens(formatted);
353
+ // Check if content exceeds token threshold for map-reduce
354
+ if (tokenCount > MAP_REDUCE_TOKEN_THRESHOLD) {
355
+ console.error(`\nšŸ”„ Content exceeds token threshold (${tokenCount.toLocaleString()} > ${MAP_REDUCE_TOKEN_THRESHOLD.toLocaleString()} tokens)`);
356
+ console.error(` Initiating Map-Reduce summarization for ${selected.length} chunks...`);
357
+ const mapReduceResult = await this.summarizeChunksMapReduce(selected);
358
+ const resultTokens = countTokens(mapReduceResult.content);
359
+ console.error(` Map-Reduce complete: ${resultTokens.toLocaleString()} tokens output, ${mapReduceResult.totalTokens.toLocaleString()} tokens used in processing\n`);
360
+ return {
361
+ text: mapReduceResult.content,
362
+ usedMapReduce: true,
363
+ mapReduceTokens: mapReduceResult.totalTokens,
364
+ };
365
+ }
366
+ return {
367
+ text: formatted,
368
+ usedMapReduce: false,
369
+ mapReduceTokens: 0,
370
+ };
288
371
  }
289
372
  /**
290
373
  * Calls the OpenAI Responses API with reasoning
@@ -369,6 +452,154 @@ ${chunk.content}
369
452
  outputTokens: response.usage?.completion_tokens || 0,
370
453
  };
371
454
  }
455
+ // ============================================
456
+ // Map-Reduce Summarization Methods
457
+ // ============================================
458
+ /**
459
+ * Summarizes a single batch of chunks (map phase)
460
+ * @param chunks Chunks to summarize
461
+ * @param batchIndex Batch number for logging
462
+ * @param totalBatches Total number of batches
463
+ * @returns Summary text and token usage
464
+ */
465
+ async summarizeBatch(chunks, batchIndex, totalBatches) {
466
+ // Format chunks for the prompt
467
+ const chunksText = chunks.map(chunk => {
468
+ return `--- ${chunk.file_path} (${chunk.language}) ---\n${chunk.content}\n---`;
469
+ }).join("\n\n");
470
+ const inputTokens = countTokens(chunksText);
471
+ const prompt = BATCH_SUMMARY_PROMPT.replace("{chunks}", chunksText);
472
+ console.error(` [Map ${batchIndex + 1}/${totalBatches}] Summarizing ${chunks.length} chunks (${inputTokens.toLocaleString()} tokens)...`);
473
+ try {
474
+ const result = await this.callResponsesAPI(prompt);
475
+ console.error(` [Map ${batchIndex + 1}/${totalBatches}] Done (${result.outputTokens.toLocaleString()} output tokens)`);
476
+ return {
477
+ summary: result.content,
478
+ tokens: result.outputTokens + result.reasoningTokens,
479
+ };
480
+ }
481
+ catch (error) {
482
+ console.error(` [Map ${batchIndex + 1}/${totalBatches}] Error: ${error.message}`);
483
+ // Return a minimal summary on error
484
+ return {
485
+ summary: `[Batch ${batchIndex + 1} summary failed: ${error.message}]`,
486
+ tokens: 0,
487
+ };
488
+ }
489
+ }
490
+ /**
491
+ * Combines multiple summaries into one (reduce phase)
492
+ * @param summaries Array of batch summaries
493
+ * @returns Combined summary
494
+ */
495
+ async combineSummaries(summaries) {
496
+ const summariesText = summaries.map((s, i) => `### Batch ${i + 1} Summary\n${s}`).join("\n\n");
497
+ const inputTokens = countTokens(summariesText);
498
+ const prompt = REDUCE_SUMMARY_PROMPT.replace("{summaries}", summariesText);
499
+ console.error(` [Reduce] Combining ${summaries.length} summaries (${inputTokens.toLocaleString()} tokens)...`);
500
+ try {
501
+ const result = await this.callResponsesAPI(prompt);
502
+ console.error(` [Reduce] Done (${result.outputTokens.toLocaleString()} output tokens)`);
503
+ return {
504
+ summary: result.content,
505
+ tokens: result.outputTokens + result.reasoningTokens,
506
+ };
507
+ }
508
+ catch (error) {
509
+ console.error(` [Reduce] Error: ${error.message}`);
510
+ // Fallback: just concatenate summaries
511
+ return {
512
+ summary: summaries.join("\n\n---\n\n"),
513
+ tokens: 0,
514
+ };
515
+ }
516
+ }
517
+ /**
518
+ * Map-Reduce summarization for large chunk sets
519
+ * Recursively summarizes chunks in batches until content fits context window
520
+ * Uses token counting (via gpt-tokenizer) for accurate batching
521
+ *
522
+ * @param chunks All chunks to process
523
+ * @param depth Current recursion depth
524
+ * @returns Summarized content that fits within context limits
525
+ */
526
+ async summarizeChunksMapReduce(chunks, depth = 0) {
527
+ const indent = " ".repeat(depth);
528
+ const totalInputTokens = chunks.reduce((sum, c) => sum + countTokens(c.content), 0);
529
+ console.error(`${indent}[Map-Reduce Depth ${depth}] Processing ${chunks.length} chunks (${totalInputTokens.toLocaleString()} tokens)...`);
530
+ // Safety check for recursion depth
531
+ if (depth >= MAX_RECURSION_DEPTH) {
532
+ console.error(`${indent}[Map-Reduce] Max recursion depth reached, truncating...`);
533
+ // Take first N chunks that fit within token limit
534
+ let truncatedTokens = 0;
535
+ const truncated = [];
536
+ for (const chunk of chunks) {
537
+ const chunkTokens = countTokens(chunk.content);
538
+ if (truncatedTokens + chunkTokens > MAX_TOKENS_PER_BATCH)
539
+ break;
540
+ truncated.push(chunk);
541
+ truncatedTokens += chunkTokens;
542
+ }
543
+ const content = truncated.map(c => `${c.file_path}: ${c.content.slice(0, 500)}...`).join("\n");
544
+ return { content, totalTokens: 0 };
545
+ }
546
+ // Split chunks into batches based on TOKEN count (not characters)
547
+ const batches = [];
548
+ let currentBatch = [];
549
+ let currentTokens = 0;
550
+ for (const chunk of chunks) {
551
+ const chunkTokens = countTokens(chunk.content) + countTokens(chunk.file_path) + 20; // overhead for formatting
552
+ if (currentTokens + chunkTokens > MAX_TOKENS_PER_BATCH && currentBatch.length > 0) {
553
+ batches.push(currentBatch);
554
+ currentBatch = [];
555
+ currentTokens = 0;
556
+ }
557
+ currentBatch.push(chunk);
558
+ currentTokens += chunkTokens;
559
+ }
560
+ if (currentBatch.length > 0) {
561
+ batches.push(currentBatch);
562
+ }
563
+ console.error(`${indent}[Map-Reduce Depth ${depth}] Split into ${batches.length} batches (max ${MAX_TOKENS_PER_BATCH.toLocaleString()} tokens/batch)`);
564
+ // Map phase: summarize each batch in parallel
565
+ let totalTokens = 0;
566
+ const summaryPromises = batches.map((batch, index) => this.summarizeBatch(batch, index, batches.length));
567
+ const summaryResults = await Promise.all(summaryPromises);
568
+ const summaries = summaryResults.map(r => r.summary);
569
+ totalTokens += summaryResults.reduce((sum, r) => sum + r.tokens, 0);
570
+ // Check if combined summaries still exceed token threshold
571
+ const combinedText = summaries.join("\n\n");
572
+ const combinedTokens = countTokens(combinedText);
573
+ console.error(`${indent}[Map-Reduce Depth ${depth}] Combined summaries: ${combinedTokens.toLocaleString()} tokens`);
574
+ if (combinedTokens > MAP_REDUCE_TOKEN_THRESHOLD && depth < MAX_RECURSION_DEPTH - 1) {
575
+ // Recursively summarize the summaries
576
+ console.error(`${indent}[Map-Reduce Depth ${depth}] Still exceeds ${MAP_REDUCE_TOKEN_THRESHOLD.toLocaleString()} tokens, recursing...`);
577
+ // Convert summaries to pseudo-chunks for recursive processing
578
+ const summaryChunks = summaries.map((summary, i) => ({
579
+ id: `summary-${depth}-${i}`,
580
+ vector: [], // Empty vector - not used for map-reduce
581
+ file_path: `batch-${i}-summary`,
582
+ file_hash: "",
583
+ content: summary,
584
+ language: "markdown",
585
+ chunk_type: "summary",
586
+ start_line: 0,
587
+ end_line: 0,
588
+ timestamp: Date.now(),
589
+ project_id: "",
590
+ }));
591
+ const recursiveResult = await this.summarizeChunksMapReduce(summaryChunks, depth + 1);
592
+ totalTokens += recursiveResult.totalTokens;
593
+ return { content: recursiveResult.content, totalTokens };
594
+ }
595
+ // Reduce phase: combine all summaries
596
+ if (summaries.length > 1) {
597
+ const reduceResult = await this.combineSummaries(summaries);
598
+ totalTokens += reduceResult.tokens;
599
+ return { content: reduceResult.summary, totalTokens };
600
+ }
601
+ return { content: summaries[0] || "", totalTokens };
602
+ }
372
603
  /**
373
604
  * Generates a single document for a specific project
374
605
  */
@@ -385,10 +616,10 @@ ${chunk.content}
385
616
  }
386
617
  console.error(`Generating document: ${definition.title} (project: ${projectId})`);
387
618
  console.error(` Input chunks: ${chunks.length}`);
388
- // Prepare prompt
389
- const chunksText = this.prepareChunksForPrompt(chunks, this.options.maxChunksPerDoc);
390
- console.error(` Chunks text length: ${chunksText.length} chars`);
391
- let prompt = definition.promptTemplate.replace("{chunks}", chunksText);
619
+ // Prepare prompt (may trigger Map-Reduce if content too large)
620
+ const preparedChunks = await this.prepareChunksForPrompt(chunks, this.options.maxChunksPerDoc);
621
+ console.error(` Chunks text length: ${preparedChunks.text.length} chars${preparedChunks.usedMapReduce ? ' (after Map-Reduce)' : ''}`);
622
+ let prompt = definition.promptTemplate.replace("{chunks}", preparedChunks.text);
392
623
  if (type === "progress" && previousProgress) {
393
624
  prompt = prompt.replace("{previousProgress}", previousProgress);
394
625
  }
@@ -397,6 +628,8 @@ ${chunk.content}
397
628
  }
398
629
  // Call API
399
630
  const result = await this.callResponsesAPI(prompt);
631
+ // Include map-reduce tokens in the total
632
+ const totalReasoningTokens = result.reasoningTokens + (preparedChunks.usedMapReduce ? preparedChunks.mapReduceTokens : 0);
400
633
  // Create document
401
634
  const doc = {
402
635
  type,
@@ -405,7 +638,7 @@ ${chunk.content}
405
638
  type,
406
639
  lastGenerated: Date.now(),
407
640
  lastInputHash: inputHash,
408
- reasoningTokens: result.reasoningTokens,
641
+ reasoningTokens: totalReasoningTokens,
409
642
  outputTokens: result.outputTokens,
410
643
  },
411
644
  };
@@ -415,7 +648,8 @@ ${chunk.content}
415
648
  // Update metadata
416
649
  metadataCache.set(type, doc.metadata);
417
650
  this.saveProjectMetadata(projectId);
418
- console.error(`Generated ${definition.title} (${result.reasoningTokens} reasoning + ${result.outputTokens} output tokens)`);
651
+ const mapReduceNote = preparedChunks.usedMapReduce ? ` [Map-Reduce: ${preparedChunks.mapReduceTokens} tokens]` : '';
652
+ console.error(`Generated ${definition.title} (${result.reasoningTokens} reasoning + ${result.outputTokens} output tokens)${mapReduceNote}`);
419
653
  return doc;
420
654
  }
421
655
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@grec0/memory-bank-mcp",
3
- "version": "0.1.10",
3
+ "version": "0.1.11",
4
4
  "description": "MCP server for semantic code indexing with Memory Bank - AI-powered codebase understanding",
5
5
  "license": "MIT",
6
6
  "author": "@grec0",