npm - @grec0/memory-bank-mcp - Versions diffs - 0.0.2 → 0.0.4 - Mend

@grec0/memory-bank-mcp 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/README.md +74 -5
package/dist/common/chunker.js +168 -24
package/dist/common/fileScanner.js +94 -10
package/dist/common/indexManager.js +97 -25
package/dist/common/logger.js +54 -0
package/dist/common/projectKnowledgeService.js +627 -0
package/dist/common/vectorStore.js +77 -21
package/dist/index.js +76 -8
package/dist/tools/analyzeCoverage.js +1 -1
package/dist/tools/generateProjectDocs.js +133 -0
package/dist/tools/getProjectDocs.js +126 -0
package/dist/tools/index.js +3 -0
package/dist/tools/searchMemory.js +2 -2
package/package.json +2 -1

package/README.md CHANGED Viewed

@@ -28,14 +28,31 @@ Con Memory Bank, las IAs:
 ## 🚀 Características
+### Core Memory Bank (Búsqueda Precisa)
 - **🔍 Búsqueda Semántica**: Pregunta "¿cómo funciona la autenticación?" y obtén código relevante
-- **🧩 Chunking Inteligente**: AST parsing para TypeScript/JavaScript/Python
+- **🧩 Chunking Inteligente**: AST parsing para TS/JS/Python con límites de tokens (8192 máx)
 - **⚡ Actualización Incremental**: Solo reindexa archivos modificados (detección por hash)
 - **💾 Cache de Embeddings**: Evita regenerar embeddings innecesariamente
 - **🎯 Filtros Avanzados**: Por archivo, lenguaje, tipo de chunk
 - **📊 Estadísticas Detalladas**: Conoce el estado de tu índice en todo momento
 - **🔒 Privacidad**: Vector store local, respeta .gitignore y .memoryignore
+### Project Knowledge Layer (Conocimiento Global) 🆕
+- **📄 Documentación Automática**: Genera 6 documentos markdown estructurados del proyecto
+- **🧠 IA con Razonamiento**: Usa OpenAI Responses API con modelos de razonamiento (gpt-5-mini)
+- **🔄 Actualización Inteligente**: Solo regenera documentos afectados por cambios
+- **📚 Contexto Global**: Complementa búsqueda precisa con visión de alto nivel
+Los documentos generados incluyen:
+| Documento | Propósito |
+|-----------|-----------|
+| `projectBrief.md` | Descripción general del proyecto |
+| `productContext.md` | Perspectiva de negocio y usuarios |
+| `systemPatterns.md` | Patrones de arquitectura y diseño |
+| `techContext.md` | Stack tecnológico y dependencias |
+| `activeContext.md` | Estado actual de desarrollo |
+| `progress.md` | Seguimiento de cambios |
 ## 📋 Requisitos
 - **Node.js** >= 18.0.0
@@ -81,13 +98,18 @@ Crea un archivo `.env` en la raíz de tu workspace (o configúralas en tu client
 # REQUERIDO: Tu API key de OpenAI
 OPENAI_API_KEY=sk-your-api-key-here
-# OPCIONAL: Configuración avanzada
+# OPCIONAL: Configuración de indexación
 MEMORYBANK_STORAGE_PATH=.memorybank              # Dónde almacenar el índice
 MEMORYBANK_EMBEDDING_MODEL=text-embedding-3-small # Modelo de OpenAI
 MEMORYBANK_EMBEDDING_DIMENSIONS=1536             # Dimensiones (1536 o 512)
-MEMORYBANK_CHUNK_SIZE=1000                       # Tamaño máximo de chunks
-MEMORYBANK_CHUNK_OVERLAP=200                     # Overlap entre chunks
+MEMORYBANK_MAX_TOKENS=7500                       # Tokens máx por chunk (límite: 8192)
+MEMORYBANK_CHUNK_OVERLAP_TOKENS=200              # Overlap en tokens entre chunks
 MEMORYBANK_WORKSPACE_ROOT=/path/to/project       # Raíz del workspace
+# OPCIONAL: Project Knowledge Layer (documentación con IA)
+MEMORYBANK_REASONING_MODEL=gpt-5-mini            # Modelo de razonamiento
+MEMORYBANK_REASONING_EFFORT=medium               # low/medium/high
+MEMORYBANK_AUTO_UPDATE_DOCS=false                # Auto-actualizar docs al indexar
 ```
 ### Configuración en Claude Desktop
@@ -152,7 +174,7 @@ Busca código por similitud semántica.
 **Parámetros:**
 - `query` (requerido): Consulta en lenguaje natural
 - `topK` (opcional): Número de resultados (default: 10)
-- `minScore` (opcional): Score mínimo 0-1 (default: 0.7)
+- `minScore` (opcional): Score mínimo 0-1 (default: 0.4)
 - `filterByFile` (opcional): Filtrar por patrón de archivo
 - `filterByLanguage` (opcional): Filtrar por lenguaje
@@ -205,6 +227,53 @@ Obtiene estadísticas del Memory Bank.
 memorybank_get_stats({})
 ```
+### `memorybank_analyze_coverage`
+Analiza la cobertura de indexación del proyecto.
+**Ejemplo:**
+```
+memorybank_analyze_coverage({})
+```
+### `memorybank_generate_project_docs` 🆕
+Genera documentación estructurada del proyecto usando IA con razonamiento (gpt-5-mini).
+**Parámetros:**
+- `projectId` (opcional): ID del proyecto
+- `force` (opcional): Forzar regeneración (default: false)
+**Ejemplo:**
+```
+memorybank_generate_project_docs({ force: true })
+```
+Genera 6 documentos markdown:
+- `projectBrief.md`: Descripción general
+- `productContext.md`: Perspectiva de negocio
+- `systemPatterns.md`: Patrones de arquitectura
+- `techContext.md`: Stack tecnológico
+- `activeContext.md`: Estado actual
+- `progress.md`: Seguimiento
+### `memorybank_get_project_docs` 🆕
+Lee la documentación del proyecto generada por IA.
+**Parámetros:**
+- `document` (opcional): Documento específico o "all"/"summary" (default: "summary")
+- `format` (opcional): "full" o "summary" (default: "full")
+**Ejemplo:**
+```
+// Obtener resumen de todos los docs
+memorybank_get_project_docs({ document: "summary" })
+// Obtener documento específico
+memorybank_get_project_docs({ document: "systemPatterns" })
+```
 ## 🎯 Casos de Uso
 ### 1. Primera Indexación

package/dist/common/chunker.js CHANGED Viewed

@@ -1,13 +1,31 @@
 /**
  * @fileoverview Intelligent code chunker for Memory Bank
  * Fragments code intelligently using AST parsing when possible
+ * Uses token counting to respect embedding model limits
  */
 import * as fs from "fs";
 import { parse } from "@babel/parser";
 import traverseLib from "@babel/traverse";
 import * as crypto from "crypto";
+import { encode } from "gpt-tokenizer";
 // Handle traverse library export
 const traverse = typeof traverseLib === 'function' ? traverseLib : traverseLib.default;
+// Constants for embedding model limits
+// text-embedding-3-small has an 8192-token limit; 7500 leaves a safety margin
+const MAX_TOKENS_PER_CHUNK = 7500;
+const DEFAULT_CHUNK_OVERLAP_TOKENS = 200;
+/**
+ * Counts the tokens in a piece of text using the `encode` function imported
+ * from "gpt-tokenizer".
+ *
+ * @param text - Text to measure; the fallback path reads its `.length`.
+ * @returns The length of the array returned by `encode(text)`. If `encode`
+ *   throws, the error is swallowed and a rough estimate of one token per
+ *   four characters (rounded up) is returned instead.
+ */
+export function countTokens(text) {
+    try {
+        return encode(text).length;
+    }
+    catch {
+        // Best-effort fallback (tokenizer error is discarded): ~4 characters per token for code
+        return Math.ceil(text.length / 4);
+    }
+}
 /**
  * Generates unique ID for a chunk based on content and metadata
  */
@@ -59,6 +77,94 @@ function extractContext(content, language) {
     }
     return contextLines.join("\n");
 }
+/**
+ * Splits a chunk whose content exceeds the token limit into smaller,
+ * line-aligned sub-chunks with a trailing-line overlap between neighbours.
+ *
+ * Behaviour visible in this function:
+ * - A chunk at or under `maxTokens` is returned unchanged inside a
+ *   one-element array, with `tokenCount` added.
+ * - Otherwise the content is split on "\n" and lines are accumulated until
+ *   adding the next line would push the running estimate past `maxTokens`;
+ *   the accumulated lines are then emitted as a sub-chunk.
+ * - The running estimate sums per-line counts of `line + "\n"`; the
+ *   `tokenCount` stored on each sub-chunk is recomputed from the joined text.
+ * - Each new sub-chunk is seeded with lines taken from the end of the
+ *   previous one until their token total reaches `overlapTokens`.
+ * - Sub-chunks copy `filePath`, `chunkType`, `language` and `context` from
+ *   the parent; named parents yield names suffixed `_part1`, `_part2`, ...
+ * - Progress messages are written with `console.error`.
+ *
+ * NOTE(review): the overlap seed plus the incoming line is not re-checked
+ * against `maxTokens`, and an over-long single line is kept whole, so a
+ * returned sub-chunk can still exceed the limit - confirm callers tolerate it.
+ *
+ * @param chunk - Parent chunk; reads `content`, `filePath`, `name`,
+ *   `startLine`, `endLine`, `chunkType`, `language` and `context`.
+ * @param maxTokens - Token budget per sub-chunk.
+ * @param overlapTokens - Target number of tokens repeated between
+ *   consecutive sub-chunks.
+ * @returns Array of chunk objects, each carrying a `tokenCount`.
+ */
+function splitLargeChunk(chunk, maxTokens, overlapTokens) {
+    const tokenCount = countTokens(chunk.content);
+    // Fast path: within budget, so only annotate a shallow copy with its token count
+    if (tokenCount <= maxTokens) {
+        return [{ ...chunk, tokenCount }];
+    }
+    console.error(`Splitting large chunk: ${chunk.filePath} (${chunk.name || 'unnamed'}) - ${tokenCount} tokens exceeds ${maxTokens} limit`);
+    const subChunks = [];
+    const lines = chunk.content.split("\n");
+    let currentLines = [];
+    let currentTokens = 0; // running estimate built from per-line counts
+    let subChunkStartLine = chunk.startLine;
+    let subChunkIndex = 0; // zero-based; names use subChunkIndex + 1
+    for (let i = 0; i < lines.length; i++) {
+        const line = lines[i];
+        const lineTokens = countTokens(line + "\n");
+        // A single over-budget line that would start a sub-chunk cannot be split further, so it is kept whole
+        if (lineTokens > maxTokens && currentLines.length === 0) {
+            currentLines.push(line);
+            currentTokens = lineTokens;
+        }
+        else if (currentTokens + lineTokens > maxTokens && currentLines.length > 0) {
+            // Budget would be exceeded by this line: emit what has been accumulated so far
+            const content = currentLines.join("\n");
+            const actualTokens = countTokens(content);
+            subChunks.push({
+                id: generateChunkId(chunk.filePath, content, subChunkStartLine),
+                filePath: chunk.filePath,
+                content,
+                startLine: subChunkStartLine,
+                endLine: chunk.startLine + i - 1, // line before the current one; assumes content line 0 is chunk.startLine - TODO confirm
+                chunkType: chunk.chunkType,
+                name: chunk.name ? `${chunk.name}_part${subChunkIndex + 1}` : undefined,
+                language: chunk.language,
+                context: chunk.context,
+                tokenCount: actualTokens,
+            });
+            subChunkIndex++;
+            // Build the overlap by walking backwards from the end of the emitted lines until overlapTokens is reached
+            let overlapLines = [];
+            let overlapTokenCount = 0;
+            for (let j = currentLines.length - 1; j >= 0 && overlapTokenCount < overlapTokens; j--) {
+                overlapLines.unshift(currentLines[j]); // unshift keeps the original line order
+                overlapTokenCount += countTokens(currentLines[j] + "\n");
+            }
+            currentLines = [...overlapLines, line]; // next sub-chunk = overlap + the line that did not fit
+            currentTokens = overlapTokenCount + lineTokens;
+            subChunkStartLine = chunk.startLine + i - overlapLines.length;
+        }
+        else {
+            currentLines.push(line);
+            currentTokens += lineTokens;
+        }
+    }
+    // Flush whatever remains after the loop as the last sub-chunk; it ends where the parent ends
+    if (currentLines.length > 0) {
+        const content = currentLines.join("\n");
+        const actualTokens = countTokens(content);
+        subChunks.push({
+            id: generateChunkId(chunk.filePath, content, subChunkStartLine),
+            filePath: chunk.filePath,
+            content,
+            startLine: subChunkStartLine,
+            endLine: chunk.endLine,
+            chunkType: chunk.chunkType,
+            name: chunk.name ? `${chunk.name}_part${subChunkIndex + 1}` : undefined,
+            language: chunk.language,
+            context: chunk.context,
+            tokenCount: actualTokens,
+        });
+    }
+    console.error(`  Split into ${subChunks.length} sub-chunks`);
+    return subChunks;
+}
+/**
+ * Runs every chunk through `splitLargeChunk` and concatenates the results,
+ * preserving input order.
+ *
+ * Chunks already within the limit come back as a single element (with a
+ * `tokenCount` attached by `splitLargeChunk`); larger ones are replaced by
+ * their sub-chunks.
+ *
+ * @param chunks - Iterable of chunk objects to check.
+ * @param maxTokens - Token budget passed through to `splitLargeChunk`.
+ * @param overlapTokens - Overlap target passed through to `splitLargeChunk`.
+ * @returns A new flat array; the input array itself is not modified here.
+ */
+function enforceTokenLimits(chunks, maxTokens, overlapTokens) {
+    const result = [];
+    for (const chunk of chunks) {
+        const splitChunks = splitLargeChunk(chunk, maxTokens, overlapTokens);
+        result.push(...splitChunks);
+    }
+    return result;
+}
 /**
  * Chunks TypeScript/JavaScript code using AST parsing
  */
@@ -170,8 +276,9 @@ function chunkTypeScriptJavaScript(options) {
                 }
             },
         });
-        // If no chunks were extracted or file is small, treat as single chunk
-        if (chunks.length === 0 || options.content.length <= options.maxChunkSize) {
+        // If no chunks were extracted, treat as single chunk
+        if (chunks.length === 0) {
+            const tokenCount = countTokens(options.content);
             chunks.push({
                 id: generateChunkId(options.filePath, options.content, 1),
                 filePath: options.filePath,
@@ -181,15 +288,17 @@ function chunkTypeScriptJavaScript(options) {
                 chunkType: "file",
                 language: options.language,
                 context,
+                tokenCount,
             });
         }
     }
     catch (error) {
         console.error(`AST parsing failed for ${options.filePath}, falling back to fixed chunking: ${error}`);
         // Fallback to fixed chunking if AST parsing fails
-        return chunkByFixedSize(options);
+        return chunkByTokens(options);
     }
-    return chunks;
+    // Enforce token limits on all chunks
+    return enforceTokenLimits(chunks, options.maxTokens, options.chunkOverlapTokens);
 }
 /**
  * Chunks Python code using simple pattern matching
@@ -208,7 +317,7 @@ function chunkPython(options) {
     for (let i = 0; i < lines.length; i++) {
         const line = lines[i];
         const trimmed = line.trim();
-        const indent = line.length - line.trimLeft().length;
+        const indent = line.length - line.trimStart().length;
         // Detect function definition
         if (trimmed.startsWith("def ")) {
             // Save previous chunk if exists
@@ -317,45 +426,57 @@ function chunkPython(options) {
             context,
         });
     }
-    return chunks;
+    // Enforce token limits on all chunks
+    return enforceTokenLimits(chunks, options.maxTokens, options.chunkOverlapTokens);
 }
 /**
- * Chunks code by fixed size with overlap
+ * Chunks code by token count with overlap (replacement for chunkByFixedSize)
  */
-function chunkByFixedSize(options) {
+function chunkByTokens(options) {
     const chunks = [];
     const lines = options.content.split("\n");
     const context = extractContext(options.content, options.language);
     let currentLines = [];
-    let currentSize = 0;
+    let currentTokens = 0;
     let chunkStartLine = 1;
     for (let i = 0; i < lines.length; i++) {
         const line = lines[i];
-        currentLines.push(line);
-        currentSize += line.length + 1; // +1 for newline
-        // If we've reached max chunk size
-        if (currentSize >= options.maxChunkSize) {
+        const lineTokens = countTokens(line + "\n");
+        // If we've reached max tokens
+        if (currentTokens + lineTokens > options.maxTokens && currentLines.length > 0) {
             const content = currentLines.join("\n");
+            const actualTokens = countTokens(content);
             chunks.push({
                 id: generateChunkId(options.filePath, content, chunkStartLine),
                 filePath: options.filePath,
                 content,
                 startLine: chunkStartLine,
-                endLine: i + 1,
+                endLine: i,
                 chunkType: "block",
                 language: options.language,
                 context,
+                tokenCount: actualTokens,
             });
-            // Calculate overlap
-            const overlapLines = Math.floor(options.chunkOverlap / 50); // Approximate lines
-            currentLines = currentLines.slice(-overlapLines);
-            currentSize = currentLines.reduce((sum, l) => sum + l.length + 1, 0);
-            chunkStartLine = i + 1 - overlapLines + 1;
+            // Calculate overlap in lines (approximate)
+            let overlapLines = [];
+            let overlapTokenCount = 0;
+            for (let j = currentLines.length - 1; j >= 0 && overlapTokenCount < options.chunkOverlapTokens; j--) {
+                overlapLines.unshift(currentLines[j]);
+                overlapTokenCount += countTokens(currentLines[j] + "\n");
+            }
+            currentLines = [...overlapLines, line];
+            currentTokens = overlapTokenCount + lineTokens;
+            chunkStartLine = i + 1 - overlapLines.length;
+        }
+        else {
+            currentLines.push(line);
+            currentTokens += lineTokens;
         }
     }
     // Add remaining content as final chunk
     if (currentLines.length > 0) {
         const content = currentLines.join("\n");
+        const actualTokens = countTokens(content);
         chunks.push({
             id: generateChunkId(options.filePath, content, chunkStartLine),
             filePath: options.filePath,
@@ -365,10 +486,18 @@ function chunkByFixedSize(options) {
             chunkType: "block",
             language: options.language,
             context,
+            tokenCount: actualTokens,
         });
     }
     return chunks;
 }
+/**
+ * Legacy entry point kept for backwards compatibility; it forwards the
+ * options object unchanged to `chunkByTokens`.
+ *
+ * NOTE(review): `chunkByTokens` reads `options.maxTokens` and
+ * `options.chunkOverlapTokens`, so the character-based `maxChunkSize` and
+ * `chunkOverlap` options no longer influence the result of this function.
+ *
+ * @param options - Chunking options, passed straight through.
+ * @returns Whatever `chunkByTokens(options)` returns.
+ * @deprecated Use chunkByTokens instead
+ */
+function chunkByFixedSize(options) {
+    return chunkByTokens(options);
+}
 /**
  * Main chunking function - routes to appropriate strategy based on language
  */
@@ -377,6 +506,9 @@ export function chunkCode(options) {
         filePath: options.filePath,
         content: options.content,
         language: options.language,
+        maxTokens: options.maxTokens || MAX_TOKENS_PER_CHUNK,
+        chunkOverlapTokens: options.chunkOverlapTokens || DEFAULT_CHUNK_OVERLAP_TOKENS,
+        // Legacy options mapping
         maxChunkSize: options.maxChunkSize || 1000,
         chunkOverlap: options.chunkOverlap || 200,
     };
@@ -388,20 +520,32 @@ export function chunkCode(options) {
         return chunkPython(fullOptions);
     }
     else {
-        // For other languages, use fixed-size chunking
-        return chunkByFixedSize(fullOptions);
+        // For other languages, use token-based chunking
+        return chunkByTokens(fullOptions);
     }
 }
 /**
  * Chunks a file by reading it from disk.
  *
  * The file is read synchronously as UTF-8 with `fs.readFileSync`, then handed
  * to `chunkCode` together with the supplied language and limits.
  *
  * NOTE(review): this hunk renames the third and fourth positional parameters
  * from character-based sizes (`maxChunkSize`, `chunkOverlap`) to token-based
  * ones (`maxTokens`, `chunkOverlapTokens`). A caller still passing character
  * sizes positionally would have them interpreted as token counts - confirm
  * all call sites were updated.
  *
  * @param filePath - Path of the file to read.
  * @param language - Language identifier forwarded to `chunkCode`.
  * @param maxTokens - Per-chunk token budget forwarded to `chunkCode`.
  * @param chunkOverlapTokens - Overlap target forwarded to `chunkCode`.
  * @returns The chunk array produced by `chunkCode`.
  */
-export function chunkFile(filePath, language, maxChunkSize, chunkOverlap) {
+export function chunkFile(filePath, language, maxTokens, chunkOverlapTokens) {
     const content = fs.readFileSync(filePath, "utf-8");
     return chunkCode({
         filePath,
         content,
         language,
-        maxChunkSize,
-        chunkOverlap,
+        maxTokens,
+        chunkOverlapTokens,
     });
 }
+/**
+ * Utility to check whether content would fit in a single embedding request.
+ *
+ * @param content - Text to measure with `countTokens`.
+ * @param maxTokens - Token budget to compare against; defaults to
+ *   `MAX_TOKENS_PER_CHUNK`.
+ * @returns `true` when the token count is less than or equal to `maxTokens`.
+ */
+export function wouldFitInSingleEmbedding(content, maxTokens = MAX_TOKENS_PER_CHUNK) {
+    return countTokens(content) <= maxTokens;
+}
+/**
+ * Returns the module-level `MAX_TOKENS_PER_CHUNK` constant.
+ *
+ * This is the compiled-in value only; a per-call `maxTokens` option passed
+ * to the chunking functions is not reflected here.
+ *
+ * @returns The value of `MAX_TOKENS_PER_CHUNK`.
+ */
+export function getMaxTokensPerChunk() {
+    return MAX_TOKENS_PER_CHUNK;
+}

package/dist/common/fileScanner.js CHANGED Viewed

@@ -10,48 +10,116 @@ import ignoreLib from "ignore";
 const ignore = typeof ignoreLib === 'function' ? ignoreLib : ignoreLib.default;
 // Language detection by file extension: maps an extension (leading dot included) to a language identifier
 const LANGUAGE_MAP = {
+    // TypeScript/JavaScript
     ".ts": "typescript",
     ".tsx": "typescript",
     ".js": "javascript",
     ".jsx": "javascript",
     ".mjs": "javascript",
     ".cjs": "javascript",
+    // Python
     ".py": "python",
+    ".pyi": "python",
+    ".pyw": "python",
+    // JVM Languages
     ".java": "java",
+    ".kt": "kotlin",
+    ".kts": "kotlin",
+    ".scala": "scala",
+    ".groovy": "groovy",
+    ".gradle": "groovy",
+    // C/C++
     ".c": "c",
     ".cpp": "cpp",
     ".cc": "cpp",
     ".cxx": "cpp",
     ".h": "c",
     ".hpp": "cpp",
+    ".hxx": "cpp",
+    // .NET
     ".cs": "csharp",
+    ".fs": "fsharp",
+    ".vb": "vb",
+    // Systems Languages
     ".go": "go",
     ".rs": "rust",
+    // Scripting Languages
     ".rb": "ruby",
     ".php": "php",
-    ".swift": "swift",
-    ".kt": "kotlin",
-    ".kts": "kotlin",
-    ".scala": "scala",
+    ".pl": "perl",
+    ".pm": "perl",
+    ".lua": "lua",
     ".r": "r",
     ".R": "r",
-    ".sql": "sql",
+    // Mobile
+    ".swift": "swift",
+    ".m": "objectivec",
+    ".mm": "objectivec",
+    // Shell
     ".sh": "shell",
     ".bash": "shell",
     ".zsh": "shell",
     ".fish": "shell",
-    ".md": "markdown",
-    ".json": "json",
-    ".yaml": "yaml",
-    ".yml": "yaml",
-    ".xml": "xml",
+    ".ps1": "powershell",
+    ".psm1": "powershell",
+    ".bat": "batch",
+    ".cmd": "batch",
+    // Web
     ".html": "html",
     ".htm": "html",
     ".css": "css",
     ".scss": "scss",
     ".sass": "sass",
+    ".less": "less",
     ".vue": "vue",
     ".svelte": "svelte",
+    ".astro": "astro",
+    // Data/Config
+    ".json": "json",
+    ".jsonc": "json",
+    ".json5": "json",
+    ".yaml": "yaml",
+    ".yml": "yaml",
+    ".toml": "toml",
+    ".xml": "xml",
+    ".ini": "ini",
+    ".cfg": "ini",
+    ".conf": "ini",
+    ".properties": "properties",
+    ".env": "dotenv", // NOTE(review): Node's path.extname(".env") returns "" - confirm the lookup can ever produce this key
+    ".env.local": "dotenv", // NOTE(review): path.extname(".env.local") returns ".local", so this key looks unreachable if lookup uses extname - confirm
+    ".env.example": "dotenv", // NOTE(review): same concern - extname would yield ".example"
+    // Documentation
+    ".md": "markdown",
+    ".mdx": "markdown",
+    ".rst": "rst",
+    ".txt": "text",
+    // Database
+    ".sql": "sql",
+    ".prisma": "prisma",
+    ".graphql": "graphql",
+    ".gql": "graphql",
+    // Other
+    ".dockerfile": "dockerfile",
+    ".tf": "terraform",
+    ".hcl": "hcl",
+    ".proto": "protobuf",
+    ".sol": "solidity",
+    ".zig": "zig",
+    ".nim": "nim",
+    ".ex": "elixir",
+    ".exs": "elixir",
+    ".erl": "erlang",
+    ".hrl": "erlang",
+    ".clj": "clojure",
+    ".cljs": "clojure",
+    ".cljc": "clojure",
+    ".hs": "haskell",
+    ".elm": "elm",
+    ".dart": "dart",
+    ".v": "v",
+    ".asm": "assembly",
+    ".s": "assembly",
 };
 // Binary file extensions to skip
 const BINARY_EXTENSIONS = new Set([
@@ -132,8 +200,24 @@ export function isCodeFile(filePath) {
     // Additional checks for files without extension or special cases
     const basename = path.basename(filePath);
     const codeFileNames = new Set([
+        // Build/DevOps
         "Makefile", "Dockerfile", "Jenkinsfile", "Vagrantfile",
         "Rakefile", "Gemfile", "Podfile", "Fastfile",
+        "CMakeLists.txt", "meson.build", "BUILD", "WORKSPACE",
+        // Config files
+        ".gitignore", ".gitattributes", ".dockerignore",
+        ".editorconfig", ".prettierrc", ".eslintrc",
+        ".babelrc", ".browserslistrc",
+        "tsconfig.json", "jsconfig.json", "package.json",
+        "angular.json", "nest-cli.json", "nx.json",
+        "webpack.config.js", "vite.config.js", "rollup.config.js",
+        // CI/CD
+        ".gitlab-ci.yml", ".travis.yml", "azure-pipelines.yml",
+        "bitbucket-pipelines.yml", "cloudbuild.yaml",
+        // K8s/Helm
+        "Chart.yaml", "values.yaml", "kustomization.yaml",
+        // Lock files (optional - might want to skip these)
+        // "package-lock.json", "yarn.lock", "pnpm-lock.yaml",
     ]);
     return codeFileNames.has(basename);
 }