ocerebro 0.2.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ocerebro",
3
- "version": "0.2.3",
3
+ "version": "0.3.1",
4
4
  "description": "OCerebro - Sistema de Memoria para Agentes (Claude Code/MCP)",
5
5
  "main": "bin/ocerebro.js",
6
6
  "bin": {
package/pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ocerebro"
7
- version = "0.2.3"
7
+ version = "0.3.1"
8
8
  description = "OCerebro - Sistema de Memoria para Agentes (Claude Code/MCP)"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -37,6 +37,7 @@ dependencies = [
37
37
  "sentence-transformers>=2.2.0",
38
38
  "mcp>=1.0.0",
39
39
  "anthropic>=0.40.0",
40
+ "spacy>=3.5.0",
40
41
  ]
41
42
 
42
43
  [project.optional-dependencies]
@@ -37,7 +37,8 @@ class Promoter:
37
37
  def __init__(
38
38
  self,
39
39
  working_storage: YAMLStorage,
40
- official_storage: MarkdownStorage
40
+ official_storage: MarkdownStorage,
41
+ entities_db_path: Optional[Path] = None
41
42
  ):
42
43
  """
43
44
  Inicializa o Promoter.
@@ -45,9 +46,12 @@ class Promoter:
45
46
  Args:
46
47
  working_storage: Instância do YAMLStorage
47
48
  official_storage: Instância do MarkdownStorage
49
+ entities_db_path: Path para o EntitiesDB (opcional)
48
50
  """
49
51
  self.working_storage = working_storage
50
52
  self.official_storage = official_storage
53
+ self.entities_db_path = entities_db_path
54
+ self._entities_db = None
51
55
 
52
56
  def promote_session(
53
57
  self,
@@ -223,6 +227,10 @@ class Promoter:
223
227
  content=content
224
228
  )
225
229
 
230
+ # Extrai entidades do frontmatter E do conteúdo
231
+ self._extract_entities_from_frontmatter(draft_id, frontmatter, project)
232
+ self._extract_entities_from_content(draft_id, content)
233
+
226
234
  return PromotionResult(
227
235
  success=True,
228
236
  source_type=draft.get("type", "session"),
@@ -300,6 +308,10 @@ class Promoter:
300
308
  content=body
301
309
  )
302
310
 
311
+ # Extrai entidades do frontmatter E do conteúdo
312
+ self._extract_entities_from_frontmatter(draft_id, frontmatter, project)
313
+ self._extract_entities_from_content(draft_id, body)
314
+
303
315
  return PromotionResult(
304
316
  success=True,
305
317
  source_type=draft.get("type", "session"),
@@ -433,3 +445,61 @@ class Promoter:
433
445
  if existing:
434
446
  existing.update(draft)
435
447
  self.working_storage.write_feature(project, draft_id, existing)
448
+
449
@property
def entities_db(self):
    """Lazily build and cache the EntitiesDB (None when no path was configured)."""
    if self._entities_db is not None:
        return self._entities_db
    if self.entities_db_path:
        # Imported here to keep the dependency optional at construction time.
        from src.index.entities_db import EntitiesDB
        self._entities_db = EntitiesDB(self.entities_db_path)
    return self._entities_db
456
+
457
def _extract_entities_from_frontmatter(
    self,
    memory_id: str,
    frontmatter: Dict[str, Any],
    project: str
) -> List[str]:
    """
    Register graph entities derived from a memory's frontmatter.

    Args:
        memory_id: Memory identifier
        frontmatter: Metadata dictionary
        project: Project name

    Returns:
        IDs of the entities that were created (empty list when no
        EntitiesDB is configured)
    """
    db = self.entities_db
    if not db:
        # Graph support is optional; silently skip when disabled.
        return []

    return db.extract_from_frontmatter(
        memory_id=memory_id,
        frontmatter=frontmatter,
        project=project
    )
482
+
483
def _extract_entities_from_content(
    self,
    memory_id: str,
    content: str
) -> List[str]:
    """
    Register graph entities extracted from body text via spaCy NER.

    Args:
        memory_id: Memory identifier
        content: Text content to analyze

    Returns:
        IDs of the entities that were created (empty list when no
        EntitiesDB is configured)
    """
    db = self.entities_db
    if not db:
        # Graph support is optional; silently skip when disabled.
        return []

    return db.extract_from_content(
        memory_id=memory_id,
        content=content,
        use_spacy=True
    )
@@ -0,0 +1,822 @@
1
+ """EntitiesDB: Grafo de experiência com entidades e relacionamentos"""
2
+
3
+ import sqlite3
4
+ import re
5
+ from pathlib import Path
6
+ from typing import Any, Dict, List, Optional, Tuple, Set
7
+ from datetime import datetime
8
+ from collections import deque
9
+
10
+
11
class EntitiesDB:
    """
    SQLite-backed experience graph of entities and relationships.

    Stores entities extracted from memories (ORG, PERSON, TECH, ...) and the
    relationships between them, and supports associative lookup through BFS
    traversal of the graph.

    Differences vs LightRAG-style pipelines:
    - local extraction with spaCy NER (offline, free)
    - frontmatter fields become initial nodes (no LLM calls)
    - implicit edges via project/tags/type
    """

    def __init__(self, db_path: Path):
        """
        Initialize the EntitiesDB.

        Args:
            db_path: Path to the database file (parent dirs are created)
        """
        self.db_path = db_path
        db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_schema()

    def _connect(self) -> sqlite3.Connection:
        """Open a connection (WAL journal, dict-like rows)."""
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        return conn

    def _init_schema(self):
        """Create tables and indexes if they do not exist yet."""
        conn = self._connect()

        # Entities table.
        # NOTE(review): the FOREIGN KEY references a `memories` table that is
        # not created here — presumably it lives in another DB/schema. SQLite
        # does not enforce FKs unless PRAGMA foreign_keys=ON, so this is
        # currently informational only; confirm intent.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS entities (
                id TEXT PRIMARY KEY,
                memory_id TEXT,
                entity_name TEXT,
                entity_type TEXT,
                confidence REAL DEFAULT 1.0,
                span_start INTEGER,
                span_end INTEGER,
                context_snippet TEXT,
                created_at TEXT DEFAULT (datetime('now')),
                FOREIGN KEY (memory_id) REFERENCES memories(id)
            )
        """)

        # Lookup indexes.
        conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_entities_name
            ON entities(entity_name)
        """)
        conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_entities_type
            ON entities(entity_type)
        """)
        conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_entities_memory
            ON entities(memory_id)
        """)

        # Content-hash cache (avoids re-running NER on unchanged content).
        conn.execute("""
            CREATE TABLE IF NOT EXISTS entity_cache (
                memory_id TEXT PRIMARY KEY,
                content_hash TEXT,
                processed_at TEXT DEFAULT (datetime('now')),
                entity_count INTEGER DEFAULT 0
            )
        """)
        conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_entity_cache_hash
            ON entity_cache(content_hash)
        """)

        # Relationships table.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS entity_relationships (
                id TEXT PRIMARY KEY,
                source_entity TEXT,
                target_entity TEXT,
                relationship_type TEXT,
                memory_id TEXT,
                created_at TEXT DEFAULT (datetime('now')),
                FOREIGN KEY (memory_id) REFERENCES memories(id)
            )
        """)
        conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_relationships_source
            ON entity_relationships(source_entity)
        """)
        conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_relationships_target
            ON entity_relationships(target_entity)
        """)

        conn.commit()
        conn.close()

    # ========================================================================
    # ENTITY OPERATIONS
    # ========================================================================

    def insert_entity(
        self,
        memory_id: str,
        entity_name: str,
        entity_type: str,
        confidence: float = 1.0,
        span_start: int = 0,
        span_end: int = 0,
        context_snippet: str = ""
    ) -> str:
        """
        Insert (or replace) one entity.

        Args:
            memory_id: Source memory ID
            entity_name: Entity name
            entity_type: Type (ORG, PERSON, TECH, ...)
            confidence: Extraction confidence (0-1)
            span_start: Start offset in the source text
            span_end: End offset in the source text
            context_snippet: Text surrounding the entity

        Returns:
            The entity ID (deterministic per memory+name, so re-inserting
            the same entity upserts instead of duplicating)
        """
        entity_id = f"ent_{memory_id}_{entity_name.lower().replace(' ', '_')}"

        conn = self._connect()
        conn.execute("""
            INSERT OR REPLACE INTO entities
            (id, memory_id, entity_name, entity_type, confidence, span_start, span_end, context_snippet)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            entity_id,
            memory_id,
            entity_name,
            entity_type,
            confidence,
            span_start,
            span_end,
            context_snippet
        ))
        conn.commit()
        conn.close()

        return entity_id

    def get_entities_by_memory(self, memory_id: str) -> List[Dict[str, Any]]:
        """
        Fetch all entities belonging to one memory.

        Args:
            memory_id: Memory ID

        Returns:
            List of entity row dicts
        """
        conn = self._connect()
        cursor = conn.execute(
            "SELECT * FROM entities WHERE memory_id = ?",
            (memory_id,)
        )
        results = [dict(row) for row in cursor.fetchall()]
        conn.close()
        return results

    def get_entities_by_name(self, entity_name: str) -> List[Dict[str, Any]]:
        """
        Find entities by name (case-insensitive exact match).

        Args:
            entity_name: Entity name

        Returns:
            List of entity row dicts
        """
        conn = self._connect()
        cursor = conn.execute(
            "SELECT * FROM entities WHERE LOWER(entity_name) = LOWER(?)",
            (entity_name,)
        )
        results = [dict(row) for row in cursor.fetchall()]
        conn.close()
        return results

    def delete_entities_by_memory(self, memory_id: str) -> int:
        """
        Delete a memory's entities (and its relationships).

        Args:
            memory_id: Memory ID

        Returns:
            Number of entity rows removed (relationship rows not counted)
        """
        conn = self._connect()
        cursor = conn.execute(
            "DELETE FROM entities WHERE memory_id = ?",
            (memory_id,)
        )
        deleted = cursor.rowcount

        # Cascade to relationships originating from this memory.
        conn.execute(
            "DELETE FROM entity_relationships WHERE memory_id = ?",
            (memory_id,)
        )

        conn.commit()
        conn.close()
        return deleted

    # ========================================================================
    # RELATIONSHIP OPERATIONS
    # ========================================================================

    def insert_relationship(
        self,
        source_entity: str,
        target_entity: str,
        relationship_type: str,
        memory_id: str
    ) -> str:
        """
        Insert (or replace) a relationship between two entities.

        Args:
            source_entity: Source entity name
            target_entity: Target entity name
            relationship_type: Relationship type label
            memory_id: Source memory ID

        Returns:
            The relationship ID
        """
        rel_id = f"rel_{source_entity}_{target_entity}_{memory_id}"

        conn = self._connect()
        conn.execute("""
            INSERT OR REPLACE INTO entity_relationships
            (id, source_entity, target_entity, relationship_type, memory_id)
            VALUES (?, ?, ?, ?, ?)
        """, (rel_id, source_entity, target_entity, relationship_type, memory_id))
        conn.commit()
        conn.close()

        return rel_id

    def get_relationships(self, entity_name: str) -> List[Dict[str, Any]]:
        """
        Fetch all relationships touching an entity.

        Args:
            entity_name: Entity name (case-insensitive)

        Returns:
            Relationship dicts, both outgoing and incoming, each tagged
            with a 'direction' key
        """
        conn = self._connect()

        # Relationships where the entity is the source.
        cursor = conn.execute("""
            SELECT 'outgoing' as direction, r.*, e.entity_type as target_type
            FROM entity_relationships r
            LEFT JOIN entities e ON LOWER(e.entity_name) = LOWER(r.target_entity)
            WHERE LOWER(r.source_entity) = LOWER(?)
        """, (entity_name,))
        outgoing = [dict(row) for row in cursor.fetchall()]

        # Relationships where the entity is the target.
        cursor = conn.execute("""
            SELECT 'incoming' as direction, r.*, e.entity_type as source_type
            FROM entity_relationships r
            LEFT JOIN entities e ON LOWER(e.entity_name) = LOWER(r.source_entity)
            WHERE LOWER(r.target_entity) = LOWER(?)
        """, (entity_name,))
        incoming = [dict(row) for row in cursor.fetchall()]

        conn.close()
        return outgoing + incoming

    # ========================================================================
    # GRAPH TRAVERSAL (BFS)
    # ========================================================================

    def traverse(
        self,
        start_entity: str,
        depth: int = 2,
        entity_types: Optional[List[str]] = None,
        max_nodes: int = 50
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """
        BFS traversal starting from an entity.

        Args:
            start_entity: Name of the starting entity
            depth: Maximum depth (1-3 recommended)
            entity_types: Optional entity-type filter
            max_nodes: Hard cap on returned nodes

        Returns:
            (nodes, edges) tuple suitable for graph visualization
        """
        nodes: Dict[str, Dict[str, Any]] = {}
        edges: List[Dict[str, Any]] = []

        # BFS queue of (entity name, depth).
        queue: deque = deque([(start_entity, 0)])
        visited: Set[str] = set()

        while queue and len(nodes) < max_nodes:
            entity_name, current_depth = queue.popleft()

            if entity_name in visited:
                continue
            visited.add(entity_name)

            # Resolve the entity; unknown names are silently skipped.
            entities = self.get_entities_by_name(entity_name)
            if not entities:
                continue

            entity = entities[0]  # first match wins

            # Optional type filter.
            if entity_types and entity["entity_type"] not in entity_types:
                continue

            # Add (or update) the node.
            node_key = entity["entity_name"].lower()
            if node_key not in nodes:
                nodes[node_key] = {
                    "name": entity["entity_name"],
                    "type": entity["entity_type"],
                    "depth": current_depth,
                    "memory_count": 1
                }
            else:
                nodes[node_key]["memory_count"] += 1

            # Do not expand past the depth limit.
            if current_depth >= depth:
                continue

            for rel in self.get_relationships(entity_name):
                # (The original had an if/else here whose branches were
                # identical — collapsed to straight assignments.)
                source = rel["source_entity"]
                target = rel["target_entity"]

                # Record the edge once per (source, target) pair.
                edge_key = f"{source.lower()}_{target.lower()}"
                if not any(e.get("key") == edge_key for e in edges):
                    edges.append({
                        "key": edge_key,
                        "source": source,
                        "target": target,
                        "type": rel["relationship_type"],
                        "memory_id": rel.get("memory_id")
                    })

                # Enqueue the neighbor on the far side of the edge.
                next_entity = target if rel["direction"] == "outgoing" else source
                if next_entity not in visited:
                    queue.append((next_entity, current_depth + 1))

        return list(nodes.values()), edges

    # ========================================================================
    # ENTITY SEARCH (integrates with QueryEngine)
    # ========================================================================

    def search_by_query(self, query: str, limit: int = 20) -> List[Dict[str, Any]]:
        """
        Find memories connected to entities mentioned in a query.

        Extracts candidate entities from the query text and returns the
        memories linked to them, with a normalized graph score.

        Args:
            query: Search text
            limit: Per-entity result limit

        Returns:
            List of {memory_id, matched_entities, score} dicts
        """
        query_entities = self._extract_query_entities(query)
        if not query_entities:
            return []

        conn = self._connect()

        results: Dict[str, Dict[str, Any]] = {}
        for entity_name in query_entities:
            cursor = conn.execute("""
                SELECT DISTINCT e.memory_id, e.entity_name, e.entity_type,
                       COUNT(*) as entity_count
                FROM entities e
                WHERE LOWER(e.entity_name) = LOWER(?)
                GROUP BY e.memory_id
                ORDER BY entity_count DESC
                LIMIT ?
            """, (entity_name, limit))

            for row in cursor.fetchall():
                memory_id = row["memory_id"]
                if memory_id not in results:
                    results[memory_id] = {
                        "memory_id": memory_id,
                        "matched_entities": [],
                        "score": 0.0
                    }

                results[memory_id]["matched_entities"].append({
                    "name": row["entity_name"],
                    "type": row["entity_type"]
                })
                results[memory_id]["score"] += 0.5  # base score per matched entity

        conn.close()

        # Normalize scores to [0, 1].
        if results:
            max_score = max(r["score"] for r in results.values())
            for r in results.values():
                r["score"] = r["score"] / max_score if max_score > 0 else 0

        return list(results.values())

    def _extract_query_entities(self, query: str) -> List[str]:
        """
        Extract candidate entity names from a query.

        Uses simple heuristics (no spaCy, to avoid the dependency here):
        quoted phrases, capitalized words, and acronyms.

        Args:
            query: Search text

        Returns:
            List of candidate entity names
        """
        entities = set()

        # Quoted phrases.
        quoted = re.findall(r'"([^"]+)"', query)
        entities.update(quoted)

        # Capitalized words (likely proper nouns).
        capitalized = re.findall(r'\b[A-Z][a-zA-Z]*\b', query)
        entities.update(capitalized)

        # Acronyms.
        acronyms = re.findall(r'\b[A-Z]{2,}\b', query)
        entities.update(acronyms)

        # Drop Portuguese stop words and very short tokens.
        stop_words = {"A", "O", "Os", "As", "Um", "Uma", "Em", "De", "Do", "Da", "Com", "Por", "Para"}
        entities = {e for e in entities if e not in stop_words and len(e) > 2}

        return list(entities)

    # ========================================================================
    # FRONTMATTER INTEGRATION
    # ========================================================================

    def extract_from_frontmatter(
        self,
        memory_id: str,
        frontmatter: Dict[str, Any],
        project: str
    ) -> List[str]:
        """
        Create graph entities from a memory's frontmatter.

        Args:
            memory_id: Memory ID
            frontmatter: Metadata dictionary
            project: Project name

        Returns:
            List of created entity IDs
        """
        entity_ids = []

        # The memory type becomes a META node.
        if "type" in frontmatter:
            eid = self.insert_entity(
                memory_id,
                f"TYPE:{frontmatter['type']}",
                "META",
                confidence=1.0
            )
            entity_ids.append(eid)

        # The project becomes a PROJECT node.
        if project:
            eid = self.insert_entity(
                memory_id,
                project,
                "PROJECT",
                confidence=1.0
            )
            entity_ids.append(eid)

        # Tags become TAG nodes; accepts a comma-separated string or a list.
        if "tags" in frontmatter:
            tags = frontmatter.get("tags", "")
            if isinstance(tags, str):
                tags = [t.strip() for t in tags.split(",") if t.strip()]
            elif not isinstance(tags, list):
                tags = []
            for tag in tags:
                eid = self.insert_entity(
                    memory_id,
                    f"TAG:{tag}",
                    "TAG",
                    confidence=1.0
                )
                entity_ids.append(eid)

        return entity_ids

    def extract_from_content(
        self,
        memory_id: str,
        content: str,
        use_spacy: bool = True
    ) -> List[str]:
        """
        Extract entities from content text, preferably with spaCy NER.

        Skips work entirely when the content hash matches the cache and
        entities already exist. Falls back to capitalization heuristics
        when spaCy is unavailable or fails at runtime.

        Args:
            memory_id: Memory ID
            content: Text content
            use_spacy: Use spaCy (default: True)

        Returns:
            List of created entity IDs (empty when cached & unchanged)
        """
        # Cache check — only reprocess when the content changed.
        cached_hash = self.get_cached_hash(memory_id)
        current_hash = self._compute_hash(content)

        if cached_hash == current_hash:
            if self.get_entities_by_memory(memory_id):
                return []  # already processed, nothing changed

        # New or changed content — drop stale entities and reprocess.
        self.delete_entities_by_memory(memory_id)

        if use_spacy:
            try:
                entity_ids = self._extract_with_spacy(memory_id, content)
            except ImportError:
                # spaCy not installed — heuristic fallback.
                entity_ids = self._extract_entities_fallback(memory_id, content)
            except Exception:
                # BUGFIX: the old code swallowed runtime spaCy failures with
                # `pass` AFTER deleting the old entities, leaving the memory
                # with no entities and a stale cache. Fall back to the
                # heuristic extractor instead.
                entity_ids = self._extract_entities_fallback(memory_id, content)
        else:
            entity_ids = self._extract_entities_fallback(memory_id, content)

        # Record the processed hash so unchanged content is skipped next time.
        self.update_cache(memory_id, content, len(entity_ids))

        return entity_ids

    def _extract_with_spacy(self, memory_id: str, content: str) -> List[str]:
        """
        Run spaCy NER over (a prefix of) the content and persist entities.

        Raises:
            ImportError: when spaCy is not installed
            OSError: when the model cannot be loaded even after download
        """
        import spacy

        # Load the Portuguese model; attempt a one-off download if missing.
        try:
            nlp = spacy.load("pt_core_news_sm")
        except OSError:
            import subprocess
            import sys
            # BUGFIX: use the running interpreter, not the bare "python"
            # binary, which may point elsewhere (or not exist) in venvs.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", "pt_core_news_sm"],
                capture_output=True
            )
            nlp = spacy.load("pt_core_news_sm")

        doc = nlp(content[:5000])  # cap input size for performance

        # Map spaCy labels onto our entity types.
        label_map = {
            "ORG": "ORG",
            "PERSON": "PERSON",
            "GPE": "LOC",
            "LOC": "LOC",
            "PRODUCT": "PRODUCT",
            "EVENT": "EVENT",
            "WORK_OF_ART": "PRODUCT",
            "LAW": "PRODUCT",
            "LANGUAGE": "TECH",
        }

        entity_ids = []
        for ent in doc.ents:
            if ent.label_ not in label_map:
                continue
            # Custom-extension score when present, else a fixed default.
            confidence = float(ent._.get("nerd_score", 0.8)) if hasattr(ent._, "nerd_score") else 0.8
            eid = self.insert_entity(
                memory_id,
                ent.text,
                label_map[ent.label_],
                confidence=confidence,
                span_start=ent.start_char,
                span_end=ent.end_char,
                context_snippet=content[max(0, ent.start_char - 25):ent.end_char + 25][:50]
            )
            entity_ids.append(eid)
        return entity_ids

    def _extract_entities_fallback(
        self,
        memory_id: str,
        content: str
    ) -> List[str]:
        """
        spaCy-free fallback using simple capitalization heuristics.

        Args:
            memory_id: Memory ID
            content: Text content

        Returns:
            List of created entity IDs (capped at 20)
        """
        entity_ids = []

        # Capitalized words — likely proper nouns / org names.
        capitalized = re.findall(r'\b[A-Z][a-zA-Z]{2,}\b', content[:2000])

        # Filter common words that are not entities.
        common_words = {
            "The", "This", "That", "These", "Those", "What", "When", "Where", "Why", "How",
            "Para", "Com", "Por", "Em", "De", "Do", "Da", "Dos", "Das", "Uma", "Um",
            "Como", "Quando", "Onde", "Qual", "Quais", "Quem", "Sobre"
        }

        entities = set(e for e in capitalized if e not in common_words)

        for entity in list(entities)[:20]:  # cap at 20 entities
            # The regex guarantees an uppercase first letter, so the old
            # `... if entity[0].isupper() else "PERSON"` branch was dead code;
            # every heuristic match is classified as ORG.
            eid = self.insert_entity(
                memory_id,
                entity,
                "ORG",
                confidence=0.5,  # low confidence without spaCy
                context_snippet=entity
            )
            entity_ids.append(eid)

        return entity_ids

    # ========================================================================
    # HASH CACHE (avoids reprocessing)
    # ========================================================================

    def _compute_hash(self, content: str) -> str:
        """
        Compute a short content hash.

        Args:
            content: Text content

        Returns:
            First 16 hex chars of the SHA256 digest
        """
        import hashlib
        return hashlib.sha256(content.encode('utf-8')).hexdigest()[:16]

    def get_cached_hash(self, memory_id: str) -> Optional[str]:
        """
        Read the cached content hash for a memory.

        Args:
            memory_id: Memory ID

        Returns:
            Stored hash, or None when never processed
        """
        conn = self._connect()
        cursor = conn.execute(
            "SELECT content_hash FROM entity_cache WHERE memory_id = ?",
            (memory_id,)
        )
        row = cursor.fetchone()
        conn.close()
        return row["content_hash"] if row else None

    def is_content_changed(self, memory_id: str, content: str) -> bool:
        """
        Check whether content changed since the last processing run.

        Args:
            memory_id: Memory ID
            content: Text content

        Returns:
            True when changed (or never processed), False when identical
        """
        current_hash = self._compute_hash(content)
        cached_hash = self.get_cached_hash(memory_id)
        return cached_hash != current_hash

    def update_cache(self, memory_id: str, content: str, entity_count: int) -> None:
        """
        Record the processed content hash for a memory.

        Args:
            memory_id: Memory ID
            content: Text content
            entity_count: Number of extracted entities
        """
        content_hash = self._compute_hash(content)

        conn = self._connect()
        conn.execute("""
            INSERT OR REPLACE INTO entity_cache
            (memory_id, content_hash, processed_at, entity_count)
            VALUES (?, ?, datetime('now'), ?)
        """, (memory_id, content_hash, entity_count))
        conn.commit()
        conn.close()

    def clear_cache(self, memory_id: str) -> None:
        """
        Drop the cache entry for one memory.

        Args:
            memory_id: Memory ID
        """
        conn = self._connect()
        conn.execute(
            "DELETE FROM entity_cache WHERE memory_id = ?",
            (memory_id,)
        )
        conn.commit()
        conn.close()

    # ========================================================================
    # STATISTICS
    # ========================================================================

    def get_stats(self) -> Dict[str, Any]:
        """
        Compute aggregate graph statistics.

        Returns:
            Dict with entity/relationship totals, per-type counts and the
            average relationships-per-entity ratio
        """
        conn = self._connect()

        total_entities = conn.execute(
            "SELECT COUNT(*) FROM entities"
        ).fetchone()[0]

        total_relationships = conn.execute(
            "SELECT COUNT(*) FROM entity_relationships"
        ).fetchone()[0]

        by_type = conn.execute(
            "SELECT entity_type, COUNT(*) FROM entities GROUP BY entity_type"
        ).fetchall()

        conn.close()

        return {
            "total_entities": total_entities,
            "total_relationships": total_relationships,
            "by_type": dict(by_type),
            "avg_relationships_per_entity": (
                total_relationships / total_entities if total_entities > 0 else 0
            )
        }
@@ -6,6 +6,7 @@ from dataclasses import dataclass
6
6
 
7
7
  from .metadata_db import MetadataDB
8
8
  from .embeddings_db import EmbeddingsDB
9
+ from .entities_db import EntitiesDB
9
10
 
10
11
 
11
12
  @dataclass
@@ -16,7 +17,7 @@ class QueryResult:
16
17
  project: str
17
18
  title: str
18
19
  score: float
19
- source: str # 'fts', 'semantic', 'metadata'
20
+ source: str # 'fts', 'semantic', 'metadata', 'graph'
20
21
  metadata: Dict[str, Any] = None
21
22
 
22
23
 
@@ -24,16 +25,18 @@ class QueryEngine:
24
25
  """
25
26
  Engine de consultas híbridas.
26
27
 
27
- Combina três tipos de busca:
28
+ Combina quatro tipos de busca:
28
29
  - Metadata: filtros estruturados (projeto, tipo, tags)
29
30
  - FTS: busca full-text no conteúdo
30
31
  - Semantic: busca por similaridade de embeddings
32
+ - Graph: busca por entidades e relacionamentos
31
33
  """
32
34
 
33
35
  def __init__(
34
36
  self,
35
37
  metadata_db: MetadataDB,
36
- embeddings_db: EmbeddingsDB
38
+ embeddings_db: EmbeddingsDB,
39
+ entities_db: Optional[EntitiesDB] = None
37
40
  ):
38
41
  """
39
42
  Inicializa o QueryEngine.
@@ -41,9 +44,11 @@ class QueryEngine:
41
44
  Args:
42
45
  metadata_db: Instância do MetadataDB
43
46
  embeddings_db: Instância do EmbeddingsDB
47
+ entities_db: Instância do EntitiesDB (opcional)
44
48
  """
45
49
  self.metadata_db = metadata_db
46
50
  self.embeddings_db = embeddings_db
51
+ self.entities_db = entities_db
47
52
 
48
53
  def search(
49
54
  self,
@@ -53,11 +58,13 @@ class QueryEngine:
53
58
  limit: int = 10,
54
59
  use_fts: bool = True,
55
60
  use_semantic: bool = True,
56
- fts_weight: float = 0.4,
57
- semantic_weight: float = 0.6
61
+ use_graph: bool = True,
62
+ fts_weight: float = 0.3,
63
+ semantic_weight: float = 0.5,
64
+ graph_weight: float = 0.2
58
65
  ) -> List[QueryResult]:
59
66
  """
60
- Busca híbrida combinando FTS e semantic.
67
+ Busca híbrida combinando FTS, semantic e graph.
61
68
 
62
69
  Args:
63
70
  query: Texto de busca
@@ -66,8 +73,10 @@ class QueryEngine:
66
73
  limit: Limite de resultados
67
74
  use_fts: Usar busca FTS
68
75
  use_semantic: Usar busca semantic
76
+ use_graph: Usar busca por graph
69
77
  fts_weight: Peso da busca FTS
70
78
  semantic_weight: Peso da busca semantic
79
+ graph_weight: Peso da busca graph
71
80
 
72
81
  Returns:
73
82
  Lista de resultados ordenados por relevância
@@ -78,6 +87,7 @@ class QueryEngine:
78
87
  if use_fts:
79
88
  fts_results = self._search_fts(query, project, mem_type, limit)
80
89
  for r in fts_results:
90
+ r.score *= fts_weight # Aplica peso FTS desde o início
81
91
  results[r.memory_id] = r
82
92
 
83
93
  # Busca Semantic
@@ -85,12 +95,9 @@ class QueryEngine:
85
95
  semantic_results = self._search_semantic(query, project, limit)
86
96
  for r in semantic_results:
87
97
  if r.memory_id in results:
88
- # Combina scores
98
+ # Combina scores: média ponderada (FTS já tem peso aplicado)
89
99
  existing = results[r.memory_id]
90
- combined_score = (
91
- existing.score * fts_weight +
92
- r.score * semantic_weight
93
- )
100
+ combined_score = existing.score + (r.score * semantic_weight)
94
101
  results[r.memory_id] = QueryResult(
95
102
  memory_id=r.memory_id,
96
103
  type=r.type,
@@ -104,6 +111,27 @@ class QueryEngine:
104
111
  r.score *= semantic_weight
105
112
  results[r.memory_id] = r
106
113
 
114
+ # Busca por Graph (entidades)
115
+ if use_graph and self.entities_db:
116
+ graph_results = self._search_by_graph(query, limit)
117
+ for r in graph_results:
118
+ if r.memory_id in results:
119
+ # Combina scores: soma ponderada (scores anteriores já têm peso)
120
+ existing = results[r.memory_id]
121
+ combined_score = existing.score + (r.score * graph_weight)
122
+ results[r.memory_id] = QueryResult(
123
+ memory_id=r.memory_id,
124
+ type=r.type,
125
+ project=r.project,
126
+ title=r.title,
127
+ score=combined_score,
128
+ source="hybrid",
129
+ metadata=r.metadata
130
+ )
131
+ else:
132
+ r.score *= graph_weight
133
+ results[r.memory_id] = r
134
+
107
135
  # Filtra por tipo se especificado
108
136
  if mem_type:
109
137
  results = {
@@ -198,6 +226,48 @@ class QueryEngine:
198
226
 
199
227
  return results
200
228
 
229
+ def _search_by_graph(
230
+ self,
231
+ query: str,
232
+ limit: int
233
+ ) -> List[QueryResult]:
234
+ """
235
+ Busca por entidades no grafo.
236
+
237
+ Extrai entidades da query e retorna memórias conectadas.
238
+
239
+ Args:
240
+ query: Texto de busca
241
+ limit: Limite de resultados
242
+
243
+ Returns:
244
+ Lista de resultados com score de grafo
245
+ """
246
+ if not self.entities_db:
247
+ return []
248
+
249
+ graph_results = self.entities_db.search_by_query(query, limit * 2)
250
+
251
+ results = []
252
+ for item in graph_results:
253
+ # Busca metadados adicionais
254
+ memory = self.metadata_db.get_by_id(item["memory_id"])
255
+
256
+ results.append(QueryResult(
257
+ memory_id=item["memory_id"],
258
+ type=memory.get("type", "unknown") if memory else "unknown",
259
+ project=memory.get("project", "unknown") if memory else "unknown",
260
+ title=memory.get("title", item["memory_id"]) if memory else item["memory_id"],
261
+ score=item["score"],
262
+ source="graph",
263
+ metadata={
264
+ "matched_entities": item.get("matched_entities", []),
265
+ "graph_score": item["score"]
266
+ }
267
+ ))
268
+
269
+ return results
270
+
201
271
  def search_by_metadata(
202
272
  self,
203
273
  project: Optional[str] = None,
package/src/mcp/server.py CHANGED
@@ -26,6 +26,7 @@ from src.consolidation.extractor import Extractor
26
26
  from src.consolidation.promoter import Promoter
27
27
  from src.index.metadata_db import MetadataDB
28
28
  from src.index.embeddings_db import EmbeddingsDB
29
+ from src.index.entities_db import EntitiesDB
29
30
  from src.index.queries import QueryEngine
30
31
  from src.hooks.custom_loader import HooksLoader, HookRunner
31
32
  from src.diff.memory_diff import MemoryDiff
@@ -81,10 +82,15 @@ class CerebroMCP:
81
82
 
82
83
  self.metadata_db = MetadataDB(self.cerebro_path / "index" / "metadata.db")
83
84
  self.embeddings_db = EmbeddingsDB(self.cerebro_path / "index" / "embeddings.db")
84
- self.query_engine = QueryEngine(self.metadata_db, self.embeddings_db)
85
+ self.entities_db = EntitiesDB(self.cerebro_path / "index" / "entities.db")
86
+ self.query_engine = QueryEngine(self.metadata_db, self.embeddings_db, self.entities_db)
85
87
 
86
88
  self.extractor = Extractor(self.raw_storage, self.working_storage)
87
- self.promoter = Promoter(self.working_storage, self.official_storage)
89
+ self.promoter = Promoter(
90
+ self.working_storage,
91
+ self.official_storage,
92
+ self.cerebro_path / "index" / "entities.db"
93
+ )
88
94
 
89
95
  self.memory_view = MemoryView(
90
96
  self.cerebro_path,
@@ -340,6 +346,31 @@ class CerebroMCP:
340
346
  }
341
347
  }
342
348
  }
349
+ ),
350
+ Tool(
351
+ name="cerebro_graph",
352
+ description="Explora grafo de entidades - mostra conexões entre projetos, tecnologias, pessoas e decisões",
353
+ inputSchema={
354
+ "type": "object",
355
+ "properties": {
356
+ "entity": {
357
+ "type": "string",
358
+ "description": "Nome da entidade para iniciar traversal (ex: 'MedicsPro', 'JWT', 'autenticação')"
359
+ },
360
+ "depth": {
361
+ "type": "integer",
362
+ "description": "Profundidade máxima do traversal (1-3, padrão: 2)",
363
+ "default": 2
364
+ },
365
+ "types": {
366
+ "type": "array",
367
+ "items": {"type": "string"},
368
+ "description": "Filtrar por tipos de entidade (ex: ['ORG', 'TECH'])",
369
+ "default": ["ORG", "TECH", "PERSON", "PROJECT"]
370
+ }
371
+ },
372
+ "required": ["entity"]
373
+ }
343
374
  )
344
375
  ]
345
376
 
@@ -377,6 +408,8 @@ class CerebroMCP:
377
408
  result = self._gc(arguments)
378
409
  elif name == "cerebro_capture_memory":
379
410
  result = self._capture_memory(arguments)
411
+ elif name == "cerebro_graph":
412
+ result = self._cerebro_graph(arguments)
380
413
  else:
381
414
  return [TextContent(type="text", text=f"Ferramenta desconhecida: {name}")]
382
415
 
@@ -499,7 +532,8 @@ class CerebroMCP:
499
532
  "",
500
533
  "Índice:",
501
534
  f" Metadata DB: {self.cerebro_path / 'index' / 'metadata.db'}",
502
- f" Embeddings DB: {self.cerebro_path / 'index' / 'embeddings.db'}"
535
+ f" Embeddings DB: {self.cerebro_path / 'index' / 'embeddings.db'}",
536
+ f" Entities DB: {self.cerebro_path / 'index' / 'entities.db'}"
503
537
  ]
504
538
 
505
539
  return "\n".join(lines)
@@ -679,8 +713,14 @@ Uma chamada por memória. O sistema salva e indexa automaticamente.
679
713
 
680
714
  desc_match = re.search(r'description:\s*(.*)', content)
681
715
  type_match = re.search(r'type:\s*(.*)', content)
716
+ project_match = re.search(r'project:\s*(.*)', content)
717
+ tags_match = re.search(r'tags:\s*(.*)', content)
718
+
682
719
  desc = desc_match.group(1).strip() if desc_match else "sem descrição"
683
720
  m_type = type_match.group(1).strip() if type_match else "project"
721
+ project = project_match.group(1).strip() if project_match else "unknown"
722
+ tags = tags_match.group(1).strip() if tags_match else ""
723
+
684
724
  ts = datetime.now().strftime("%Y-%m-%d")
685
725
  entry = f"- [{m_type}] {mem_name}.md ({ts}): {desc}\n"
686
726
 
@@ -693,6 +733,29 @@ Uma chamada por memória. O sistema salva e indexa automaticamente.
693
733
  else:
694
734
  index_path.write_text(f"# Memórias do Projeto\n\n{entry}", encoding="utf-8")
695
735
 
736
+ # BUG FIX: Registrar entidades no grafo (frontmatter + conteúdo)
737
+ if self.entities_db:
738
+ import yaml
739
+ frontmatter_match = re.match(r'^---\n(.*?)\n---\n(.*)$', content, re.DOTALL)
740
+ if frontmatter_match:
741
+ try:
742
+ frontmatter = yaml.safe_load(frontmatter_match.group(1))
743
+ # Extrai entidades do frontmatter
744
+ self.entities_db.extract_from_frontmatter(
745
+ memory_id=mem_name,
746
+ frontmatter=frontmatter or {},
747
+ project=project
748
+ )
749
+ # Extrai entidades do conteúdo (spaCy NER)
750
+ body_content = frontmatter_match.group(2)
751
+ self.entities_db.extract_from_content(
752
+ memory_id=mem_name,
753
+ content=body_content,
754
+ use_spacy=True
755
+ )
756
+ except Exception as e:
757
+ pass # Falha silenciosa se frontmatter inválido
758
+
696
759
  return f"✅ Memória '{mem_name}' salva em {file_path}"
697
760
 
698
761
  def _remember(self, args: Dict[str, Any]) -> str:
@@ -717,6 +780,86 @@ Uma chamada por memória. O sistema salva e indexa automaticamente.
717
780
  )
718
781
  return gc.generate_gc_report(results)
719
782
 
783
+ def _cerebro_graph(self, args: Dict[str, Any]) -> str:
784
+ """Explora grafo de entidades"""
785
+ entity = args.get("entity")
786
+ if not entity:
787
+ return "Erro: 'entity' é obrigatório para cerebro_graph"
788
+
789
+ depth = args.get("depth", 2)
790
+ entity_types = args.get("types", ["ORG", "TECH", "PERSON", "PROJECT"])
791
+
792
+ # Limita profundidade máxima para evitar traversal muito grande
793
+ depth = min(depth, 3)
794
+
795
+ nodes, edges = self.entities_db.traverse(
796
+ start_entity=entity,
797
+ depth=depth,
798
+ entity_types=entity_types,
799
+ max_nodes=50
800
+ )
801
+
802
+ if not nodes:
803
+ return f"Nenhuma entidade encontrada para '{entity}'"
804
+
805
+ # Formata grafo como árvore
806
+ return self._format_graph(nodes, edges, entity)
807
+
808
+ def _format_graph(
809
+ self,
810
+ nodes: List[Dict[str, Any]],
811
+ edges: List[Dict[str, Any]],
812
+ root_entity: str
813
+ ) -> str:
814
+ """Formata grafo como árvore visual"""
815
+ lines = [f"## Grafo de '{root_entity}'\n"]
816
+ lines.append(f"**{len(nodes)}** entidades encontradas, **{len(edges)}** conexões\n")
817
+
818
+ # Constroi adjacency list
819
+ adj: Dict[str, List[Dict[str, Any]]] = {}
820
+ for edge in edges:
821
+ source = edge["source"].lower()
822
+ if source not in adj:
823
+ adj[source] = []
824
+ adj[source].append(edge)
825
+
826
+ # BFS para imprimir árvore
827
+ visited = set()
828
+ queue = [(root_entity.lower(), 0)]
829
+
830
+ while queue:
831
+ entity_name, depth = queue.pop(0)
832
+
833
+ if entity_name in visited:
834
+ continue
835
+ visited.add(entity_name)
836
+
837
+ # Encontra nó correspondente
838
+ node = next((n for n in nodes if n["name"].lower() == entity_name), None)
839
+ if not node:
840
+ continue
841
+
842
+ # Imprime nó
843
+ prefix = " " * depth
844
+ connector = "├─ " if depth > 0 else ""
845
+ lines.append(f"{prefix}{connector}{node['name']} ({node['type']})")
846
+
847
+ # Adiciona filhos na fila
848
+ if depth < 3:
849
+ children = adj.get(entity_name, [])
850
+ for child in children:
851
+ child_name = child["target"].lower()
852
+ if child_name not in visited:
853
+ queue.append((child_name, depth + 1))
854
+
855
+ # Lista todas as arestas
856
+ if edges:
857
+ lines.append("\n## Conexões")
858
+ for edge in edges:
859
+ lines.append(f"- {edge['source']} → {edge['target']} ({edge['type']})")
860
+
861
+ return "\n".join(lines)
862
+
720
863
 
721
864
  async def main():
722
865
  """Entry point do MCP Server"""