ocerebro 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ocerebro",
3
- "version": "0.2.3",
3
+ "version": "0.3.0",
4
4
  "description": "OCerebro - Sistema de Memoria para Agentes (Claude Code/MCP)",
5
5
  "main": "bin/ocerebro.js",
6
6
  "bin": {
package/pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ocerebro"
7
- version = "0.2.3"
7
+ version = "0.3.0"
8
8
  description = "OCerebro - Sistema de Memoria para Agentes (Claude Code/MCP)"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -37,7 +37,8 @@ class Promoter:
37
37
  def __init__(
38
38
  self,
39
39
  working_storage: YAMLStorage,
40
- official_storage: MarkdownStorage
40
+ official_storage: MarkdownStorage,
41
+ entities_db_path: Optional[Path] = None
41
42
  ):
42
43
  """
43
44
  Inicializa o Promoter.
@@ -45,9 +46,12 @@ class Promoter:
45
46
  Args:
46
47
  working_storage: Instância do YAMLStorage
47
48
  official_storage: Instância do MarkdownStorage
49
+ entities_db_path: Path para o EntitiesDB (opcional)
48
50
  """
49
51
  self.working_storage = working_storage
50
52
  self.official_storage = official_storage
53
+ self.entities_db_path = entities_db_path
54
+ self._entities_db = None
51
55
 
52
56
  def promote_session(
53
57
  self,
@@ -223,6 +227,9 @@ class Promoter:
223
227
  content=content
224
228
  )
225
229
 
230
+ # Extrai entidades do frontmatter e registra no grafo
231
+ self._extract_entities_from_frontmatter(draft_id, frontmatter, project)
232
+
226
233
  return PromotionResult(
227
234
  success=True,
228
235
  source_type=draft.get("type", "session"),
@@ -300,6 +307,9 @@ class Promoter:
300
307
  content=body
301
308
  )
302
309
 
310
+ # Extrai entidades do frontmatter e registra no grafo
311
+ self._extract_entities_from_frontmatter(draft_id, frontmatter, project)
312
+
303
313
  return PromotionResult(
304
314
  success=True,
305
315
  source_type=draft.get("type", "session"),
@@ -433,3 +443,37 @@ class Promoter:
433
443
  if existing:
434
444
  existing.update(draft)
435
445
  self.working_storage.write_feature(project, draft_id, existing)
446
+
447
+ @property
448
+ def entities_db(self):
449
+ """Lazy load do EntitiesDB"""
450
+ if self._entities_db is None and self.entities_db_path:
451
+ from src.index.entities_db import EntitiesDB
452
+ self._entities_db = EntitiesDB(self.entities_db_path)
453
+ return self._entities_db
454
+
455
+ def _extract_entities_from_frontmatter(
456
+ self,
457
+ memory_id: str,
458
+ frontmatter: Dict[str, Any],
459
+ project: str
460
+ ) -> List[str]:
461
+ """
462
+ Extrai entidades do frontmatter e registra no grafo.
463
+
464
+ Args:
465
+ memory_id: ID da memória
466
+ frontmatter: Dicionário com metadados
467
+ project: Nome do projeto
468
+
469
+ Returns:
470
+ Lista de IDs de entidades criadas
471
+ """
472
+ if not self.entities_db:
473
+ return []
474
+
475
+ return self.entities_db.extract_from_frontmatter(
476
+ memory_id=memory_id,
477
+ frontmatter=frontmatter,
478
+ project=project
479
+ )
@@ -0,0 +1,581 @@
1
"""EntitiesDB: experience graph with entities and relationships."""

import sqlite3
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Set
from datetime import datetime
from collections import deque
from contextlib import closing


class EntitiesDB:
    """
    SQLite-backed database for an experience graph.

    Stores entities extracted from memories (ORG, PERSON, TECH, ...) and
    relationships between them. Supports associative lookup via BFS
    traversal.

    Differences vs LightRAG:
    - local extraction with spaCy NER (offline, free)
    - frontmatter fields as seed nodes (no LLM)
    - implicit edges by project/tags/type
    """

    def __init__(self, db_path: Path):
        """
        Initialize the EntitiesDB.

        Args:
            db_path: Path to the database file; parent directories are
                created if missing.
        """
        self.db_path = db_path
        db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_schema()

    def _connect(self) -> sqlite3.Connection:
        """Open a connection with dict-like rows and WAL journaling."""
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        return conn

    def _init_schema(self):
        """Create tables and indexes if they do not exist."""
        # closing() guarantees the connection is released even if a
        # statement raises (the original leaked it on error).
        with closing(self._connect()) as conn:
            # Entities table
            conn.execute("""
                CREATE TABLE IF NOT EXISTS entities (
                    id TEXT PRIMARY KEY,
                    memory_id TEXT,
                    entity_name TEXT,
                    entity_type TEXT,
                    confidence REAL DEFAULT 1.0,
                    span_start INTEGER,
                    span_end INTEGER,
                    context_snippet TEXT,
                    created_at TEXT DEFAULT (datetime('now')),
                    FOREIGN KEY (memory_id) REFERENCES memories(id)
                )
            """)

            # Lookup indexes
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_entities_name
                ON entities(entity_name)
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_entities_type
                ON entities(entity_type)
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_entities_memory
                ON entities(memory_id)
            """)

            # Relationships table
            conn.execute("""
                CREATE TABLE IF NOT EXISTS entity_relationships (
                    id TEXT PRIMARY KEY,
                    source_entity TEXT,
                    target_entity TEXT,
                    relationship_type TEXT,
                    memory_id TEXT,
                    created_at TEXT DEFAULT (datetime('now')),
                    FOREIGN KEY (memory_id) REFERENCES memories(id)
                )
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_relationships_source
                ON entity_relationships(source_entity)
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_relationships_target
                ON entity_relationships(target_entity)
            """)

            conn.commit()

    # ========================================================================
    # ENTITY OPERATIONS
    # ========================================================================

    def insert_entity(
        self,
        memory_id: str,
        entity_name: str,
        entity_type: str,
        confidence: float = 1.0,
        span_start: int = 0,
        span_end: int = 0,
        context_snippet: str = ""
    ) -> str:
        """
        Insert (or replace) an entity.

        Args:
            memory_id: ID of the source memory.
            entity_name: Entity name.
            entity_type: Type (ORG, PERSON, TECH, ...).
            confidence: Extraction confidence (0-1).
            span_start: Start position in the text.
            span_end: End position in the text.
            context_snippet: Context surrounding the entity.

        Returns:
            Entity ID (derived deterministically from memory + name, so
            re-inserting the same pair overwrites the previous row).
        """
        entity_id = f"ent_{memory_id}_{entity_name.lower().replace(' ', '_')}"

        with closing(self._connect()) as conn:
            conn.execute("""
                INSERT OR REPLACE INTO entities
                (id, memory_id, entity_name, entity_type, confidence, span_start, span_end, context_snippet)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                entity_id,
                memory_id,
                entity_name,
                entity_type,
                confidence,
                span_start,
                span_end,
                context_snippet
            ))
            conn.commit()

        return entity_id

    def get_entities_by_memory(self, memory_id: str) -> List[Dict[str, Any]]:
        """
        Get the entities of a memory.

        Args:
            memory_id: Memory ID.

        Returns:
            List of entity rows as dicts.
        """
        with closing(self._connect()) as conn:
            cursor = conn.execute(
                "SELECT * FROM entities WHERE memory_id = ?",
                (memory_id,)
            )
            return [dict(row) for row in cursor.fetchall()]

    def get_entities_by_name(self, entity_name: str) -> List[Dict[str, Any]]:
        """
        Find entities by name (case-insensitive).

        Args:
            entity_name: Entity name.

        Returns:
            List of entity rows as dicts.
        """
        with closing(self._connect()) as conn:
            cursor = conn.execute(
                "SELECT * FROM entities WHERE LOWER(entity_name) = LOWER(?)",
                (entity_name,)
            )
            return [dict(row) for row in cursor.fetchall()]

    def delete_entities_by_memory(self, memory_id: str) -> int:
        """
        Remove a memory's entities (and its relationships).

        Args:
            memory_id: Memory ID.

        Returns:
            Number of entity rows removed.
        """
        with closing(self._connect()) as conn:
            cursor = conn.execute(
                "DELETE FROM entities WHERE memory_id = ?",
                (memory_id,)
            )
            deleted = cursor.rowcount

            # Cascade: drop relationships recorded for the same memory.
            conn.execute(
                "DELETE FROM entity_relationships WHERE memory_id = ?",
                (memory_id,)
            )

            conn.commit()
        return deleted

    # ========================================================================
    # RELATIONSHIP OPERATIONS
    # ========================================================================

    def insert_relationship(
        self,
        source_entity: str,
        target_entity: str,
        relationship_type: str,
        memory_id: str
    ) -> str:
        """
        Insert (or replace) a relationship between entities.

        Args:
            source_entity: Source entity name.
            target_entity: Target entity name.
            relationship_type: Relationship type.
            memory_id: ID of the source memory.

        Returns:
            Relationship ID.
        """
        rel_id = f"rel_{source_entity}_{target_entity}_{memory_id}"

        with closing(self._connect()) as conn:
            conn.execute("""
                INSERT OR REPLACE INTO entity_relationships
                (id, source_entity, target_entity, relationship_type, memory_id)
                VALUES (?, ?, ?, ?, ?)
            """, (rel_id, source_entity, target_entity, relationship_type, memory_id))
            conn.commit()

        return rel_id

    def get_relationships(self, entity_name: str) -> List[Dict[str, Any]]:
        """
        Get the relationships of an entity.

        Args:
            entity_name: Entity name.

        Returns:
            List of relationships in both directions; each dict carries a
            'direction' key ('outgoing' or 'incoming').
        """
        with closing(self._connect()) as conn:
            # Relationships where the entity is the source
            cursor = conn.execute("""
                SELECT 'outgoing' as direction, r.*, e.entity_type as target_type
                FROM entity_relationships r
                LEFT JOIN entities e ON LOWER(e.entity_name) = LOWER(r.target_entity)
                WHERE LOWER(r.source_entity) = LOWER(?)
            """, (entity_name,))
            outgoing = [dict(row) for row in cursor.fetchall()]

            # Relationships where the entity is the target
            cursor = conn.execute("""
                SELECT 'incoming' as direction, r.*, e.entity_type as source_type
                FROM entity_relationships r
                LEFT JOIN entities e ON LOWER(e.entity_name) = LOWER(r.source_entity)
                WHERE LOWER(r.target_entity) = LOWER(?)
            """, (entity_name,))
            incoming = [dict(row) for row in cursor.fetchall()]

        return outgoing + incoming

    # ========================================================================
    # GRAPH TRAVERSAL (BFS)
    # ========================================================================

    def traverse(
        self,
        start_entity: str,
        depth: int = 2,
        entity_types: Optional[List[str]] = None,
        max_nodes: int = 50
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """
        BFS traversal starting from an entity.

        Args:
            start_entity: Name of the starting entity.
            depth: Maximum depth (1-3 recommended).
            entity_types: Optional filter on entity types.
            max_nodes: Maximum number of nodes returned.

        Returns:
            Tuple (nodes, edges) suitable for graph visualization.
        """
        nodes: Dict[str, Dict[str, Any]] = {}
        edges: List[Dict[str, Any]] = []
        # Fixes an O(E^2) scan in the original, which re-checked every
        # accumulated edge with any(...) per relationship.
        seen_edges: Set[str] = set()

        # BFS queue: (entity name, depth)
        queue: deque = deque([(start_entity, 0)])
        # Lowercased, matching the case-insensitive name lookup; the
        # original kept raw names here and could process the same entity
        # twice when reached via different casings.
        visited: Set[str] = set()

        while queue and len(nodes) < max_nodes:
            entity_name, current_depth = queue.popleft()
            visit_key = entity_name.lower()

            if visit_key in visited:
                continue
            visited.add(visit_key)

            # Resolve the entity (first match wins)
            entities = self.get_entities_by_name(entity_name)
            if not entities:
                continue
            entity = entities[0]

            # Apply the optional type filter
            if entity_types and entity["entity_type"] not in entity_types:
                continue

            # Add (or revisit) the node
            node_key = entity["entity_name"].lower()
            if node_key not in nodes:
                nodes[node_key] = {
                    "name": entity["entity_name"],
                    "type": entity["entity_type"],
                    "depth": current_depth,
                    "memory_count": 1
                }
            else:
                nodes[node_key]["memory_count"] += 1

            # Stop expanding at the depth limit
            if current_depth >= depth:
                continue

            for rel in self.get_relationships(entity_name):
                # Both directions carry the same source/target columns;
                # the original duplicated this assignment in an if/else
                # with identical branches.
                source = rel["source_entity"]
                target = rel["target_entity"]

                edge_key = f"{source.lower()}_{target.lower()}"
                if edge_key not in seen_edges:
                    seen_edges.add(edge_key)
                    edges.append({
                        "key": edge_key,
                        "source": source,
                        "target": target,
                        "type": rel["relationship_type"],
                        "memory_id": rel.get("memory_id")
                    })

                # Enqueue the neighbor on the far side of the edge
                next_entity = target if rel["direction"] == "outgoing" else source
                if next_entity.lower() not in visited:
                    queue.append((next_entity, current_depth + 1))

        return list(nodes.values()), edges

    # ========================================================================
    # ENTITY SEARCH (integrates with QueryEngine)
    # ========================================================================

    def search_by_query(self, query: str, limit: int = 20) -> List[Dict[str, Any]]:
        """
        Search memories via entities related to the query.

        Extracts candidate entities from the query text and returns the
        memories connected to them.

        Args:
            query: Search text.
            limit: Per-entity result limit.

        Returns:
            List of memories with a normalized graph score (max = 1.0).
        """
        # Heuristic entity extraction from the query (keywords)
        query_entities = self._extract_query_entities(query)
        if not query_entities:
            return []

        results: Dict[str, Dict[str, Any]] = {}
        with closing(self._connect()) as conn:
            for entity_name in query_entities:
                cursor = conn.execute("""
                    SELECT DISTINCT e.memory_id, e.entity_name, e.entity_type,
                           COUNT(*) as entity_count
                    FROM entities e
                    WHERE LOWER(e.entity_name) = LOWER(?)
                    GROUP BY e.memory_id
                    ORDER BY entity_count DESC
                    LIMIT ?
                """, (entity_name, limit))

                for row in cursor.fetchall():
                    memory_id = row["memory_id"]
                    if memory_id not in results:
                        results[memory_id] = {
                            "memory_id": memory_id,
                            "matched_entities": [],
                            "score": 0.0
                        }

                    results[memory_id]["matched_entities"].append({
                        "name": row["entity_name"],
                        "type": row["entity_type"]
                    })
                    results[memory_id]["score"] += 0.5  # base score per matched entity

        # Normalize scores to [0, 1]
        if results:
            max_score = max(r["score"] for r in results.values())
            for r in results.values():
                r["score"] = r["score"] / max_score if max_score > 0 else 0

        return list(results.values())

    def _extract_query_entities(self, query: str) -> List[str]:
        """
        Extract candidate entity names from a query.

        Uses simple heuristics (no spaCy here, to avoid the dependency):
        - capitalized words
        - quoted terms
        - acronyms

        Args:
            query: Search text.

        Returns:
            List of candidate entity names.
        """
        entities = set()

        # Quoted terms
        entities.update(re.findall(r'"([^"]+)"', query))

        # Capitalized words (likely proper nouns)
        entities.update(re.findall(r'\b[A-Z][a-zA-Z]*\b', query))

        # Acronyms
        entities.update(re.findall(r'\b[A-Z]{2,}\b', query))

        # Drop stop words and very short terms
        stop_words = {"A", "O", "Os", "As", "Um", "Uma", "Em", "De", "Do", "Da", "Com", "Por", "Para"}
        entities = {e for e in entities if e not in stop_words and len(e) > 2}

        return list(entities)

    # ========================================================================
    # FRONTMATTER INTEGRATION
    # ========================================================================

    def extract_from_frontmatter(
        self,
        memory_id: str,
        frontmatter: Dict[str, Any],
        project: str
    ) -> List[str]:
        """
        Extract entities from a memory's frontmatter.

        Args:
            memory_id: Memory ID.
            frontmatter: Metadata dictionary.
            project: Project name.

        Returns:
            List of created entity IDs.
        """
        entity_ids = []

        # 'type' field as a META entity
        if "type" in frontmatter:
            entity_ids.append(self.insert_entity(
                memory_id,
                f"TYPE:{frontmatter['type']}",
                "META",
                confidence=1.0
            ))

        # Project as an entity
        if project:
            entity_ids.append(self.insert_entity(
                memory_id,
                project,
                "PROJECT",
                confidence=1.0
            ))

        # Tags as entities; accepts either a comma-separated string or a list
        if "tags" in frontmatter:
            tags = frontmatter.get("tags", "")
            if isinstance(tags, str):
                tags = [t.strip() for t in tags.split(",") if t.strip()]
            if isinstance(tags, list):
                for tag in tags:
                    entity_ids.append(self.insert_entity(
                        memory_id,
                        f"TAG:{tag}",
                        "TAG",
                        confidence=1.0
                    ))

        return entity_ids

    # ========================================================================
    # STATISTICS
    # ========================================================================

    def get_stats(self) -> Dict[str, Any]:
        """
        Graph-level statistics.

        Returns:
            Dict with totals, per-type counts, and the average number of
            relationships per entity.
        """
        with closing(self._connect()) as conn:
            total_entities = conn.execute(
                "SELECT COUNT(*) FROM entities"
            ).fetchone()[0]

            total_relationships = conn.execute(
                "SELECT COUNT(*) FROM entity_relationships"
            ).fetchone()[0]

            by_type = conn.execute(
                "SELECT entity_type, COUNT(*) FROM entities GROUP BY entity_type"
            ).fetchall()

        return {
            "total_entities": total_entities,
            "total_relationships": total_relationships,
            "by_type": dict(by_type),
            "avg_relationships_per_entity": (
                total_relationships / total_entities if total_entities > 0 else 0
            )
        }
@@ -6,6 +6,7 @@ from dataclasses import dataclass
6
6
 
7
7
  from .metadata_db import MetadataDB
8
8
  from .embeddings_db import EmbeddingsDB
9
+ from .entities_db import EntitiesDB
9
10
 
10
11
 
11
12
  @dataclass
@@ -16,7 +17,7 @@ class QueryResult:
16
17
  project: str
17
18
  title: str
18
19
  score: float
19
- source: str # 'fts', 'semantic', 'metadata'
20
+ source: str # 'fts', 'semantic', 'metadata', 'graph'
20
21
  metadata: Dict[str, Any] = None
21
22
 
22
23
 
@@ -24,16 +25,18 @@ class QueryEngine:
24
25
  """
25
26
  Engine de consultas híbridas.
26
27
 
27
- Combina três tipos de busca:
28
+ Combina quatro tipos de busca:
28
29
  - Metadata: filtros estruturados (projeto, tipo, tags)
29
30
  - FTS: busca full-text no conteúdo
30
31
  - Semantic: busca por similaridade de embeddings
32
+ - Graph: busca por entidades e relacionamentos
31
33
  """
32
34
 
33
35
  def __init__(
34
36
  self,
35
37
  metadata_db: MetadataDB,
36
- embeddings_db: EmbeddingsDB
38
+ embeddings_db: EmbeddingsDB,
39
+ entities_db: Optional[EntitiesDB] = None
37
40
  ):
38
41
  """
39
42
  Inicializa o QueryEngine.
@@ -41,9 +44,11 @@ class QueryEngine:
41
44
  Args:
42
45
  metadata_db: Instância do MetadataDB
43
46
  embeddings_db: Instância do EmbeddingsDB
47
+ entities_db: Instância do EntitiesDB (opcional)
44
48
  """
45
49
  self.metadata_db = metadata_db
46
50
  self.embeddings_db = embeddings_db
51
+ self.entities_db = entities_db
47
52
 
48
53
  def search(
49
54
  self,
@@ -53,11 +58,13 @@ class QueryEngine:
53
58
  limit: int = 10,
54
59
  use_fts: bool = True,
55
60
  use_semantic: bool = True,
56
- fts_weight: float = 0.4,
57
- semantic_weight: float = 0.6
61
+ use_graph: bool = True,
62
+ fts_weight: float = 0.3,
63
+ semantic_weight: float = 0.5,
64
+ graph_weight: float = 0.2
58
65
  ) -> List[QueryResult]:
59
66
  """
60
- Busca híbrida combinando FTS e semantic.
67
+ Busca híbrida combinando FTS, semantic e graph.
61
68
 
62
69
  Args:
63
70
  query: Texto de busca
@@ -66,8 +73,10 @@ class QueryEngine:
66
73
  limit: Limite de resultados
67
74
  use_fts: Usar busca FTS
68
75
  use_semantic: Usar busca semantic
76
+ use_graph: Usar busca por graph
69
77
  fts_weight: Peso da busca FTS
70
78
  semantic_weight: Peso da busca semantic
79
+ graph_weight: Peso da busca graph
71
80
 
72
81
  Returns:
73
82
  Lista de resultados ordenados por relevância
@@ -104,6 +113,30 @@ class QueryEngine:
104
113
  r.score *= semantic_weight
105
114
  results[r.memory_id] = r
106
115
 
116
+ # Busca por Graph (entidades)
117
+ if use_graph and self.entities_db:
118
+ graph_results = self._search_by_graph(query, limit)
119
+ for r in graph_results:
120
+ if r.memory_id in results:
121
+ # Combina scores
122
+ existing = results[r.memory_id]
123
+ combined_score = (
124
+ existing.score * (1 - graph_weight) +
125
+ r.score * graph_weight
126
+ )
127
+ results[r.memory_id] = QueryResult(
128
+ memory_id=r.memory_id,
129
+ type=r.type,
130
+ project=r.project,
131
+ title=r.title,
132
+ score=combined_score,
133
+ source="hybrid",
134
+ metadata=r.metadata
135
+ )
136
+ else:
137
+ r.score *= graph_weight
138
+ results[r.memory_id] = r
139
+
107
140
  # Filtra por tipo se especificado
108
141
  if mem_type:
109
142
  results = {
@@ -198,6 +231,48 @@ class QueryEngine:
198
231
 
199
232
  return results
200
233
 
234
+ def _search_by_graph(
235
+ self,
236
+ query: str,
237
+ limit: int
238
+ ) -> List[QueryResult]:
239
+ """
240
+ Busca por entidades no grafo.
241
+
242
+ Extrai entidades da query e retorna memórias conectadas.
243
+
244
+ Args:
245
+ query: Texto de busca
246
+ limit: Limite de resultados
247
+
248
+ Returns:
249
+ Lista de resultados com score de grafo
250
+ """
251
+ if not self.entities_db:
252
+ return []
253
+
254
+ graph_results = self.entities_db.search_by_query(query, limit * 2)
255
+
256
+ results = []
257
+ for item in graph_results:
258
+ # Busca metadados adicionais
259
+ memory = self.metadata_db.get_by_id(item["memory_id"])
260
+
261
+ results.append(QueryResult(
262
+ memory_id=item["memory_id"],
263
+ type=memory.get("type", "unknown") if memory else "unknown",
264
+ project=memory.get("project", "unknown") if memory else "unknown",
265
+ title=memory.get("title", item["memory_id"]) if memory else item["memory_id"],
266
+ score=item["score"],
267
+ source="graph",
268
+ metadata={
269
+ "matched_entities": item.get("matched_entities", []),
270
+ "graph_score": item["score"]
271
+ }
272
+ ))
273
+
274
+ return results
275
+
201
276
  def search_by_metadata(
202
277
  self,
203
278
  project: Optional[str] = None,
package/src/mcp/server.py CHANGED
@@ -26,6 +26,7 @@ from src.consolidation.extractor import Extractor
26
26
  from src.consolidation.promoter import Promoter
27
27
  from src.index.metadata_db import MetadataDB
28
28
  from src.index.embeddings_db import EmbeddingsDB
29
+ from src.index.entities_db import EntitiesDB
29
30
  from src.index.queries import QueryEngine
30
31
  from src.hooks.custom_loader import HooksLoader, HookRunner
31
32
  from src.diff.memory_diff import MemoryDiff
@@ -81,10 +82,15 @@ class CerebroMCP:
81
82
 
82
83
  self.metadata_db = MetadataDB(self.cerebro_path / "index" / "metadata.db")
83
84
  self.embeddings_db = EmbeddingsDB(self.cerebro_path / "index" / "embeddings.db")
84
- self.query_engine = QueryEngine(self.metadata_db, self.embeddings_db)
85
+ self.entities_db = EntitiesDB(self.cerebro_path / "index" / "entities.db")
86
+ self.query_engine = QueryEngine(self.metadata_db, self.embeddings_db, self.entities_db)
85
87
 
86
88
  self.extractor = Extractor(self.raw_storage, self.working_storage)
87
- self.promoter = Promoter(self.working_storage, self.official_storage)
89
+ self.promoter = Promoter(
90
+ self.working_storage,
91
+ self.official_storage,
92
+ self.cerebro_path / "index" / "entities.db"
93
+ )
88
94
 
89
95
  self.memory_view = MemoryView(
90
96
  self.cerebro_path,
@@ -340,6 +346,31 @@ class CerebroMCP:
340
346
  }
341
347
  }
342
348
  }
349
+ ),
350
+ Tool(
351
+ name="cerebro_graph",
352
+ description="Explora grafo de entidades - mostra conexões entre projetos, tecnologias, pessoas e decisões",
353
+ inputSchema={
354
+ "type": "object",
355
+ "properties": {
356
+ "entity": {
357
+ "type": "string",
358
+ "description": "Nome da entidade para iniciar traversal (ex: 'MedicsPro', 'JWT', 'autenticação')"
359
+ },
360
+ "depth": {
361
+ "type": "integer",
362
+ "description": "Profundidade máxima do traversal (1-3, padrão: 2)",
363
+ "default": 2
364
+ },
365
+ "types": {
366
+ "type": "array",
367
+ "items": {"type": "string"},
368
+ "description": "Filtrar por tipos de entidade (ex: ['ORG', 'TECH'])",
369
+ "default": ["ORG", "TECH", "PERSON", "PROJECT"]
370
+ }
371
+ },
372
+ "required": ["entity"]
373
+ }
343
374
  )
344
375
  ]
345
376
 
@@ -377,6 +408,8 @@ class CerebroMCP:
377
408
  result = self._gc(arguments)
378
409
  elif name == "cerebro_capture_memory":
379
410
  result = self._capture_memory(arguments)
411
+ elif name == "cerebro_graph":
412
+ result = self._cerebro_graph(arguments)
380
413
  else:
381
414
  return [TextContent(type="text", text=f"Ferramenta desconhecida: {name}")]
382
415
 
@@ -717,6 +750,86 @@ Uma chamada por memória. O sistema salva e indexa automaticamente.
717
750
  )
718
751
  return gc.generate_gc_report(results)
719
752
 
753
+ def _cerebro_graph(self, args: Dict[str, Any]) -> str:
754
+ """Explora grafo de entidades"""
755
+ entity = args.get("entity")
756
+ if not entity:
757
+ return "Erro: 'entity' é obrigatório para cerebro_graph"
758
+
759
+ depth = args.get("depth", 2)
760
+ entity_types = args.get("types", ["ORG", "TECH", "PERSON", "PROJECT"])
761
+
762
+ # Limita profundidade máxima para evitar traversal muito grande
763
+ depth = min(depth, 3)
764
+
765
+ nodes, edges = self.entities_db.traverse(
766
+ start_entity=entity,
767
+ depth=depth,
768
+ entity_types=entity_types,
769
+ max_nodes=50
770
+ )
771
+
772
+ if not nodes:
773
+ return f"Nenhuma entidade encontrada para '{entity}'"
774
+
775
+ # Formata grafo como árvore
776
+ return self._format_graph(nodes, edges, entity)
777
+
778
+ def _format_graph(
779
+ self,
780
+ nodes: List[Dict[str, Any]],
781
+ edges: List[Dict[str, Any]],
782
+ root_entity: str
783
+ ) -> str:
784
+ """Formata grafo como árvore visual"""
785
+ lines = [f"## Grafo de '{root_entity}'\n"]
786
+ lines.append(f"**{len(nodes)}** entidades encontradas, **{len(edges)}** conexões\n")
787
+
788
+ # Constroi adjacency list
789
+ adj: Dict[str, List[Dict[str, Any]]] = {}
790
+ for edge in edges:
791
+ source = edge["source"].lower()
792
+ if source not in adj:
793
+ adj[source] = []
794
+ adj[source].append(edge)
795
+
796
+ # BFS para imprimir árvore
797
+ visited = set()
798
+ queue = [(root_entity.lower(), 0)]
799
+
800
+ while queue:
801
+ entity_name, depth = queue.pop(0)
802
+
803
+ if entity_name in visited:
804
+ continue
805
+ visited.add(entity_name)
806
+
807
+ # Encontra nó correspondente
808
+ node = next((n for n in nodes if n["name"].lower() == entity_name), None)
809
+ if not node:
810
+ continue
811
+
812
+ # Imprime nó
813
+ prefix = " " * depth
814
+ connector = "├─ " if depth > 0 else ""
815
+ lines.append(f"{prefix}{connector}{node['name']} ({node['type']})")
816
+
817
+ # Adiciona filhos na fila
818
+ if depth < 3:
819
+ children = adj.get(entity_name, [])
820
+ for child in children:
821
+ child_name = child["target"].lower()
822
+ if child_name not in visited:
823
+ queue.append((child_name, depth + 1))
824
+
825
+ # Lista todas as arestas
826
+ if edges:
827
+ lines.append("\n## Conexões")
828
+ for edge in edges:
829
+ lines.append(f"- {edge['source']} → {edge['target']} ({edge['type']})")
830
+
831
+ return "\n".join(lines)
832
+
720
833
 
721
834
  async def main():
722
835
  """Entry point do MCP Server"""