ocerebro 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/src/consolidation/promoter.py +45 -1
- package/src/index/entities_db.py +581 -0
- package/src/index/queries.py +81 -6
- package/src/mcp/server.py +115 -2
package/package.json
CHANGED
package/pyproject.toml
CHANGED
|
@@ -37,7 +37,8 @@ class Promoter:
|
|
|
37
37
|
def __init__(
|
|
38
38
|
self,
|
|
39
39
|
working_storage: YAMLStorage,
|
|
40
|
-
official_storage: MarkdownStorage
|
|
40
|
+
official_storage: MarkdownStorage,
|
|
41
|
+
entities_db_path: Optional[Path] = None
|
|
41
42
|
):
|
|
42
43
|
"""
|
|
43
44
|
Inicializa o Promoter.
|
|
@@ -45,9 +46,12 @@ class Promoter:
|
|
|
45
46
|
Args:
|
|
46
47
|
working_storage: Instância do YAMLStorage
|
|
47
48
|
official_storage: Instância do MarkdownStorage
|
|
49
|
+
entities_db_path: Path para o EntitiesDB (opcional)
|
|
48
50
|
"""
|
|
49
51
|
self.working_storage = working_storage
|
|
50
52
|
self.official_storage = official_storage
|
|
53
|
+
self.entities_db_path = entities_db_path
|
|
54
|
+
self._entities_db = None
|
|
51
55
|
|
|
52
56
|
def promote_session(
|
|
53
57
|
self,
|
|
@@ -223,6 +227,9 @@ class Promoter:
|
|
|
223
227
|
content=content
|
|
224
228
|
)
|
|
225
229
|
|
|
230
|
+
# Extrai entidades do frontmatter e registra no grafo
|
|
231
|
+
self._extract_entities_from_frontmatter(draft_id, frontmatter, project)
|
|
232
|
+
|
|
226
233
|
return PromotionResult(
|
|
227
234
|
success=True,
|
|
228
235
|
source_type=draft.get("type", "session"),
|
|
@@ -300,6 +307,9 @@ class Promoter:
|
|
|
300
307
|
content=body
|
|
301
308
|
)
|
|
302
309
|
|
|
310
|
+
# Extrai entidades do frontmatter e registra no grafo
|
|
311
|
+
self._extract_entities_from_frontmatter(draft_id, frontmatter, project)
|
|
312
|
+
|
|
303
313
|
return PromotionResult(
|
|
304
314
|
success=True,
|
|
305
315
|
source_type=draft.get("type", "session"),
|
|
@@ -433,3 +443,37 @@ class Promoter:
|
|
|
433
443
|
if existing:
|
|
434
444
|
existing.update(draft)
|
|
435
445
|
self.working_storage.write_feature(project, draft_id, existing)
|
|
446
|
+
|
|
447
|
+
@property
def entities_db(self):
    """Return the EntitiesDB handle, building it lazily on first access.

    The handle is only created when none is cached yet and a database path
    was configured; otherwise the cached value (possibly ``None``) is
    returned unchanged.
    """
    already_resolved = self._entities_db is not None or not self.entities_db_path
    if already_resolved:
        return self._entities_db

    # Imported here rather than at module level, as in the original code.
    from src.index.entities_db import EntitiesDB

    self._entities_db = EntitiesDB(self.entities_db_path)
    return self._entities_db
|
|
454
|
+
|
|
455
|
+
def _extract_entities_from_frontmatter(
|
|
456
|
+
self,
|
|
457
|
+
memory_id: str,
|
|
458
|
+
frontmatter: Dict[str, Any],
|
|
459
|
+
project: str
|
|
460
|
+
) -> List[str]:
|
|
461
|
+
"""
|
|
462
|
+
Extrai entidades do frontmatter e registra no grafo.
|
|
463
|
+
|
|
464
|
+
Args:
|
|
465
|
+
memory_id: ID da memória
|
|
466
|
+
frontmatter: Dicionário com metadados
|
|
467
|
+
project: Nome do projeto
|
|
468
|
+
|
|
469
|
+
Returns:
|
|
470
|
+
Lista de IDs de entidades criadas
|
|
471
|
+
"""
|
|
472
|
+
if not self.entities_db:
|
|
473
|
+
return []
|
|
474
|
+
|
|
475
|
+
return self.entities_db.extract_from_frontmatter(
|
|
476
|
+
memory_id=memory_id,
|
|
477
|
+
frontmatter=frontmatter,
|
|
478
|
+
project=project
|
|
479
|
+
)
|
|
@@ -0,0 +1,581 @@
|
|
|
1
|
+
"""EntitiesDB: Grafo de experiência com entidades e relacionamentos"""
|
|
2
|
+
|
|
3
|
+
import sqlite3
|
|
4
|
+
import re
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict, List, Optional, Tuple, Set
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from collections import deque
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class EntitiesDB:
    """
    SQLite-backed store for the experience graph.

    Holds entities extracted from memories (ORG, PERSON, TECH, etc.) and the
    relationships between them, and supports associative lookup by graph
    traversal.

    Differences from LightRAG, as stated by the original author:
    - Local extraction with spaCy NER (offline, free)
    - Frontmatter values as the initial nodes (no LLM)
    - Implicit edges by project/tags/type
    """

    # Capitalized Portuguese function words that must not be treated as
    # entity candidates when parsing a free-text query.
    _STOP_WORDS = frozenset({
        "A", "O", "Os", "As", "Um", "Uma", "Em", "De", "Do", "Da",
        "Com", "Por", "Para",
    })

    # DDL executed on every start-up; every statement is idempotent.
    # NOTE(review): both tables declare a foreign key to ``memories(id)``,
    # a table this class never creates. SQLite does not enforce foreign keys
    # unless ``PRAGMA foreign_keys`` is switched on, which is not done here.
    _SCHEMA_STATEMENTS = (
        """
        CREATE TABLE IF NOT EXISTS entities (
            id TEXT PRIMARY KEY,
            memory_id TEXT,
            entity_name TEXT,
            entity_type TEXT,
            confidence REAL DEFAULT 1.0,
            span_start INTEGER,
            span_end INTEGER,
            context_snippet TEXT,
            created_at TEXT DEFAULT (datetime('now')),
            FOREIGN KEY (memory_id) REFERENCES memories(id)
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_entities_name
        ON entities(entity_name)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_entities_type
        ON entities(entity_type)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_entities_memory
        ON entities(memory_id)
        """,
        """
        CREATE TABLE IF NOT EXISTS entity_relationships (
            id TEXT PRIMARY KEY,
            source_entity TEXT,
            target_entity TEXT,
            relationship_type TEXT,
            memory_id TEXT,
            created_at TEXT DEFAULT (datetime('now')),
            FOREIGN KEY (memory_id) REFERENCES memories(id)
        )
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_relationships_source
        ON entity_relationships(source_entity)
        """,
        """
        CREATE INDEX IF NOT EXISTS idx_relationships_target
        ON entity_relationships(target_entity)
        """,
    )

    def __init__(self, db_path: Path):
        """
        Open (creating if necessary) the entities database.

        Args:
            db_path: Location of the SQLite file. Missing parent directories
                are created. A plain string is accepted as well.
        """
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_schema()

    # ========================================================================
    # CONNECTION HELPERS
    # ========================================================================

    def _connect(self) -> sqlite3.Connection:
        """Open a new connection with name-addressable rows and WAL journaling."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.row_factory = sqlite3.Row
            conn.execute("PRAGMA journal_mode=WAL")
        except sqlite3.Error:
            # Do not leak the handle when the connection cannot be configured.
            conn.close()
            raise
        return conn

    def _fetch_all(
        self,
        sql: str,
        params: Tuple[Any, ...] = ()
    ) -> List[Dict[str, Any]]:
        """Run one read query and return its rows as plain dictionaries."""
        conn = self._connect()
        try:
            return [dict(row) for row in conn.execute(sql, params).fetchall()]
        finally:
            conn.close()

    def _execute_write(
        self,
        statements: List[Tuple[str, Tuple[Any, ...]]]
    ) -> List[int]:
        """
        Run the given statements in one transaction.

        The transaction is committed when every statement succeeds and rolled
        back otherwise; the connection is closed in both cases.

        Returns:
            The cursor ``rowcount`` of each statement, in order.
        """
        conn = self._connect()
        try:
            with conn:  # commit on success, rollback on error
                return [conn.execute(sql, params).rowcount for sql, params in statements]
        finally:
            conn.close()

    def _init_schema(self):
        """Create the tables and indexes if they do not exist yet."""
        self._execute_write([(ddl, ()) for ddl in self._SCHEMA_STATEMENTS])

    # ========================================================================
    # ENTITY OPERATIONS
    # ========================================================================

    def insert_entity(
        self,
        memory_id: str,
        entity_name: str,
        entity_type: str,
        confidence: float = 1.0,
        span_start: int = 0,
        span_end: int = 0,
        context_snippet: str = ""
    ) -> str:
        """
        Insert an entity, replacing any row that has the same ID.

        The ID is derived from the memory ID and the lower-cased name with
        spaces turned into underscores, so re-inserting the same name for the
        same memory overwrites the previous row.

        Args:
            memory_id: ID of the memory the entity was found in
            entity_name: Entity name
            entity_type: Type (ORG, PERSON, TECH, etc.)
            confidence: Extraction confidence (0-1)
            span_start: Start offset in the text
            span_end: End offset in the text
            context_snippet: Text surrounding the entity

        Returns:
            ID of the entity
        """
        entity_id = f"ent_{memory_id}_{entity_name.lower().replace(' ', '_')}"
        self._execute_write([(
            """
            INSERT OR REPLACE INTO entities
            (id, memory_id, entity_name, entity_type, confidence, span_start, span_end, context_snippet)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (
                entity_id,
                memory_id,
                entity_name,
                entity_type,
                confidence,
                span_start,
                span_end,
                context_snippet,
            ),
        )])
        return entity_id

    def get_entities_by_memory(self, memory_id: str) -> List[Dict[str, Any]]:
        """
        Fetch the entities recorded for one memory.

        Args:
            memory_id: ID of the memory

        Returns:
            List of entity rows as dictionaries
        """
        return self._fetch_all(
            "SELECT * FROM entities WHERE memory_id = ?",
            (memory_id,)
        )

    def get_entities_by_name(self, entity_name: str) -> List[Dict[str, Any]]:
        """
        Look entities up by name, ignoring case.

        The comparison is done by SQLite's ``LOWER``.

        Args:
            entity_name: Entity name

        Returns:
            List of entity rows as dictionaries (one per memory mentioning it)
        """
        return self._fetch_all(
            "SELECT * FROM entities WHERE LOWER(entity_name) = LOWER(?)",
            (entity_name,)
        )

    def delete_entities_by_memory(self, memory_id: str) -> int:
        """
        Remove a memory's entities and relationships in one transaction.

        Args:
            memory_id: ID of the memory

        Returns:
            Number of entity rows removed (relationships are not counted)
        """
        deleted_entities, _ = self._execute_write([
            ("DELETE FROM entities WHERE memory_id = ?", (memory_id,)),
            ("DELETE FROM entity_relationships WHERE memory_id = ?", (memory_id,)),
        ])
        return deleted_entities

    # ========================================================================
    # RELATIONSHIP OPERATIONS
    # ========================================================================

    def insert_relationship(
        self,
        source_entity: str,
        target_entity: str,
        relationship_type: str,
        memory_id: str
    ) -> str:
        """
        Insert a relationship between two entities, replacing any row that
        has the same ID.

        Args:
            source_entity: Name of the source entity
            target_entity: Name of the target entity
            relationship_type: Relationship type
            memory_id: ID of the memory the relationship comes from

        Returns:
            ID of the relationship
        """
        rel_id = f"rel_{source_entity}_{target_entity}_{memory_id}"
        self._execute_write([(
            """
            INSERT OR REPLACE INTO entity_relationships
            (id, source_entity, target_entity, relationship_type, memory_id)
            VALUES (?, ?, ?, ?, ?)
            """,
            (rel_id, source_entity, target_entity, relationship_type, memory_id),
        )])
        return rel_id

    def get_relationships(self, entity_name: str) -> List[Dict[str, Any]]:
        """
        Fetch the relationships an entity takes part in.

        Each stored relationship is returned once per direction. The type of
        the other endpoint comes from a scalar sub-query rather than a join,
        because a join on the entity name returns one copy of the
        relationship for every memory that mentions that endpoint.

        Args:
            entity_name: Entity name (matched case-insensitively)

        Returns:
            Outgoing rows (``direction='outgoing'``, extra key ``target_type``)
            followed by incoming rows (``direction='incoming'``, extra key
            ``source_type``). The extra key is ``None`` when the other
            endpoint has no entity row.
        """
        outgoing = self._fetch_all(
            """
            SELECT 'outgoing' as direction, r.*,
                   (SELECT e.entity_type FROM entities e
                    WHERE LOWER(e.entity_name) = LOWER(r.target_entity)
                    LIMIT 1) as target_type
            FROM entity_relationships r
            WHERE LOWER(r.source_entity) = LOWER(?)
            """,
            (entity_name,)
        )
        incoming = self._fetch_all(
            """
            SELECT 'incoming' as direction, r.*,
                   (SELECT e.entity_type FROM entities e
                    WHERE LOWER(e.entity_name) = LOWER(r.source_entity)
                    LIMIT 1) as source_type
            FROM entity_relationships r
            WHERE LOWER(r.target_entity) = LOWER(?)
            """,
            (entity_name,)
        )
        return outgoing + incoming

    # ========================================================================
    # GRAPH TRAVERSAL (BFS)
    # ========================================================================

    def traverse(
        self,
        start_entity: str,
        depth: int = 2,
        entity_types: Optional[List[str]] = None,
        max_nodes: int = 50
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """
        Walk the graph breadth-first from one entity, following relationships
        in both directions.

        Args:
            start_entity: Name of the entity to start from
            depth: Maximum distance from the start (1-3 recommended)
            entity_types: When non-empty, entities of any other type are
                neither returned nor expanded
            max_nodes: Stop once this many nodes were collected

        Returns:
            Tuple ``(nodes, edges)``. Each node has ``name``, ``type``,
            ``depth`` and ``memory_count`` (distinct memories mentioning the
            entity). Each edge has ``key``, ``source``, ``target``, ``type``
            and ``memory_id``. Edges are recorded for every expanded node, so
            an edge may point at an entity that was filtered out or never
            reached.
        """
        nodes: Dict[str, Dict[str, Any]] = {}
        edges: List[Dict[str, Any]] = []
        seen_edge_keys: Set[str] = set()

        # BFS queue of (entity name, distance from start)
        queue: deque = deque([(start_entity, 0)])
        # Lower-cased, because entity lookups ignore case as well.
        visited: Set[str] = set()

        while queue and len(nodes) < max_nodes:
            entity_name, current_depth = queue.popleft()

            name_key = entity_name.lower()
            if name_key in visited:
                continue
            visited.add(name_key)

            matches = self.get_entities_by_name(entity_name)
            if not matches:
                continue

            # One row per memory; the first supplies the display name and type.
            entity = matches[0]

            if entity_types and entity["entity_type"] not in entity_types:
                continue

            node_key = entity["entity_name"].lower()
            if node_key not in nodes:
                nodes[node_key] = {
                    "name": entity["entity_name"],
                    "type": entity["entity_type"],
                    "depth": current_depth,
                    "memory_count": len({row["memory_id"] for row in matches}),
                }

            # Nodes at the depth limit are reported but not expanded.
            if current_depth >= depth:
                continue

            for rel in self.get_relationships(entity_name):
                source = rel["source_entity"]
                target = rel["target_entity"]

                edge_key = f"{source.lower()}_{target.lower()}"
                if edge_key not in seen_edge_keys:
                    seen_edge_keys.add(edge_key)
                    edges.append({
                        "key": edge_key,
                        "source": source,
                        "target": target,
                        "type": rel["relationship_type"],
                        "memory_id": rel.get("memory_id"),
                    })

                # The neighbour is whichever endpoint is not the current entity.
                neighbour = target if rel["direction"] == "outgoing" else source
                if neighbour.lower() not in visited:
                    queue.append((neighbour, current_depth + 1))

        return list(nodes.values()), edges

    # ========================================================================
    # ENTITY SEARCH (used by QueryEngine)
    # ========================================================================

    def search_by_query(self, query: str, limit: int = 20) -> List[Dict[str, Any]]:
        """
        Find memories that mention entities named in a free-text query.

        Candidate entity names are taken from the query, every memory that
        holds a matching entity gets 0.5 per matched name, and scores are
        then divided by the best score.

        Args:
            query: Search text
            limit: Maximum number of results; also caps the rows read per
                candidate name. A negative value disables both caps.

        Returns:
            Dictionaries with ``memory_id``, ``matched_entities`` and
            ``score`` (0-1), best score first, ties ordered by memory ID.
        """
        query_entities = self._extract_query_entities(query)
        if not query_entities:
            return []

        results: Dict[Any, Dict[str, Any]] = {}
        conn = self._connect()
        try:
            for entity_name in query_entities:
                rows = conn.execute("""
                    SELECT DISTINCT e.memory_id, e.entity_name, e.entity_type,
                           COUNT(*) as entity_count
                    FROM entities e
                    WHERE LOWER(e.entity_name) = LOWER(?)
                    GROUP BY e.memory_id
                    ORDER BY entity_count DESC, e.memory_id
                    LIMIT ?
                """, (entity_name, limit)).fetchall()

                for row in rows:
                    memory_id = row["memory_id"]
                    hit = results.setdefault(memory_id, {
                        "memory_id": memory_id,
                        "matched_entities": [],
                        "score": 0.0,
                    })
                    hit["matched_entities"].append({
                        "name": row["entity_name"],
                        "type": row["entity_type"],
                    })
                    hit["score"] += 0.5  # base score per matched entity
        finally:
            conn.close()

        if not results:
            return []

        max_score = max(hit["score"] for hit in results.values())
        for hit in results.values():
            hit["score"] = hit["score"] / max_score if max_score > 0 else 0

        # Several candidate names can each contribute up to ``limit`` rows,
        # so rank the merged hits and cut them back to the requested size.
        ranked = sorted(
            results.values(),
            key=lambda hit: (-hit["score"], str(hit["memory_id"]))
        )
        return ranked[:limit] if limit >= 0 else ranked

    def _extract_query_entities(self, query: str) -> List[str]:
        """
        Pick likely entity names out of a query with simple heuristics
        (no NER dependency):
        - terms between double quotes
        - words starting with an upper-case letter, which covers acronyms,
          accented words ("Autenticação") and names with digits ("OAuth2")

        Args:
            query: Search text

        Returns:
            Sorted list of candidate names, without stop words and without
            candidates of two characters or fewer
        """
        candidates = set(re.findall(r'"([^"]+)"', query))

        # ``\w`` is Unicode-aware for str patterns, so accented letters stay
        # inside the word instead of splitting or discarding it.
        candidates.update(
            word for word in re.findall(r"\w+", query) if word[0].isupper()
        )

        return sorted(
            name for name in candidates
            if name not in self._STOP_WORDS and len(name) > 2
        )

    # ========================================================================
    # FRONTMATTER INTEGRATION
    # ========================================================================

    @staticmethod
    def _normalize_tags(raw_tags: Any) -> List[str]:
        """Turn a comma-separated string or a list of tags into clean tag names."""
        if isinstance(raw_tags, str):
            candidates = raw_tags.split(",")
        elif isinstance(raw_tags, (list, tuple)):
            candidates = raw_tags
        else:
            return []

        stripped = (str(tag).strip() for tag in candidates if tag is not None)
        return [tag for tag in stripped if tag]

    def extract_from_frontmatter(
        self,
        memory_id: str,
        frontmatter: Dict[str, Any],
        project: str
    ) -> List[str]:
        """
        Register a memory's frontmatter values as entities.

        Creates a ``META`` entity named ``TYPE:<type>``, a ``PROJECT`` entity
        named after the project and one ``TAG`` entity named ``TAG:<tag>``
        per tag. Empty values (missing or null type, empty project, blank
        tags) are skipped.

        Args:
            memory_id: ID of the memory
            frontmatter: Metadata dictionary; ``tags`` may be a
                comma-separated string or a list
            project: Project name

        Returns:
            IDs of the entities written, in the order type, project, tags
        """
        frontmatter = frontmatter or {}
        entity_ids: List[str] = []

        memory_type = frontmatter.get("type")
        if memory_type:
            entity_ids.append(self.insert_entity(
                memory_id,
                f"TYPE:{memory_type}",
                "META",
                confidence=1.0
            ))

        if project:
            entity_ids.append(self.insert_entity(
                memory_id,
                project,
                "PROJECT",
                confidence=1.0
            ))

        for tag in self._normalize_tags(frontmatter.get("tags")):
            entity_ids.append(self.insert_entity(
                memory_id,
                f"TAG:{tag}",
                "TAG",
                confidence=1.0
            ))

        return entity_ids

    # ========================================================================
    # STATISTICS
    # ========================================================================

    def get_stats(self) -> Dict[str, Any]:
        """
        Summarize the contents of the graph.

        Returns:
            Dictionary with ``total_entities``, ``total_relationships``,
            ``by_type`` (entity count per type) and
            ``avg_relationships_per_entity`` (0 when there are no entities)
        """
        conn = self._connect()
        try:
            total_entities = conn.execute(
                "SELECT COUNT(*) FROM entities"
            ).fetchone()[0]

            total_relationships = conn.execute(
                "SELECT COUNT(*) FROM entity_relationships"
            ).fetchone()[0]

            by_type = {
                row[0]: row[1]
                for row in conn.execute(
                    "SELECT entity_type, COUNT(*) FROM entities GROUP BY entity_type"
                )
            }
        finally:
            conn.close()

        return {
            "total_entities": total_entities,
            "total_relationships": total_relationships,
            "by_type": by_type,
            "avg_relationships_per_entity": (
                total_relationships / total_entities if total_entities > 0 else 0
            )
        }
|
package/src/index/queries.py
CHANGED
|
@@ -6,6 +6,7 @@ from dataclasses import dataclass
|
|
|
6
6
|
|
|
7
7
|
from .metadata_db import MetadataDB
|
|
8
8
|
from .embeddings_db import EmbeddingsDB
|
|
9
|
+
from .entities_db import EntitiesDB
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
@dataclass
|
|
@@ -16,7 +17,7 @@ class QueryResult:
|
|
|
16
17
|
project: str
|
|
17
18
|
title: str
|
|
18
19
|
score: float
|
|
19
|
-
source: str # 'fts', 'semantic', 'metadata'
|
|
20
|
+
source: str # 'fts', 'semantic', 'metadata', 'graph'
|
|
20
21
|
metadata: Dict[str, Any] = None
|
|
21
22
|
|
|
22
23
|
|
|
@@ -24,16 +25,18 @@ class QueryEngine:
|
|
|
24
25
|
"""
|
|
25
26
|
Engine de consultas híbridas.
|
|
26
27
|
|
|
27
|
-
Combina
|
|
28
|
+
Combina quatro tipos de busca:
|
|
28
29
|
- Metadata: filtros estruturados (projeto, tipo, tags)
|
|
29
30
|
- FTS: busca full-text no conteúdo
|
|
30
31
|
- Semantic: busca por similaridade de embeddings
|
|
32
|
+
- Graph: busca por entidades e relacionamentos
|
|
31
33
|
"""
|
|
32
34
|
|
|
33
35
|
def __init__(
|
|
34
36
|
self,
|
|
35
37
|
metadata_db: MetadataDB,
|
|
36
|
-
embeddings_db: EmbeddingsDB
|
|
38
|
+
embeddings_db: EmbeddingsDB,
|
|
39
|
+
entities_db: Optional[EntitiesDB] = None
|
|
37
40
|
):
|
|
38
41
|
"""
|
|
39
42
|
Inicializa o QueryEngine.
|
|
@@ -41,9 +44,11 @@ class QueryEngine:
|
|
|
41
44
|
Args:
|
|
42
45
|
metadata_db: Instância do MetadataDB
|
|
43
46
|
embeddings_db: Instância do EmbeddingsDB
|
|
47
|
+
entities_db: Instância do EntitiesDB (opcional)
|
|
44
48
|
"""
|
|
45
49
|
self.metadata_db = metadata_db
|
|
46
50
|
self.embeddings_db = embeddings_db
|
|
51
|
+
self.entities_db = entities_db
|
|
47
52
|
|
|
48
53
|
def search(
|
|
49
54
|
self,
|
|
@@ -53,11 +58,13 @@ class QueryEngine:
|
|
|
53
58
|
limit: int = 10,
|
|
54
59
|
use_fts: bool = True,
|
|
55
60
|
use_semantic: bool = True,
|
|
56
|
-
|
|
57
|
-
|
|
61
|
+
use_graph: bool = True,
|
|
62
|
+
fts_weight: float = 0.3,
|
|
63
|
+
semantic_weight: float = 0.5,
|
|
64
|
+
graph_weight: float = 0.2
|
|
58
65
|
) -> List[QueryResult]:
|
|
59
66
|
"""
|
|
60
|
-
Busca híbrida combinando FTS e
|
|
67
|
+
Busca híbrida combinando FTS, semantic e graph.
|
|
61
68
|
|
|
62
69
|
Args:
|
|
63
70
|
query: Texto de busca
|
|
@@ -66,8 +73,10 @@ class QueryEngine:
|
|
|
66
73
|
limit: Limite de resultados
|
|
67
74
|
use_fts: Usar busca FTS
|
|
68
75
|
use_semantic: Usar busca semantic
|
|
76
|
+
use_graph: Usar busca por graph
|
|
69
77
|
fts_weight: Peso da busca FTS
|
|
70
78
|
semantic_weight: Peso da busca semantic
|
|
79
|
+
graph_weight: Peso da busca graph
|
|
71
80
|
|
|
72
81
|
Returns:
|
|
73
82
|
Lista de resultados ordenados por relevância
|
|
@@ -104,6 +113,30 @@ class QueryEngine:
|
|
|
104
113
|
r.score *= semantic_weight
|
|
105
114
|
results[r.memory_id] = r
|
|
106
115
|
|
|
116
|
+
# Busca por Graph (entidades)
|
|
117
|
+
if use_graph and self.entities_db:
|
|
118
|
+
graph_results = self._search_by_graph(query, limit)
|
|
119
|
+
for r in graph_results:
|
|
120
|
+
if r.memory_id in results:
|
|
121
|
+
# Combina scores
|
|
122
|
+
existing = results[r.memory_id]
|
|
123
|
+
combined_score = (
|
|
124
|
+
existing.score * (1 - graph_weight) +
|
|
125
|
+
r.score * graph_weight
|
|
126
|
+
)
|
|
127
|
+
results[r.memory_id] = QueryResult(
|
|
128
|
+
memory_id=r.memory_id,
|
|
129
|
+
type=r.type,
|
|
130
|
+
project=r.project,
|
|
131
|
+
title=r.title,
|
|
132
|
+
score=combined_score,
|
|
133
|
+
source="hybrid",
|
|
134
|
+
metadata=r.metadata
|
|
135
|
+
)
|
|
136
|
+
else:
|
|
137
|
+
r.score *= graph_weight
|
|
138
|
+
results[r.memory_id] = r
|
|
139
|
+
|
|
107
140
|
# Filtra por tipo se especificado
|
|
108
141
|
if mem_type:
|
|
109
142
|
results = {
|
|
@@ -198,6 +231,48 @@ class QueryEngine:
|
|
|
198
231
|
|
|
199
232
|
return results
|
|
200
233
|
|
|
234
|
+
def _search_by_graph(
    self,
    query: str,
    limit: int
) -> List[QueryResult]:
    """
    Search the entity graph and wrap each hit in a QueryResult.

    Asks the entities database for up to ``limit * 2`` memories whose
    entities match the query, then fills in type, project and title from
    the metadata database. A memory that has no metadata record gets
    "unknown" for type and project and its own ID as title.

    Args:
        query: Search text
        limit: Result limit requested by the caller

    Returns:
        List of results carrying the graph score, with the matched entities
        and that score repeated under ``metadata``
    """
    if not self.entities_db:
        return []

    def as_query_result(hit):
        memory_id = hit["memory_id"]
        # A missing record is treated like an empty one, so the fallbacks
        # below apply to every field.
        record = self.metadata_db.get_by_id(memory_id) or {}
        graph_score = hit["score"]
        return QueryResult(
            memory_id=memory_id,
            type=record.get("type", "unknown"),
            project=record.get("project", "unknown"),
            title=record.get("title", memory_id),
            score=graph_score,
            source="graph",
            metadata={
                "matched_entities": hit.get("matched_entities", []),
                "graph_score": graph_score
            }
        )

    hits = self.entities_db.search_by_query(query, limit * 2)
    return [as_query_result(hit) for hit in hits]
|
|
275
|
+
|
|
201
276
|
def search_by_metadata(
|
|
202
277
|
self,
|
|
203
278
|
project: Optional[str] = None,
|
package/src/mcp/server.py
CHANGED
|
@@ -26,6 +26,7 @@ from src.consolidation.extractor import Extractor
|
|
|
26
26
|
from src.consolidation.promoter import Promoter
|
|
27
27
|
from src.index.metadata_db import MetadataDB
|
|
28
28
|
from src.index.embeddings_db import EmbeddingsDB
|
|
29
|
+
from src.index.entities_db import EntitiesDB
|
|
29
30
|
from src.index.queries import QueryEngine
|
|
30
31
|
from src.hooks.custom_loader import HooksLoader, HookRunner
|
|
31
32
|
from src.diff.memory_diff import MemoryDiff
|
|
@@ -81,10 +82,15 @@ class CerebroMCP:
|
|
|
81
82
|
|
|
82
83
|
self.metadata_db = MetadataDB(self.cerebro_path / "index" / "metadata.db")
|
|
83
84
|
self.embeddings_db = EmbeddingsDB(self.cerebro_path / "index" / "embeddings.db")
|
|
84
|
-
self.
|
|
85
|
+
self.entities_db = EntitiesDB(self.cerebro_path / "index" / "entities.db")
|
|
86
|
+
self.query_engine = QueryEngine(self.metadata_db, self.embeddings_db, self.entities_db)
|
|
85
87
|
|
|
86
88
|
self.extractor = Extractor(self.raw_storage, self.working_storage)
|
|
87
|
-
self.promoter = Promoter(
|
|
89
|
+
self.promoter = Promoter(
|
|
90
|
+
self.working_storage,
|
|
91
|
+
self.official_storage,
|
|
92
|
+
self.cerebro_path / "index" / "entities.db"
|
|
93
|
+
)
|
|
88
94
|
|
|
89
95
|
self.memory_view = MemoryView(
|
|
90
96
|
self.cerebro_path,
|
|
@@ -340,6 +346,31 @@ class CerebroMCP:
|
|
|
340
346
|
}
|
|
341
347
|
}
|
|
342
348
|
}
|
|
349
|
+
),
|
|
350
|
+
Tool(
|
|
351
|
+
name="cerebro_graph",
|
|
352
|
+
description="Explora grafo de entidades - mostra conexões entre projetos, tecnologias, pessoas e decisões",
|
|
353
|
+
inputSchema={
|
|
354
|
+
"type": "object",
|
|
355
|
+
"properties": {
|
|
356
|
+
"entity": {
|
|
357
|
+
"type": "string",
|
|
358
|
+
"description": "Nome da entidade para iniciar traversal (ex: 'MedicsPro', 'JWT', 'autenticação')"
|
|
359
|
+
},
|
|
360
|
+
"depth": {
|
|
361
|
+
"type": "integer",
|
|
362
|
+
"description": "Profundidade máxima do traversal (1-3, padrão: 2)",
|
|
363
|
+
"default": 2
|
|
364
|
+
},
|
|
365
|
+
"types": {
|
|
366
|
+
"type": "array",
|
|
367
|
+
"items": {"type": "string"},
|
|
368
|
+
"description": "Filtrar por tipos de entidade (ex: ['ORG', 'TECH'])",
|
|
369
|
+
"default": ["ORG", "TECH", "PERSON", "PROJECT"]
|
|
370
|
+
}
|
|
371
|
+
},
|
|
372
|
+
"required": ["entity"]
|
|
373
|
+
}
|
|
343
374
|
)
|
|
344
375
|
]
|
|
345
376
|
|
|
@@ -377,6 +408,8 @@ class CerebroMCP:
|
|
|
377
408
|
result = self._gc(arguments)
|
|
378
409
|
elif name == "cerebro_capture_memory":
|
|
379
410
|
result = self._capture_memory(arguments)
|
|
411
|
+
elif name == "cerebro_graph":
|
|
412
|
+
result = self._cerebro_graph(arguments)
|
|
380
413
|
else:
|
|
381
414
|
return [TextContent(type="text", text=f"Ferramenta desconhecida: {name}")]
|
|
382
415
|
|
|
@@ -717,6 +750,86 @@ Uma chamada por memória. O sistema salva e indexa automaticamente.
|
|
|
717
750
|
)
|
|
718
751
|
return gc.generate_gc_report(results)
|
|
719
752
|
|
|
753
|
+
def _cerebro_graph(self, args: Dict[str, Any]) -> str:
|
|
754
|
+
"""Explora grafo de entidades"""
|
|
755
|
+
entity = args.get("entity")
|
|
756
|
+
if not entity:
|
|
757
|
+
return "Erro: 'entity' é obrigatório para cerebro_graph"
|
|
758
|
+
|
|
759
|
+
depth = args.get("depth", 2)
|
|
760
|
+
entity_types = args.get("types", ["ORG", "TECH", "PERSON", "PROJECT"])
|
|
761
|
+
|
|
762
|
+
# Limita profundidade máxima para evitar traversal muito grande
|
|
763
|
+
depth = min(depth, 3)
|
|
764
|
+
|
|
765
|
+
nodes, edges = self.entities_db.traverse(
|
|
766
|
+
start_entity=entity,
|
|
767
|
+
depth=depth,
|
|
768
|
+
entity_types=entity_types,
|
|
769
|
+
max_nodes=50
|
|
770
|
+
)
|
|
771
|
+
|
|
772
|
+
if not nodes:
|
|
773
|
+
return f"Nenhuma entidade encontrada para '{entity}'"
|
|
774
|
+
|
|
775
|
+
# Formata grafo como árvore
|
|
776
|
+
return self._format_graph(nodes, edges, entity)
|
|
777
|
+
|
|
778
|
+
def _format_graph(
|
|
779
|
+
self,
|
|
780
|
+
nodes: List[Dict[str, Any]],
|
|
781
|
+
edges: List[Dict[str, Any]],
|
|
782
|
+
root_entity: str
|
|
783
|
+
) -> str:
|
|
784
|
+
"""Formata grafo como árvore visual"""
|
|
785
|
+
lines = [f"## Grafo de '{root_entity}'\n"]
|
|
786
|
+
lines.append(f"**{len(nodes)}** entidades encontradas, **{len(edges)}** conexões\n")
|
|
787
|
+
|
|
788
|
+
# Constroi adjacency list
|
|
789
|
+
adj: Dict[str, List[Dict[str, Any]]] = {}
|
|
790
|
+
for edge in edges:
|
|
791
|
+
source = edge["source"].lower()
|
|
792
|
+
if source not in adj:
|
|
793
|
+
adj[source] = []
|
|
794
|
+
adj[source].append(edge)
|
|
795
|
+
|
|
796
|
+
# BFS para imprimir árvore
|
|
797
|
+
visited = set()
|
|
798
|
+
queue = [(root_entity.lower(), 0)]
|
|
799
|
+
|
|
800
|
+
while queue:
|
|
801
|
+
entity_name, depth = queue.pop(0)
|
|
802
|
+
|
|
803
|
+
if entity_name in visited:
|
|
804
|
+
continue
|
|
805
|
+
visited.add(entity_name)
|
|
806
|
+
|
|
807
|
+
# Encontra nó correspondente
|
|
808
|
+
node = next((n for n in nodes if n["name"].lower() == entity_name), None)
|
|
809
|
+
if not node:
|
|
810
|
+
continue
|
|
811
|
+
|
|
812
|
+
# Imprime nó
|
|
813
|
+
prefix = " " * depth
|
|
814
|
+
connector = "├─ " if depth > 0 else ""
|
|
815
|
+
lines.append(f"{prefix}{connector}{node['name']} ({node['type']})")
|
|
816
|
+
|
|
817
|
+
# Adiciona filhos na fila
|
|
818
|
+
if depth < 3:
|
|
819
|
+
children = adj.get(entity_name, [])
|
|
820
|
+
for child in children:
|
|
821
|
+
child_name = child["target"].lower()
|
|
822
|
+
if child_name not in visited:
|
|
823
|
+
queue.append((child_name, depth + 1))
|
|
824
|
+
|
|
825
|
+
# Lista todas as arestas
|
|
826
|
+
if edges:
|
|
827
|
+
lines.append("\n## Conexões")
|
|
828
|
+
for edge in edges:
|
|
829
|
+
lines.append(f"- {edge['source']} → {edge['target']} ({edge['type']})")
|
|
830
|
+
|
|
831
|
+
return "\n".join(lines)
|
|
832
|
+
|
|
720
833
|
|
|
721
834
|
async def main():
|
|
722
835
|
"""Entry point do MCP Server"""
|