ocerebro 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cerebro/index/embeddings.db +0 -0
- package/cerebro/index/entities.db +0 -0
- package/cerebro/index/metadata.db +0 -0
- package/package.json +1 -1
- package/pyproject.toml +2 -1
- package/src/consolidation/promoter.py +28 -2
- package/src/index/entities_db.py +283 -8
- package/src/index/queries.py +5 -10
- package/src/mcp/server.py +33 -1
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
package/pyproject.toml
CHANGED
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ocerebro"
|
|
7
|
-
version = "0.3.0"
|
|
7
|
+
version = "0.3.2"
|
|
8
8
|
description = "OCerebro - Sistema de Memoria para Agentes (Claude Code/MCP)"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -37,6 +37,7 @@ dependencies = [
|
|
|
37
37
|
"sentence-transformers>=2.2.0",
|
|
38
38
|
"mcp>=1.0.0",
|
|
39
39
|
"anthropic>=0.40.0",
|
|
40
|
+
"spacy>=3.5.0",
|
|
40
41
|
]
|
|
41
42
|
|
|
42
43
|
[project.optional-dependencies]
|
|
@@ -227,8 +227,9 @@ class Promoter:
|
|
|
227
227
|
content=content
|
|
228
228
|
)
|
|
229
229
|
|
|
230
|
-
# Extrai entidades do frontmatter
|
|
230
|
+
# Extrai entidades do frontmatter E do conteúdo
|
|
231
231
|
self._extract_entities_from_frontmatter(draft_id, frontmatter, project)
|
|
232
|
+
self._extract_entities_from_content(draft_id, content)
|
|
232
233
|
|
|
233
234
|
return PromotionResult(
|
|
234
235
|
success=True,
|
|
@@ -307,8 +308,9 @@ class Promoter:
|
|
|
307
308
|
content=body
|
|
308
309
|
)
|
|
309
310
|
|
|
310
|
-
# Extrai entidades do frontmatter
|
|
311
|
+
# Extrai entidades do frontmatter E do conteúdo
|
|
311
312
|
self._extract_entities_from_frontmatter(draft_id, frontmatter, project)
|
|
313
|
+
self._extract_entities_from_content(draft_id, body)
|
|
312
314
|
|
|
313
315
|
return PromotionResult(
|
|
314
316
|
success=True,
|
|
@@ -477,3 +479,27 @@ class Promoter:
|
|
|
477
479
|
frontmatter=frontmatter,
|
|
478
480
|
project=project
|
|
479
481
|
)
|
|
482
|
+
|
|
483
|
+
def _extract_entities_from_content(
    self,
    memory_id: str,
    content: str
) -> List[str]:
    """
    Run content-based entity extraction for a memory and record it in the graph.

    Delegates to ``self.entities_db.extract_from_content`` with spaCy enabled.

    Args:
        memory_id: ID of the memory the text belongs to
        content: Text body to scan for entities

    Returns:
        List of entity IDs created, or an empty list when no entities
        database is configured
    """
    graph = self.entities_db
    if graph:
        return graph.extract_from_content(
            memory_id=memory_id,
            content=content,
            use_spacy=True
        )

    # No entity graph attached: nothing to record.
    return []
|
package/src/index/entities_db.py
CHANGED
|
@@ -50,6 +50,7 @@ class EntitiesDB:
|
|
|
50
50
|
memory_id TEXT,
|
|
51
51
|
entity_name TEXT,
|
|
52
52
|
entity_type TEXT,
|
|
53
|
+
source TEXT DEFAULT 'content',
|
|
53
54
|
confidence REAL DEFAULT 1.0,
|
|
54
55
|
span_start INTEGER,
|
|
55
56
|
span_end INTEGER,
|
|
@@ -75,6 +76,26 @@ class EntitiesDB:
|
|
|
75
76
|
ON entities(memory_id)
|
|
76
77
|
""")
|
|
77
78
|
|
|
79
|
+
conn.execute("""
|
|
80
|
+
CREATE INDEX IF NOT EXISTS idx_entities_source
|
|
81
|
+
ON entities(source)
|
|
82
|
+
""")
|
|
83
|
+
|
|
84
|
+
# Tabela de cache de hash (para evitar reprocessamento)
|
|
85
|
+
conn.execute("""
|
|
86
|
+
CREATE TABLE IF NOT EXISTS entity_cache (
|
|
87
|
+
memory_id TEXT PRIMARY KEY,
|
|
88
|
+
content_hash TEXT,
|
|
89
|
+
processed_at TEXT DEFAULT (datetime('now')),
|
|
90
|
+
entity_count INTEGER DEFAULT 0
|
|
91
|
+
)
|
|
92
|
+
""")
|
|
93
|
+
|
|
94
|
+
conn.execute("""
|
|
95
|
+
CREATE INDEX IF NOT EXISTS idx_entity_cache_hash
|
|
96
|
+
ON entity_cache(content_hash)
|
|
97
|
+
""")
|
|
98
|
+
|
|
78
99
|
# Tabela de relacionamentos
|
|
79
100
|
conn.execute("""
|
|
80
101
|
CREATE TABLE IF NOT EXISTS entity_relationships (
|
|
@@ -113,7 +134,8 @@ class EntitiesDB:
|
|
|
113
134
|
confidence: float = 1.0,
|
|
114
135
|
span_start: int = 0,
|
|
115
136
|
span_end: int = 0,
|
|
116
|
-
context_snippet: str = ""
|
|
137
|
+
context_snippet: str = "",
|
|
138
|
+
source: str = "content"
|
|
117
139
|
) -> str:
|
|
118
140
|
"""
|
|
119
141
|
Insere uma entidade.
|
|
@@ -126,6 +148,7 @@ class EntitiesDB:
|
|
|
126
148
|
span_start: Posição inicial no texto
|
|
127
149
|
span_end: Posição final no texto
|
|
128
150
|
context_snippet: Contexto ao redor da entidade
|
|
151
|
+
source: Origem da entidade ('frontmatter' ou 'content')
|
|
129
152
|
|
|
130
153
|
Returns:
|
|
131
154
|
ID da entidade
|
|
@@ -135,13 +158,14 @@ class EntitiesDB:
|
|
|
135
158
|
conn = self._connect()
|
|
136
159
|
conn.execute("""
|
|
137
160
|
INSERT OR REPLACE INTO entities
|
|
138
|
-
(id, memory_id, entity_name, entity_type, confidence, span_start, span_end, context_snippet)
|
|
139
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
161
|
+
(id, memory_id, entity_name, entity_type, source, confidence, span_start, span_end, context_snippet)
|
|
162
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
140
163
|
""", (
|
|
141
164
|
entity_id,
|
|
142
165
|
memory_id,
|
|
143
166
|
entity_name,
|
|
144
167
|
entity_type,
|
|
168
|
+
source,
|
|
145
169
|
confidence,
|
|
146
170
|
span_start,
|
|
147
171
|
span_end,
|
|
@@ -192,7 +216,7 @@ class EntitiesDB:
|
|
|
192
216
|
|
|
193
217
|
def delete_entities_by_memory(self, memory_id: str) -> int:
|
|
194
218
|
"""
|
|
195
|
-
Remove entidades de uma memória.
|
|
219
|
+
Remove todas as entidades de uma memória.
|
|
196
220
|
|
|
197
221
|
Args:
|
|
198
222
|
memory_id: ID da memória
|
|
@@ -217,6 +241,27 @@ class EntitiesDB:
|
|
|
217
241
|
conn.close()
|
|
218
242
|
return deleted
|
|
219
243
|
|
|
244
|
+
def delete_entities_by_source(self, memory_id: str, source: str) -> int:
    """
    Remove the entities of one memory that came from a given source.

    Args:
        memory_id: ID of the memory
        source: Origin of the entities to remove ('frontmatter' or 'content')

    Returns:
        Number of entity rows removed
    """
    conn = self._connect()
    try:
        cursor = conn.execute(
            "DELETE FROM entities WHERE memory_id = ? AND source = ?",
            (memory_id, source)
        )
        deleted = cursor.rowcount
        conn.commit()
        return deleted
    finally:
        # Always release the connection, even if the DELETE or commit fails.
        conn.close()
|
|
264
|
+
|
|
220
265
|
# ========================================================================
|
|
221
266
|
# OPERAÇÕES DE RELACIONAMENTOS
|
|
222
267
|
# ========================================================================
|
|
@@ -506,7 +551,8 @@ class EntitiesDB:
|
|
|
506
551
|
memory_id,
|
|
507
552
|
f"TYPE:{frontmatter['type']}",
|
|
508
553
|
"META",
|
|
509
|
-
confidence=1.0
|
|
554
|
+
confidence=1.0,
|
|
555
|
+
source="frontmatter"
|
|
510
556
|
)
|
|
511
557
|
entity_ids.append(eid)
|
|
512
558
|
|
|
@@ -516,7 +562,8 @@ class EntitiesDB:
|
|
|
516
562
|
memory_id,
|
|
517
563
|
project,
|
|
518
564
|
"PROJECT",
|
|
519
|
-
confidence=1.0
|
|
565
|
+
confidence=1.0,
|
|
566
|
+
source="frontmatter"
|
|
520
567
|
)
|
|
521
568
|
entity_ids.append(eid)
|
|
522
569
|
|
|
@@ -529,7 +576,8 @@ class EntitiesDB:
|
|
|
529
576
|
memory_id,
|
|
530
577
|
f"TAG:{tag}",
|
|
531
578
|
"TAG",
|
|
532
|
-
confidence=1.0
|
|
579
|
+
confidence=1.0,
|
|
580
|
+
source="frontmatter"
|
|
533
581
|
)
|
|
534
582
|
entity_ids.append(eid)
|
|
535
583
|
elif isinstance(tags, list):
|
|
@@ -538,12 +586,239 @@ class EntitiesDB:
|
|
|
538
586
|
memory_id,
|
|
539
587
|
f"TAG:{tag}",
|
|
540
588
|
"TAG",
|
|
541
|
-
confidence=1.0
|
|
589
|
+
confidence=1.0,
|
|
590
|
+
source="frontmatter"
|
|
542
591
|
)
|
|
543
592
|
entity_ids.append(eid)
|
|
544
593
|
|
|
545
594
|
return entity_ids
|
|
546
595
|
|
|
596
|
+
def extract_from_content(
    self,
    memory_id: str,
    content: str,
    use_spacy: bool = True
) -> List[str]:
    """
    Extract named entities from a memory's text and store them.

    The content hash is compared with the cached one; when it is unchanged
    and the memory already has entities, nothing is reprocessed. Otherwise
    the memory's content-sourced entities are deleted and re-extracted,
    either with spaCy NER or with the capitalized-word fallback.

    Args:
        memory_id: ID of the memory
        content: Text to analyze (only the first 5000 chars go through spaCy)
        use_spacy: Use spaCy NER (default: True); False forces the fallback

    Returns:
        List of entity IDs created by this call (empty when the cached
        result was reused or when spaCy processing failed)
    """
    import logging

    # Cache check - only reprocess when the content changed.
    cached_hash = self.get_cached_hash(memory_id)
    current_hash = self._compute_hash(content)
    if cached_hash == current_hash and self.get_entities_by_memory(memory_id):
        return []  # Already processed, nothing changed

    # New or changed content: drop only content-sourced entities so that
    # frontmatter entities stored for the same memory are preserved.
    self.delete_entities_by_source(memory_id, "content")

    entity_ids: List[str] = []

    if not use_spacy:
        entity_ids.extend(self._extract_entities_fallback(memory_id, content))
        self.update_cache(memory_id, content, len(entity_ids))
        return entity_ids

    model_name = "pt_core_news_sm"

    # Maps spaCy labels to this database's entity types. The Portuguese
    # model emits PER/LOC/ORG/MISC; the remaining keys cover OntoNotes-style
    # label sets used by other spaCy models.
    label_map = {
        "ORG": "ORG",
        "PER": "PERSON",
        "PERSON": "PERSON",
        "GPE": "LOC",
        "LOC": "LOC",
        "PRODUCT": "PRODUCT",
        "EVENT": "EVENT",
        "WORK_OF_ART": "PRODUCT",
        "LAW": "PRODUCT",
        "LANGUAGE": "TECH",
    }

    try:
        import spacy

        try:
            nlp = spacy.load(model_name)
        except OSError:
            # Model not installed: download it with the interpreter that is
            # running this process, so it lands in the same environment.
            import subprocess
            import sys

            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model_name],
                capture_output=True
            )
            nlp = spacy.load(model_name)

        doc = nlp(content[:5000])  # Capped at 5000 chars for performance

        for ent in doc.ents:
            entity_type = label_map.get(ent.label_)
            if entity_type is None:
                continue

            # Use the optional "nerd_score" extension when it is registered
            # and set; otherwise fall back to a fixed confidence.
            score = getattr(ent._, "nerd_score", None)
            confidence = float(score) if score is not None else 0.8

            eid = self.insert_entity(
                memory_id,
                ent.text,
                entity_type,
                confidence=confidence,
                span_start=ent.start_char,
                span_end=ent.end_char,
                context_snippet=content[max(0, ent.start_char - 25):ent.end_char + 25][:50],
                source="content"
            )
            entity_ids.append(eid)

        # Record the hash only after a successful run.
        self.update_cache(memory_id, content, len(entity_ids))

    except ImportError:
        # spaCy not available - use the simple heuristic fallback.
        entity_ids.extend(self._extract_entities_fallback(memory_id, content))
        self.update_cache(memory_id, content, len(entity_ids))
    except Exception:
        # Best effort: extraction problems must not break the caller, but
        # they are logged instead of being discarded silently.
        logging.getLogger(__name__).warning(
            "Entity extraction failed for memory %s", memory_id, exc_info=True
        )

    return entity_ids
|
|
694
|
+
|
|
695
|
+
def _extract_entities_fallback(
    self,
    memory_id: str,
    content: str
) -> List[str]:
    """
    Fallback extraction without spaCy - uses a simple heuristic.

    Capitalized ASCII words of three or more letters found in the first
    2000 characters are treated as probable proper names. Common English
    and Portuguese sentence starters are filtered out, and at most 20
    distinct words are stored, in order of first appearance.

    Args:
        memory_id: ID of the memory
        content: Text to analyze

    Returns:
        List of entity IDs created
    """
    # Capitalized words (probable proper names / organizations).
    capitalized = re.findall(r'\b[A-Z][a-zA-Z]{2,}\b', content[:2000])

    # Common terms that are not entities.
    common_words = {
        "The", "This", "That", "These", "Those", "What", "When", "Where", "Why", "How",
        "Para", "Com", "Por", "Em", "De", "Do", "Da", "Dos", "Das", "Uma", "Um",
        "Como", "Quando", "Onde", "Qual", "Quais", "Quem", "Sobre"
    }

    # dict.fromkeys de-duplicates while keeping first-appearance order, so
    # the 20-entity cut-off selects the same words on every run.
    candidates = list(dict.fromkeys(
        word for word in capitalized if word not in common_words
    ))

    entity_ids = []
    for entity in candidates[:20]:  # Cap at 20 entities
        eid = self.insert_entity(
            memory_id,
            entity,
            # The pattern only matches words starting with an uppercase
            # letter, so the heuristic cannot tell people from organizations.
            "ORG",
            confidence=0.5,  # Low confidence without spaCy
            context_snippet=entity,
            source="content"
        )
        entity_ids.append(eid)

    return entity_ids
|
|
735
|
+
|
|
736
|
+
# ========================================================================
|
|
737
|
+
# CACHE DE HASH (para evitar reprocessamento)
|
|
738
|
+
# ========================================================================
|
|
739
|
+
|
|
740
|
+
def _compute_hash(self, content: str) -> str:
    """
    Fingerprint a piece of text.

    Args:
        content: Text to fingerprint

    Returns:
        First 16 hex characters of the SHA256 digest of the UTF-8 encoded text
    """
    from hashlib import sha256

    digest = sha256()
    digest.update(content.encode('utf-8'))
    full_hex = digest.hexdigest()
    return full_hex[:16]
|
|
752
|
+
|
|
753
|
+
def get_cached_hash(self, memory_id: str) -> Optional[str]:
    """
    Get the cached content hash of a memory.

    Args:
        memory_id: ID of the memory

    Returns:
        Stored hash, or None when the memory has no cache row
    """
    conn = self._connect()
    try:
        row = conn.execute(
            "SELECT content_hash FROM entity_cache WHERE memory_id = ?",
            (memory_id,)
        ).fetchone()
    finally:
        # Always release the connection, even if the query fails.
        conn.close()
    return row["content_hash"] if row else None
|
|
771
|
+
|
|
772
|
+
def is_content_changed(self, memory_id: str, content: str) -> bool:
    """
    Tell whether a memory's text differs from what was last processed.

    Args:
        memory_id: ID of the memory
        content: Current text of the memory

    Returns:
        True when the hash of ``content`` differs from the cached hash
        (including when nothing is cached), False when they are equal
    """
    # Operands evaluate left to right: hash the text, then read the cache.
    return self._compute_hash(content) != self.get_cached_hash(memory_id)
|
|
786
|
+
|
|
787
|
+
def update_cache(self, memory_id: str, content: str, entity_count: int) -> None:
    """
    Update the hash cache after a memory has been processed.

    Inserts or replaces the memory's row in ``entity_cache`` with the hash
    of ``content``, the current timestamp and the entity count.

    Args:
        memory_id: ID of the memory
        content: Text that was processed
        entity_count: Number of entities extracted
    """
    content_hash = self._compute_hash(content)

    conn = self._connect()
    try:
        conn.execute("""
            INSERT OR REPLACE INTO entity_cache
            (memory_id, content_hash, processed_at, entity_count)
            VALUES (?, ?, datetime('now'), ?)
        """, (memory_id, content_hash, entity_count))
        conn.commit()
    finally:
        # Always release the connection, even if the write fails.
        conn.close()
|
|
806
|
+
|
|
807
|
+
def clear_cache(self, memory_id: str) -> None:
    """
    Clear the hash cache entry of a memory.

    Args:
        memory_id: ID of the memory
    """
    conn = self._connect()
    try:
        conn.execute(
            "DELETE FROM entity_cache WHERE memory_id = ?",
            (memory_id,)
        )
        conn.commit()
    finally:
        # Always release the connection, even if the delete fails.
        conn.close()
|
|
821
|
+
|
|
547
822
|
# ========================================================================
|
|
548
823
|
# ESTATÍSTICAS
|
|
549
824
|
# ========================================================================
|
package/src/index/queries.py
CHANGED
|
@@ -87,6 +87,7 @@ class QueryEngine:
|
|
|
87
87
|
if use_fts:
|
|
88
88
|
fts_results = self._search_fts(query, project, mem_type, limit)
|
|
89
89
|
for r in fts_results:
|
|
90
|
+
r.score *= fts_weight # Aplica peso FTS desde o início
|
|
90
91
|
results[r.memory_id] = r
|
|
91
92
|
|
|
92
93
|
# Busca Semantic
|
|
@@ -94,12 +95,9 @@ class QueryEngine:
|
|
|
94
95
|
semantic_results = self._search_semantic(query, project, limit)
|
|
95
96
|
for r in semantic_results:
|
|
96
97
|
if r.memory_id in results:
|
|
97
|
-
# Combina scores
|
|
98
|
+
# Combina scores: média ponderada (FTS já tem peso aplicado)
|
|
98
99
|
existing = results[r.memory_id]
|
|
99
|
-
combined_score = (
|
|
100
|
-
existing.score * fts_weight +
|
|
101
|
-
r.score * semantic_weight
|
|
102
|
-
)
|
|
100
|
+
combined_score = existing.score + (r.score * semantic_weight)
|
|
103
101
|
results[r.memory_id] = QueryResult(
|
|
104
102
|
memory_id=r.memory_id,
|
|
105
103
|
type=r.type,
|
|
@@ -118,12 +116,9 @@ class QueryEngine:
|
|
|
118
116
|
graph_results = self._search_by_graph(query, limit)
|
|
119
117
|
for r in graph_results:
|
|
120
118
|
if r.memory_id in results:
|
|
121
|
-
# Combina scores
|
|
119
|
+
# Combina scores: soma ponderada (scores anteriores já têm peso)
|
|
122
120
|
existing = results[r.memory_id]
|
|
123
|
-
combined_score = (
|
|
124
|
-
existing.score * (1 - graph_weight) +
|
|
125
|
-
r.score * graph_weight
|
|
126
|
-
)
|
|
121
|
+
combined_score = existing.score + (r.score * graph_weight)
|
|
127
122
|
results[r.memory_id] = QueryResult(
|
|
128
123
|
memory_id=r.memory_id,
|
|
129
124
|
type=r.type,
|
package/src/mcp/server.py
CHANGED
|
@@ -532,7 +532,8 @@ class CerebroMCP:
|
|
|
532
532
|
"",
|
|
533
533
|
"Índice:",
|
|
534
534
|
f" Metadata DB: {self.cerebro_path / 'index' / 'metadata.db'}",
|
|
535
|
-
f" Embeddings DB: {self.cerebro_path / 'index' / 'embeddings.db'}"
|
|
535
|
+
f" Embeddings DB: {self.cerebro_path / 'index' / 'embeddings.db'}",
|
|
536
|
+
f" Entities DB: {self.cerebro_path / 'index' / 'entities.db'}"
|
|
536
537
|
]
|
|
537
538
|
|
|
538
539
|
return "\n".join(lines)
|
|
@@ -712,8 +713,14 @@ Uma chamada por memória. O sistema salva e indexa automaticamente.
|
|
|
712
713
|
|
|
713
714
|
desc_match = re.search(r'description:\s*(.*)', content)
|
|
714
715
|
type_match = re.search(r'type:\s*(.*)', content)
|
|
716
|
+
project_match = re.search(r'project:\s*(.*)', content)
|
|
717
|
+
tags_match = re.search(r'tags:\s*(.*)', content)
|
|
718
|
+
|
|
715
719
|
desc = desc_match.group(1).strip() if desc_match else "sem descrição"
|
|
716
720
|
m_type = type_match.group(1).strip() if type_match else "project"
|
|
721
|
+
project = project_match.group(1).strip() if project_match else "unknown"
|
|
722
|
+
tags = tags_match.group(1).strip() if tags_match else ""
|
|
723
|
+
|
|
717
724
|
ts = datetime.now().strftime("%Y-%m-%d")
|
|
718
725
|
entry = f"- [{m_type}] {mem_name}.md ({ts}): {desc}\n"
|
|
719
726
|
|
|
@@ -726,6 +733,31 @@ Uma chamada por memória. O sistema salva e indexa automaticamente.
|
|
|
726
733
|
else:
|
|
727
734
|
index_path.write_text(f"# Memórias do Projeto\n\n{entry}", encoding="utf-8")
|
|
728
735
|
|
|
736
|
+
# BUG FIX: Registrar entidades no grafo (frontmatter + conteúdo)
|
|
737
|
+
# ORDEM IMPORTANTE: content primeiro, frontmatter depois
|
|
738
|
+
# extract_from_content() deleta entidades existentes, então frontmatter deve vir após
|
|
739
|
+
if self.entities_db:
|
|
740
|
+
import yaml
|
|
741
|
+
frontmatter_match = re.match(r'^---\n(.*?)\n---\n(.*)$', content, re.DOTALL)
|
|
742
|
+
if frontmatter_match:
|
|
743
|
+
try:
|
|
744
|
+
frontmatter = yaml.safe_load(frontmatter_match.group(1))
|
|
745
|
+
body_content = frontmatter_match.group(2)
|
|
746
|
+
# 1. Extrai entidades do conteúdo (spaCy NER) - pode deletar existentes
|
|
747
|
+
self.entities_db.extract_from_content(
|
|
748
|
+
memory_id=mem_name,
|
|
749
|
+
content=body_content,
|
|
750
|
+
use_spacy=True
|
|
751
|
+
)
|
|
752
|
+
# 2. Extrai entidades do frontmatter - NÃO são deletadas
|
|
753
|
+
self.entities_db.extract_from_frontmatter(
|
|
754
|
+
memory_id=mem_name,
|
|
755
|
+
frontmatter=frontmatter or {},
|
|
756
|
+
project=project
|
|
757
|
+
)
|
|
758
|
+
except Exception as e:
|
|
759
|
+
pass # Falha silenciosa se frontmatter inválido
|
|
760
|
+
|
|
729
761
|
return f"✅ Memória '{mem_name}' salva em {file_path}"
|
|
730
762
|
|
|
731
763
|
def _remember(self, args: Dict[str, Any]) -> str:
|