ocerebro 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cerebro/index/entities.db +0 -0
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/src/index/entities_db.py +44 -10
- package/src/mcp/server.py +9 -7
|
Binary file
|
package/package.json
CHANGED
package/pyproject.toml
CHANGED
package/src/index/entities_db.py
CHANGED
|
@@ -50,6 +50,7 @@ class EntitiesDB:
|
|
|
50
50
|
memory_id TEXT,
|
|
51
51
|
entity_name TEXT,
|
|
52
52
|
entity_type TEXT,
|
|
53
|
+
source TEXT DEFAULT 'content',
|
|
53
54
|
confidence REAL DEFAULT 1.0,
|
|
54
55
|
span_start INTEGER,
|
|
55
56
|
span_end INTEGER,
|
|
@@ -75,6 +76,11 @@ class EntitiesDB:
|
|
|
75
76
|
ON entities(memory_id)
|
|
76
77
|
""")
|
|
77
78
|
|
|
79
|
+
conn.execute("""
|
|
80
|
+
CREATE INDEX IF NOT EXISTS idx_entities_source
|
|
81
|
+
ON entities(source)
|
|
82
|
+
""")
|
|
83
|
+
|
|
78
84
|
# Tabela de cache de hash (para evitar reprocessamento)
|
|
79
85
|
conn.execute("""
|
|
80
86
|
CREATE TABLE IF NOT EXISTS entity_cache (
|
|
@@ -128,7 +134,8 @@ class EntitiesDB:
|
|
|
128
134
|
confidence: float = 1.0,
|
|
129
135
|
span_start: int = 0,
|
|
130
136
|
span_end: int = 0,
|
|
131
|
-
context_snippet: str = ""
|
|
137
|
+
context_snippet: str = "",
|
|
138
|
+
source: str = "content"
|
|
132
139
|
) -> str:
|
|
133
140
|
"""
|
|
134
141
|
Insere uma entidade.
|
|
@@ -141,6 +148,7 @@ class EntitiesDB:
|
|
|
141
148
|
span_start: Posição inicial no texto
|
|
142
149
|
span_end: Posição final no texto
|
|
143
150
|
context_snippet: Contexto ao redor da entidade
|
|
151
|
+
source: Origem da entidade ('frontmatter' ou 'content')
|
|
144
152
|
|
|
145
153
|
Returns:
|
|
146
154
|
ID da entidade
|
|
@@ -150,13 +158,14 @@ class EntitiesDB:
|
|
|
150
158
|
conn = self._connect()
|
|
151
159
|
conn.execute("""
|
|
152
160
|
INSERT OR REPLACE INTO entities
|
|
153
|
-
(id, memory_id, entity_name, entity_type, confidence, span_start, span_end, context_snippet)
|
|
154
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
161
|
+
(id, memory_id, entity_name, entity_type, source, confidence, span_start, span_end, context_snippet)
|
|
162
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
155
163
|
""", (
|
|
156
164
|
entity_id,
|
|
157
165
|
memory_id,
|
|
158
166
|
entity_name,
|
|
159
167
|
entity_type,
|
|
168
|
+
source,
|
|
160
169
|
confidence,
|
|
161
170
|
span_start,
|
|
162
171
|
span_end,
|
|
@@ -207,7 +216,7 @@ class EntitiesDB:
|
|
|
207
216
|
|
|
208
217
|
def delete_entities_by_memory(self, memory_id: str) -> int:
|
|
209
218
|
"""
|
|
210
|
-
Remove entidades de uma memória.
|
|
219
|
+
Remove todas as entidades de uma memória.
|
|
211
220
|
|
|
212
221
|
Args:
|
|
213
222
|
memory_id: ID da memória
|
|
@@ -232,6 +241,27 @@ class EntitiesDB:
|
|
|
232
241
|
conn.close()
|
|
233
242
|
return deleted
|
|
234
243
|
|
|
244
|
+
def delete_entities_by_source(self, memory_id: str, source: str) -> int:
    """
    Remove a memory's entities that came from a given source.

    Only rows of the ``entities`` table matching both ``memory_id`` and
    ``source`` are deleted; entities of the same memory recorded under a
    different source are left untouched.

    Args:
        memory_id: ID of the memory whose entities are removed
        source: Origin of the entities ('frontmatter' or 'content')

    Returns:
        Number of entities removed
    """
    conn = self._connect()
    try:
        cursor = conn.execute(
            "DELETE FROM entities WHERE memory_id = ? AND source = ?",
            (memory_id, source)
        )
        # Read rowcount before committing so it reflects this DELETE.
        deleted = cursor.rowcount
        conn.commit()
        return deleted
    finally:
        # Always release the connection, even if the DELETE or the commit
        # raises; otherwise the handle (and any lock it holds) would leak.
        conn.close()
|
|
264
|
+
|
|
235
265
|
# ========================================================================
|
|
236
266
|
# OPERAÇÕES DE RELACIONAMENTOS
|
|
237
267
|
# ========================================================================
|
|
@@ -521,7 +551,8 @@ class EntitiesDB:
|
|
|
521
551
|
memory_id,
|
|
522
552
|
f"TYPE:{frontmatter['type']}",
|
|
523
553
|
"META",
|
|
524
|
-
confidence=1.0
|
|
554
|
+
confidence=1.0,
|
|
555
|
+
source="frontmatter"
|
|
525
556
|
)
|
|
526
557
|
entity_ids.append(eid)
|
|
527
558
|
|
|
@@ -531,7 +562,8 @@ class EntitiesDB:
|
|
|
531
562
|
memory_id,
|
|
532
563
|
project,
|
|
533
564
|
"PROJECT",
|
|
534
|
-
confidence=1.0
|
|
565
|
+
confidence=1.0,
|
|
566
|
+
source="frontmatter"
|
|
535
567
|
)
|
|
536
568
|
entity_ids.append(eid)
|
|
537
569
|
|
|
@@ -544,7 +576,8 @@ class EntitiesDB:
|
|
|
544
576
|
memory_id,
|
|
545
577
|
f"TAG:{tag}",
|
|
546
578
|
"TAG",
|
|
547
|
-
confidence=1.0
|
|
579
|
+
confidence=1.0,
|
|
580
|
+
source="frontmatter"
|
|
548
581
|
)
|
|
549
582
|
entity_ids.append(eid)
|
|
550
583
|
elif isinstance(tags, list):
|
|
@@ -553,7 +586,8 @@ class EntitiesDB:
|
|
|
553
586
|
memory_id,
|
|
554
587
|
f"TAG:{tag}",
|
|
555
588
|
"TAG",
|
|
556
|
-
confidence=1.0
|
|
589
|
+
confidence=1.0,
|
|
590
|
+
source="frontmatter"
|
|
557
591
|
)
|
|
558
592
|
entity_ids.append(eid)
|
|
559
593
|
|
|
@@ -586,8 +620,8 @@ class EntitiesDB:
|
|
|
586
620
|
if existing:
|
|
587
621
|
return [] # Já processado, sem mudanças
|
|
588
622
|
|
|
589
|
-
# Conteúdo novo ou mudou - remove entidades
|
|
590
|
-
self.
|
|
623
|
+
# Conteúdo novo ou mudou - remove apenas entidades de conteúdo e reprocessa
|
|
624
|
+
self.delete_entities_by_source(memory_id, "content")
|
|
591
625
|
|
|
592
626
|
entity_ids = []
|
|
593
627
|
|
package/src/mcp/server.py
CHANGED
|
@@ -734,25 +734,27 @@ Uma chamada por memória. O sistema salva e indexa automaticamente.
|
|
|
734
734
|
index_path.write_text(f"# Memórias do Projeto\n\n{entry}", encoding="utf-8")
|
|
735
735
|
|
|
736
736
|
# BUG FIX: Registrar entidades no grafo (frontmatter + conteúdo)
|
|
737
|
+
# ORDEM IMPORTANTE: content primeiro, frontmatter depois
|
|
738
|
+
# extract_from_content() deleta entidades existentes, então frontmatter deve vir após
|
|
737
739
|
if self.entities_db:
|
|
738
740
|
import yaml
|
|
739
741
|
frontmatter_match = re.match(r'^---\n(.*?)\n---\n(.*)$', content, re.DOTALL)
|
|
740
742
|
if frontmatter_match:
|
|
741
743
|
try:
|
|
742
744
|
frontmatter = yaml.safe_load(frontmatter_match.group(1))
|
|
743
|
-
# Extrai entidades do frontmatter
|
|
744
|
-
self.entities_db.extract_from_frontmatter(
|
|
745
|
-
memory_id=mem_name,
|
|
746
|
-
frontmatter=frontmatter or {},
|
|
747
|
-
project=project
|
|
748
|
-
)
|
|
749
|
-
# Extrai entidades do conteúdo (spaCy NER)
|
|
750
745
|
body_content = frontmatter_match.group(2)
|
|
746
|
+
# 1. Extrai entidades do conteúdo (spaCy NER) - pode deletar existentes
|
|
751
747
|
self.entities_db.extract_from_content(
|
|
752
748
|
memory_id=mem_name,
|
|
753
749
|
content=body_content,
|
|
754
750
|
use_spacy=True
|
|
755
751
|
)
|
|
752
|
+
# 2. Extrai entidades do frontmatter - NÃO são deletadas
|
|
753
|
+
self.entities_db.extract_from_frontmatter(
|
|
754
|
+
memory_id=mem_name,
|
|
755
|
+
frontmatter=frontmatter or {},
|
|
756
|
+
project=project
|
|
757
|
+
)
|
|
756
758
|
except Exception as e:
|
|
757
759
|
pass # Falha silenciosa se frontmatter inválido
|
|
758
760
|
|