ocerebro 0.4.5 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ocerebro",
3
- "version": "0.4.5",
3
+ "version": "0.4.7",
4
4
  "description": "OCerebro - Sistema de Memoria para Agentes (Claude Code/MCP)",
5
5
  "main": "bin/ocerebro.js",
6
6
  "bin": {
package/pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ocerebro"
7
- version = "0.4.5"
7
+ version = "0.4.7"
8
8
  description = "OCerebro - Sistema de Memoria para Agentes (Claude Code/MCP)"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -34,16 +34,22 @@ dependencies = [
34
34
  "pyyaml>=6.0",
35
35
  "pydantic>=2.0",
36
36
  "sqlite-vec>=0.1.0",
37
- "sentence-transformers>=2.2.0",
38
37
  "mcp>=1.0.0",
39
38
  "anthropic>=0.40.0",
40
- "spacy>=3.5.0",
41
39
  "fastapi>=0.109.0",
42
40
  "uvicorn>=0.27.0",
43
41
  ]
44
42
 
45
43
  [project.optional-dependencies]
44
+ semantic = [
45
+ "sentence-transformers>=2.2.0",
46
+ "spacy>=3.5.0",
47
+ ]
48
+ full = [
49
+ "ocerebro[semantic]",
50
+ ]
46
51
  test = [
52
+ "ocerebro[semantic]",
47
53
  "pytest>=7.0",
48
54
  "pytest-cov>=4.0",
49
55
  ]
package/src/cli/main.py CHANGED
@@ -109,18 +109,31 @@ class CerebroCLI:
109
109
  limit: int = 10,
110
110
  use_semantic: bool = True
111
111
  ) -> str:
112
+ # Verifica disponibilidade da busca semântica
113
+ semantic_available = self.query_engine.is_semantic_available()
114
+
115
+ if use_semantic and not semantic_available:
116
+ # Fallback para FTS + Graph apenas
117
+ lines = [
118
+ "⚠️ Busca semântica não disponível (sentence-transformers não instalado).",
119
+ " Instalando com: npx ocerebro full",
120
+ ""
121
+ ]
122
+ else:
123
+ lines = []
124
+
112
125
  results = self.query_engine.search(
113
126
  query=query,
114
127
  project=project,
115
128
  mem_type=mem_type,
116
129
  limit=limit,
117
- use_semantic=use_semantic
130
+ use_semantic=use_semantic and semantic_available
118
131
  )
119
132
 
120
133
  if not results:
121
134
  return "Nenhum resultado encontrado."
122
135
 
123
- lines = [f"Resultados para '{query}':\n"]
136
+ lines.append(f"Resultados para '{query}':\n")
124
137
  for i, r in enumerate(results, 1):
125
138
  lines.append(f"{i}. [{r.type}] {r.title}")
126
139
  lines.append(f" Projeto: {r.project} | Score: {r.score:.3f} | Fonte: {r.source}")
@@ -271,29 +284,108 @@ def _run_init(project_path: Optional[Path] = None):
271
284
  print("Como quer usar o OCerebro?")
272
285
  print(" 1. Neste projeto (cria .ocerebro/ aqui)")
273
286
  print(" 2. Global (usa ~/.ocerebro/ para todos os projetos)")
274
- choice = input("\nEscolha [1/2] (padrão: 1): ").strip() or "1"
287
+ choice = input("\nEscolha [1/2] (padrao: 1): ").strip() or "1"
275
288
 
276
289
  if choice == "2":
277
290
  base_path = Path.home() / ".ocerebro"
278
- print(f"\n Modo global: {base_path}")
291
+ print(f"\n[OK] Modo global: {base_path}")
279
292
  else:
280
293
  base_path = (project_path or Path.cwd()) / ".ocerebro"
281
- print(f"\n Modo projeto: {base_path}")
294
+ print(f"\n[OK] Modo projeto: {base_path}")
282
295
 
283
296
  config_file = Path.home() / ".ocerebro_config"
284
297
  config_file.parent.mkdir(parents=True, exist_ok=True)
285
298
  config_file.write_text(f"base_path={base_path}\n", encoding="utf-8")
286
- print(f" Configuração salva em {config_file}")
299
+ print(f"[OK] Configuracao salva em {config_file}")
287
300
 
288
301
  setup_ocerebro_dir(base_path)
289
302
  setup_hooks(base_path)
290
303
  setup_slash_commands(project_path or Path.cwd())
291
304
 
305
+ # Pergunta sobre busca semantica
306
+ print("\n" + "="*60)
307
+ print("BUSCA SEMANTICA (opcional)")
308
+ print("="*60)
309
+ print("O OCerebro oferece dois modos de busca:")
310
+ print(" 1. FTS + Graph (padrao) - Leve, rapido, sem dependencias extras")
311
+ print(" - Full-text search (FTS) no conteudo")
312
+ print(" - Busca por grafo de entidades")
313
+ print(" - ~100MB de footprint")
314
+ print("")
315
+ print(" 2. FTS + Graph + Semantica (recomendado para producao)")
316
+ print(" - Tudo acima + busca por significado")
317
+ print(" - Encontra 'banco travando' mesmo sem palavra 'deadlock'")
318
+ print(" - ~500MB adicionais de dependencias")
319
+ print("")
320
+ semantic_choice = input("Instalar busca semantica agora? [y/N] (padrao: N): ").strip().lower()
321
+
322
+ if semantic_choice == "y":
323
+ _install_semantic_deps()
324
+ else:
325
+ print("\n[OK] Semantica nao instalada. Use FTS + Graph apenas.")
326
+ print(" Para instalar depois: npx ocerebro full")
327
+
292
328
  # Auto-configura Claude
293
329
  print()
294
330
  setup_claude(auto=True)
295
331
 
296
332
 
333
def _install_semantic_deps():
    """Install the optional semantic-search dependencies.

    Installs ``sentence-transformers`` and ``spacy`` with pip, using the
    interpreter that is running this CLI, then asks which spaCy language
    model(s) to download. Progress is printed to stdout.

    A missing answer to the model prompt (empty input or a closed stdin) and
    an unrecognised answer both fall back to the default choice, so success
    is never reported without at least one model having been downloaded.

    Raises:
        SystemExit: with code 1 when pip or the spaCy download cannot be
            started or exits with a non-zero status.
    """
    import subprocess

    print("\n📦 Instalando dependências de busca semântica...")
    print(" Isso pode levar alguns minutos (~500MB de downloads)")
    print("")

    pkgs = [
        "sentence-transformers>=2.2.0",
        "spacy>=3.5.0",
    ]

    # Use the interpreter of the current environment so the packages land
    # where this CLI can import them.
    python = sys.executable

    try:
        for pkg in pkgs:
            print(f" Instalando {pkg}...")
            subprocess.check_call([
                python, "-m", "pip", "install", pkg, "-q"
            ])
            print(f" ✓ {pkg} instalado")

        # Ask which language model to download.
        print("\n Qual modelo de linguagem deseja instalar?")
        print(" 1. português (pt_core_news_sm) ~50MB")
        print(" 2. inglês (en_core_web_sm) ~15MB")
        print(" 3. ambos")
        try:
            lang_choice = input("\n Escolha [1/2/3] (padrão: 1): ").strip() or "1"
        except EOFError:
            # Non-interactive run (stdin closed): behave as if Enter was pressed.
            lang_choice = "1"

        if lang_choice not in ("1", "2", "3"):
            # Previously an unknown answer downloaded nothing and still
            # printed the success message below.
            print(f" Opção inválida: {lang_choice!r}. Usando o padrão (1).")
            lang_choice = "1"

        models = []
        if lang_choice in ("1", "3"):
            models.append(("português", "pt_core_news_sm"))
        if lang_choice in ("2", "3"):
            models.append(("inglês", "en_core_web_sm"))

        for label, model in models:
            print(f" Baixando modelo {label} ({model})...")
            subprocess.check_call([
                python, "-m", "spacy", "download", model, "-q"
            ])
            print(f" ✓ {model} baixado")

        print("\n✅ Dependências instaladas com sucesso!")
        print(" A busca semântica agora está disponível.")
        print("\n Para usar: npx ocerebro search \"sua query\"")

    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers the case where the interpreter itself cannot be
        # launched (e.g. an empty or invalid sys.executable).
        print(f"\n❌ Erro ao instalar dependências: {e}")
        print(" Tente manualmente: pip install sentence-transformers spacy")
        print(" E depois: python -m spacy download pt_core_news_sm")
        sys.exit(1)
387
+
388
+
297
389
  def main():
298
390
  """Entry point da CLI"""
299
391
  parser = argparse.ArgumentParser(
@@ -374,6 +466,9 @@ def main():
374
466
  gc_parser.add_argument("--threshold", type=int, default=7, dest="threshold_days")
375
467
  gc_parser.add_argument("--apply", action="store_true", dest="apply")
376
468
 
469
+ # Comando: full (instala dependências semânticas)
470
+ subparsers.add_parser("full", help="Instalar dependências de busca semântica")
471
+
377
472
  args = parser.parse_args()
378
473
 
379
474
  if not args.command:
@@ -441,6 +536,9 @@ def main():
441
536
  result = cli.remember(dry_run=not args.apply)
442
537
  elif args.command == "gc":
443
538
  result = cli.gc_cmd(threshold_days=args.threshold_days, dry_run=not args.apply)
539
+ elif args.command == "full":
540
+ _install_semantic_deps()
541
+ sys.exit(0)
444
542
  else:
445
543
  parser.print_help()
446
544
  sys.exit(1)
@@ -8,6 +8,7 @@ from dataclasses import dataclass
8
8
  from src.working.yaml_storage import YAMLStorage
9
9
  from src.official.markdown_storage import MarkdownStorage
10
10
  from src.official.templates import ErrorTemplate, DecisionTemplate
11
+ from src.consolidation.scorer import Scorer, ScoringConfig
11
12
 
12
13
 
13
14
  @dataclass
@@ -53,6 +54,9 @@ class Promoter:
53
54
  self.entities_db_path = entities_db_path
54
55
  self._entities_db = None
55
56
 
57
+ # Inicializa scorer RFM
58
+ self.scorer = Scorer(ScoringConfig())
59
+
56
60
  def promote_session(
57
61
  self,
58
62
  project: str,
@@ -178,6 +182,23 @@ class Promoter:
178
182
  frontmatter["events_from"] = draft["events_range"].get("from")
179
183
  frontmatter["events_to"] = draft["events_range"].get("to")
180
184
 
185
+ # Calcula scores RFM
186
+ scores = self.scorer.calculate_all_scores({
187
+ "type": "decision",
188
+ "last_accessed": datetime.now(timezone.utc),
189
+ "access_count": 0,
190
+ "status": "approved",
191
+ "severity": None,
192
+ "related_to": frontmatter.get("related_to", [])
193
+ })
194
+
195
+ # Adiciona scores ao frontmatter
196
+ frontmatter["importance_score"] = scores["importance_score"]
197
+ frontmatter["recency_score"] = scores["recency_score"]
198
+ frontmatter["frequency_score"] = scores["frequency_score"]
199
+ frontmatter["links_score"] = scores["links_score"]
200
+ frontmatter["total_score"] = scores["total_score"]
201
+
181
202
  # Gera corpo
182
203
  body_sections = [
183
204
  "## Resumo",
@@ -286,6 +307,22 @@ class Promoter:
286
307
  tags=["auto-promoted"]
287
308
  )
288
309
 
310
+ # Calcula scores RFM
311
+ scores = self.scorer.calculate_all_scores({
312
+ "type": "error",
313
+ "last_accessed": datetime.now(timezone.utc),
314
+ "access_count": 0,
315
+ "severity": error.get("severity", "high"),
316
+ "related_to": frontmatter.get("related_to", [])
317
+ })
318
+
319
+ # Adiciona scores ao frontmatter
320
+ frontmatter["importance_score"] = scores["importance_score"]
321
+ frontmatter["recency_score"] = scores["recency_score"]
322
+ frontmatter["frequency_score"] = scores["frequency_score"]
323
+ frontmatter["links_score"] = scores["links_score"]
324
+ frontmatter["total_score"] = scores["total_score"]
325
+
289
326
  # Gera corpo
290
327
  error_original = str(error.get("context", {}))[:500]
291
328
 
@@ -6,7 +6,10 @@ from typing import Any, Dict, List, Optional
6
6
  import sqlite3
7
7
  import json
8
8
  from datetime import datetime
9
- from src.forgetting.gc import calculate_rfms_score
9
+ from src.consolidation.scorer import Scorer, ScoringConfig
10
+
11
+ # Instancia scorer para calculo de RFM risk
12
+ _scorer = Scorer(ScoringConfig())
10
13
 
11
14
 
12
15
  def create_router(
@@ -247,9 +250,10 @@ def create_router(
247
250
 
248
251
  memories = []
249
252
  for row in rows:
250
- # Calcula GC risk
253
+ # Calcula GC risk (1.0 - score RFM)
251
254
  memory_dict = dict(row)
252
- gc_risk = 1.0 - calculate_rfms_score(memory_dict)
255
+ rfms_score = _scorer.calculate(memory_dict)
256
+ gc_risk = 1.0 - rfms_score
253
257
 
254
258
  memories.append({
255
259
  "id": memory_dict["id"],
@@ -22,14 +22,16 @@ class GarbageCollector:
22
22
  baseado em policies de forgetting.
23
23
  """
24
24
 
25
- def __init__(self, config_path: Path):
25
def __init__(self, config_path: Path, metadata_db: Optional[Any] = None) -> None:
    """
    Initialize the GarbageCollector.

    Args:
        config_path: Path to the configuration
        metadata_db: Optional MetadataDB instance used to look up scores
    """
    self.config_path = config_path
    # May be None: candidate selection only consults it when it is truthy.
    self.metadata_db = metadata_db
33
35
 
34
36
  def find_candidates_for_archive(
35
37
  self,
@@ -72,6 +74,14 @@ class GarbageCollector:
72
74
  if mem_type in ['user', 'feedback']:
73
75
  continue
74
76
 
77
+ # Tarefa 2: Verifica total_score se metadata_db estiver disponível
78
+ if self.metadata_db:
79
+ mem_id = frontmatter.get("name", file_path.stem)
80
+ memory_data = self.metadata_db.get_by_id(mem_id)
81
+ if memory_data and memory_data.get("total_score", 0) >= 0.5:
82
+ # Memória com alto score não é candidata
83
+ continue
84
+
75
85
  candidates.append({
76
86
  "file_path": str(file_path),
77
87
  "filename": file_path.name,
@@ -32,6 +32,7 @@ class EmbeddingsDB:
32
32
  self.db_path = db_path
33
33
  self.model_name = model_name
34
34
  self._model = None
35
+ self._semantic_available = False
35
36
  # Cria diretório pai se não existir
36
37
  db_path.parent.mkdir(parents=True, exist_ok=True)
37
38
  self._init_sqlite_vec()
@@ -44,11 +45,10 @@ class EmbeddingsDB:
44
45
  try:
45
46
  from sentence_transformers import SentenceTransformer
46
47
  self._model = SentenceTransformer(self.model_name)
48
+ self._semantic_available = True
47
49
  except ImportError:
48
- raise ImportError(
49
- "sentence-transformers não instalado. "
50
- "Instale com: pip install sentence-transformers"
51
- )
50
+ self._semantic_available = False
51
+ return None
52
52
  return self._model
53
53
 
54
54
  def _init_sqlite_vec(self):
@@ -116,6 +116,8 @@ class EmbeddingsDB:
116
116
  Returns:
117
117
  Lista de floats (vetor de embedding)
118
118
  """
119
+ if self.model is None:
120
+ return []
119
121
  embedding = self.model.encode(text, convert_to_numpy=True)
120
122
  return embedding.tolist()
121
123
 
@@ -271,6 +273,10 @@ class EmbeddingsDB:
271
273
  Returns:
272
274
  Lista de memórias similares com score
273
275
  """
276
+ # Retorna vazio se busca semântica não estiver disponível
277
+ if not self._semantic_available:
278
+ return []
279
+
274
280
  # Computa embedding da query
275
281
  query_embedding = self._compute_embedding(query)
276
282
 
@@ -419,3 +425,12 @@ class EmbeddingsDB:
419
425
  "model_name": self.model_name,
420
426
  "sqlite_vec_available": self._sqlite_vec_available
421
427
  }
428
+
429
def is_semantic_available(self) -> bool:
    """
    Report whether semantic (embedding-based) search can be used.

    The ``_semantic_available`` flag starts as False and is otherwise only
    switched on once the embedding model has been loaded lazily. Callers
    consult this method before any search has had a chance to load the
    model, so relying on the flag alone reports "unavailable" even when
    sentence-transformers is installed. While the flag is still False this
    method therefore probes for the package (without importing it) and
    records the result.

    Returns:
        True if sentence-transformers can be found (or the model has already
        been loaded), False otherwise
    """
    if not self._semantic_available:
        import importlib.util

        try:
            spec = importlib.util.find_spec("sentence_transformers")
        except (ImportError, ValueError):
            # find_spec can raise for a broken or half-initialised module;
            # treat that the same as "not installed".
            spec = None
        self._semantic_available = spec is not None
    return self._semantic_available
@@ -204,7 +204,11 @@ class QueryEngine:
204
204
  Returns:
205
205
  Lista de resultados
206
206
  """
207
- similar = self.embeddings_db.search_similar(query, project, limit * 2)
207
+ try:
208
+ similar = self.embeddings_db.search_similar(query, project, limit * 2)
209
+ except (ImportError, Exception):
210
+ # Busca semântica não disponível ou falhou
211
+ return []
208
212
 
209
213
  results = []
210
214
  for item in similar:
@@ -425,3 +429,12 @@ class QueryEngine:
425
429
  )
426
430
 
427
431
  return sorted_related[:limit]
432
+
433
def is_semantic_available(self) -> bool:
    """
    Tell whether this engine can run semantic search.

    The answer comes straight from the embeddings database attached to the
    engine; no check is performed here.

    Returns:
        The value reported by ``self.embeddings_db.is_semantic_available()``
    """
    embeddings = self.embeddings_db
    available = embeddings.is_semantic_available()
    return available