own-rag-cli 0.0.1-snapshot

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1433 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+ """
4
+ mcp_server.py — Servidor MCP para RAG local de codebase.
5
+
6
+ Expõe ferramentas de busca semântica e indexação via stdio para o Claude Code CLI.
7
+ Conecta-se ao ChromaDB rodando em Docker (localhost:8000).
8
+
9
+ Novidade: modo híbrido ensemble com duas coleções separadas + RRF + reranking leve.
10
+ """
11
+
12
+ import sys
13
+ import os
14
+ import hashlib
15
+ import json
16
+ import logging
17
+ import getpass
18
+ import shutil
19
+ from collections.abc import Iterator
20
+ from concurrent.futures import ThreadPoolExecutor, as_completed
21
+ from dataclasses import dataclass
22
+ from datetime import datetime, timezone
23
+ from pathlib import Path
24
+
25
# Suppress transformers advisory messages on stderr during model load;
# stderr is shared with our own logging and stdout carries the MCP protocol.
os.environ.setdefault("TRANSFORMERS_NO_ADVISORY_WARNINGS", "1")
27
+
28
+
29
+ class _TorchDtypeWarningFilter(logging.Filter):
30
+ def filter(self, record: logging.LogRecord) -> bool:
31
+ return "`torch_dtype` is deprecated! Use `dtype` instead!" not in record.getMessage()
32
+
33
+
34
# Attach the filter to the transformers loggers known to emit the deprecated
# `torch_dtype` warning so it never reaches stderr.
for _logger_name in ("transformers.configuration_utils", "transformers.modeling_utils"):
    logging.getLogger(_logger_name).addFilter(_TorchDtypeWarningFilter())
36
+
37
+ import chromadb
38
+ from sentence_transformers import CrossEncoder, SentenceTransformer
39
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
40
+ from mcp.server.fastmcp import FastMCP
41
+ from download_model_from_hugginface import download_model_with_fallback
42
+
43
+ # ---------------------------------------------------------------------------
44
+ # Configuração de logging (stderr para não poluir o protocolo stdio)
45
+ # ---------------------------------------------------------------------------
46
+
47
# Log to stderr: stdout is reserved for the MCP stdio protocol, and any stray
# output there would corrupt the JSON-RPC stream.
logging.basicConfig(
    level=logging.INFO,
    format="[MCP-RAG] %(asctime)s %(levelname)s: %(message)s",
    stream=sys.stderr,
)
log = logging.getLogger(__name__)
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Log estruturado de uso MCP (JSONL)
56
+ # ---------------------------------------------------------------------------
57
+
58
# JSONL usage-log destination; overridable via MCP_USAGE_LOG, defaults to
# ~/.rag_db/mcp_usage.log.
MCP_USAGE_LOG_PATH = Path(
    os.environ.get("MCP_USAGE_LOG", str(Path.home() / ".rag_db" / "mcp_usage.log"))
).expanduser()
61
+
62
+
63
+ def _safe_preview(value: str, limit: int = 120) -> str:
64
+ if len(value) <= limit:
65
+ return value
66
+ return value[:limit] + "...[truncated]"
67
+
68
+
69
+ def _get_parent_cmdline() -> str:
70
+ ppid = os.getppid()
71
+ cmdline_path = Path(f"/proc/{ppid}/cmdline")
72
+ try:
73
+ raw = cmdline_path.read_bytes()
74
+ if not raw:
75
+ return "unknown"
76
+ parts = [p.decode("utf-8", errors="ignore") for p in raw.split(b"\x00") if p]
77
+ return " ".join(parts) if parts else "unknown"
78
+ except Exception:
79
+ return "unknown"
80
+
81
+
82
def _infer_actor() -> dict[str, str]:
    """Identify who is driving this server: env override first, else the OS user."""
    client_name = os.environ.get("MCP_CLIENT_NAME")
    claude_user = os.environ.get("CLAUDE_USER")
    if client_name:
        actor, source = client_name, "MCP_CLIENT_NAME"
    elif claude_user:
        actor, source = claude_user, "CLAUDE_USER"
    else:
        actor, source = getpass.getuser(), "system_user"
    return {
        "actor": actor,
        "actor_source": source,
        "client_process": _get_parent_cmdline(),
    }
94
+
95
+
96
def _log_tool_usage(event: str, tool_name: str, details: dict[str, object] | None = None) -> None:
    """Append one structured JSONL record describing an MCP tool invocation.

    Failures are logged and swallowed: usage telemetry must never break a call.
    """
    try:
        record: dict[str, object] = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "event": event,
            "tool": tool_name,
            "pid": os.getpid(),
            **_infer_actor(),
        }
        if details:
            record["details"] = details

        MCP_USAGE_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
        with MCP_USAGE_LOG_PATH.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(record, ensure_ascii=True) + "\n")
    except Exception as e:
        log.warning("Falha ao registrar uso MCP em %s: %s", MCP_USAGE_LOG_PATH, e)
113
+
114
+
115
+ # ---------------------------------------------------------------------------
116
+ # Configurações
117
+ # ---------------------------------------------------------------------------
118
+
119
+
120
# Optional tuning file shared with the indexer; env var overrides the path.
INDEXER_CONFIG_PATH = Path(
    os.environ.get("MCP_INDEXER_CONFIG_FILE", str(Path.home() / ".rag_db" / "indexer_tuning.json"))
).expanduser()
123
+
124
+
125
def _load_indexer_tuning_config() -> dict[str, object]:
    """Read the optional tuning JSON; any problem degrades to an empty config."""
    try:
        if not INDEXER_CONFIG_PATH.exists():
            return {}
        parsed = json.loads(INDEXER_CONFIG_PATH.read_text(encoding="utf-8"))
    except Exception:
        # Unreadable or malformed file: behave as if no tuning were provided.
        return {}
    return parsed if isinstance(parsed, dict) else {}
133
+
134
+
135
# Loaded once at import time; the _config_* helpers consult this snapshot.
INDEXER_TUNING_CONFIG = _load_indexer_tuning_config()
136
+
137
+
138
+ def _config_str(env_name: str, config_key: str, default: str) -> str:
139
+ env_raw = os.environ.get(env_name)
140
+ if env_raw is not None and env_raw.strip():
141
+ return env_raw
142
+ cfg_raw = INDEXER_TUNING_CONFIG.get(config_key)
143
+ if isinstance(cfg_raw, str) and cfg_raw.strip():
144
+ return cfg_raw
145
+ return default
146
+
147
+
148
+ def _config_int(env_name: str, config_key: str, default: int, *, min_value: int = 1) -> int:
149
+ env_raw = os.environ.get(env_name)
150
+ if env_raw is not None and env_raw.strip():
151
+ try:
152
+ return max(min_value, int(env_raw))
153
+ except ValueError:
154
+ pass
155
+
156
+ cfg_raw = INDEXER_TUNING_CONFIG.get(config_key)
157
+ if isinstance(cfg_raw, int):
158
+ return max(min_value, cfg_raw)
159
+ if isinstance(cfg_raw, str):
160
+ try:
161
+ return max(min_value, int(cfg_raw))
162
+ except ValueError:
163
+ pass
164
+
165
+ return max(min_value, default)
166
+
167
+
168
# ChromaDB server location (Docker container, default localhost:8000).
CHROMA_HOST = os.environ.get("CHROMA_HOST", "localhost")
CHROMA_PORT = int(os.environ.get("CHROMA_PORT", "8000"))

# Separate collections, one per embedding specialization.
COLLECTION_CODE_JINA = "code_vectors_jina"
COLLECTION_DOC_BGE = "doc_vectors_bge"

# Hugging Face model ids used by the branches.
JINA_V3_EMBEDDING_MODEL = "jinaai/jina-embeddings-v3"
JINA_V2_EMBEDDING_MODEL = "jinaai/jina-embeddings-v2-base-code"
BGE_EMBEDDING_MODEL = "BAAI/bge-m3"

DEFAULT_EMBEDDING_MODEL_CHOICE = "jina"
DEFAULT_JINA_QUANTIZATION = "dynamic-int8"
DEFAULT_SEARCH_MODE = "single"  # single | ensemble

# Embedding model selection: env/tuning-file value validated against the
# supported choices; invalid values fall back to the default with a warning.
_embedding_model_choice = _config_str(
    "MCP_EMBEDDING_MODEL",
    "embedding_model",
    DEFAULT_EMBEDDING_MODEL_CHOICE,
).strip().lower()
if _embedding_model_choice not in {"jina", "bge", "hybrid"}:
    log.warning(
        "MCP_EMBEDDING_MODEL invalido '%s'. Usando '%s'.",
        _embedding_model_choice,
        DEFAULT_EMBEDDING_MODEL_CHOICE,
    )
    _embedding_model_choice = DEFAULT_EMBEDDING_MODEL_CHOICE

# Jina quantization mode; "dynamic_int8"-style spellings are normalized to
# the hyphenated form before validation.
_raw_jina_quantization = _config_str(
    "MCP_JINA_QUANTIZATION",
    "jina_quantization",
    DEFAULT_JINA_QUANTIZATION,
)
JINA_QUANTIZATION = _raw_jina_quantization.strip().lower().replace("_", "-")
if JINA_QUANTIZATION not in {"default", "dynamic-int8"}:
    log.warning(
        "MCP_JINA_QUANTIZATION invalido '%s'. Usando '%s'.",
        JINA_QUANTIZATION,
        DEFAULT_JINA_QUANTIZATION,
    )
    JINA_QUANTIZATION = DEFAULT_JINA_QUANTIZATION

SEARCH_MODE_DEFAULT = os.environ.get("MCP_SEARCH_MODE", DEFAULT_SEARCH_MODE).strip().lower()
if SEARCH_MODE_DEFAULT not in {"single", "ensemble"}:
    SEARCH_MODE_DEFAULT = DEFAULT_SEARCH_MODE

if _embedding_model_choice == "hybrid" and "MCP_SEARCH_MODE" not in os.environ:
    # In hybrid mode the expected behavior is usually ensemble by default.
    SEARCH_MODE_DEFAULT = "ensemble"

# Cross-encoder reranking settings (lightweight CPU reranker).
RERANK_MODEL_ID = os.environ.get("MCP_RERANK_MODEL", "cross-encoder/ms-marco-MiniLM-L-6-v2")
RERANK_ENABLED = os.environ.get("MCP_RERANK_ENABLED", "true").strip().lower() in {"1", "true", "yes", "on"}
RERANK_CANDIDATE_MULTIPLIER = int(os.environ.get("MCP_RERANK_CANDIDATE_MULTIPLIER", "3"))
RERANK_MAX_CANDIDATES = int(os.environ.get("MCP_RERANK_MAX_CANDIDATES", "40"))
RERANKER_MAX_LENGTH = int(os.environ.get("MCP_RERANK_MAX_LENGTH", "512"))
RERANKER_QUANTIZATION = os.environ.get("MCP_RERANK_QUANTIZATION", "dynamic-int8").strip().lower()
if RERANKER_QUANTIZATION not in {"default", "dynamic-int8"}:
    RERANKER_QUANTIZATION = "dynamic-int8"

# Reciprocal Rank Fusion constant (used by _rrf_fuse) and encode() batch size.
RRF_K = int(os.environ.get("MCP_RRF_K", "60"))
EMBEDDING_BATCH_SIZE = _config_int("MCP_EMBEDDING_BATCH_SIZE", "embedding_batch_size", 4, min_value=1)

# Local directory where embedding/reranker models are cached.
_env_model_dir = os.environ.get("MCP_MODEL_DIR")
MODEL_DIR = (
    Path(_env_model_dir).expanduser()
    if _env_model_dir
    else Path.home() / ".cache" / "my-custom-rag-python" / "models"
)

# Splitter parameters (aligned with indexer_full.py, low-memory profile).
# Overlap is clamped strictly below the chunk size.
CHUNK_SIZE = _config_int("MCP_CHUNK_SIZE", "chunk_size", 3000, min_value=256)
CHUNK_OVERLAP = min(CHUNK_SIZE - 1, _config_int("MCP_CHUNK_OVERLAP", "chunk_overlap", 400, min_value=0))

MAX_FILE_SIZE_BYTES = 500 * 1024  # 500 KB; larger files are skipped by the scanner
TOP_K_RESULTS = 7
MAX_QUERY_RESULTS = 30

# Scan filters: directory names that are never descended into.
IGNORED_DIRS = {
    ".git", "node_modules", "__pycache__", ".venv", "venv", "env",
    "dist", "build", "out", ".next", ".nuxt", ".cache", "coverage",
    ".pytest_cache", ".mypy_cache", ".ruff_cache", "target", "bin", "obj",
    ".idea", ".vscode", "vendor", "tmp", "temp", "logs", ".rag_db",
}

# Binary/derived file extensions that are never indexed.
IGNORED_EXTENSIONS = {
    ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", ".webp", ".bmp",
    ".mp4", ".mp3", ".wav", ".ogg", ".avi", ".mov",
    ".zip", ".tar", ".gz", ".rar", ".7z", ".jar", ".war",
    ".pyc", ".pyo", ".so", ".dll", ".exe", ".bin",
    ".lock", ".sum", ".sqlite", ".db", ".sqlite3",
    ".ttf", ".woff", ".woff2", ".eot",
    ".pdf", ".docx", ".xlsx", ".pptx",
}

# Extensions routed to the code branch (see _classify_file_targets).
CODE_EXTENSIONS = {
    ".py", ".js", ".ts", ".tsx", ".jsx", ".java", ".c", ".h", ".cpp", ".hpp",
    ".go", ".rs", ".rb", ".php", ".cs", ".swift", ".kt", ".kts", ".scala", ".sql",
    ".sh", ".bash", ".zsh", ".ps1", ".yaml", ".yml", ".toml", ".ini", ".conf",
    ".json", ".xml", ".html", ".css", ".scss", ".sass", ".vue", ".svelte", ".dart",
    ".lua", ".r", ".m", ".mm",
}

# Extensions routed to the documentation branch.
DOC_EXTENSIONS = {
    ".md", ".mdx", ".rst", ".txt", ".adoc", ".org", ".tex", ".csv",
}
274
+
275
+
276
@dataclass(frozen=True)
class BranchSpec:
    """Immutable description of one retrieval branch.

    A branch pairs an embedding model with the Chroma collection it feeds,
    plus the content domain ("code" or "doc") used to route files to it.
    """

    key: str  # stable branch identifier, e.g. "jina_code" / "bge_doc"
    model_choice: str  # key understood by get_embedding_model()
    model_id: str  # Hugging Face model id
    collection_name: str  # target Chroma collection
    content_domain: str  # "code" or "doc"
    label: str  # human-readable name for displays
284
+
285
+
286
# In hybrid mode the code branch uses the lighter Jina v2 code model;
# otherwise it uses Jina v3.
JINA_CODE_BRANCH_MODEL_CHOICE = "jina_v2" if _embedding_model_choice == "hybrid" else "jina"
JINA_CODE_BRANCH_MODEL_ID = JINA_V2_EMBEDDING_MODEL if _embedding_model_choice == "hybrid" else JINA_V3_EMBEDDING_MODEL

# The two retrieval branches: code -> Jina collection, docs -> BGE collection.
BRANCH_SPECS: dict[str, BranchSpec] = {
    "jina_code": BranchSpec(
        key="jina_code",
        model_choice=JINA_CODE_BRANCH_MODEL_CHOICE,
        model_id=JINA_CODE_BRANCH_MODEL_ID,
        collection_name=COLLECTION_CODE_JINA,
        content_domain="code",
        label="Jina v2 Code" if _embedding_model_choice == "hybrid" else "Jina v3 Code",
    ),
    "bge_doc": BranchSpec(
        key="bge_doc",
        model_choice="bge",
        model_id=BGE_EMBEDDING_MODEL,
        collection_name=COLLECTION_DOC_BGE,
        content_domain="doc",
        label="BGE Docs",
    ),
}

# Branch used when searching in "single" mode.
DEFAULT_SINGLE_BRANCH_KEY = "bge_doc" if _embedding_model_choice == "bge" else "jina_code"
309
+
310
+
311
@dataclass
class RetrievedHit:
    """One raw result from a single branch's vector query."""

    key: str  # dedup key ("<path>::chunk::<index>" or the raw Chroma id)
    document: str  # chunk text
    metadata: dict[str, object]  # chunk metadata as stored in Chroma
    distance: float | None  # cosine distance reported by Chroma
    similarity: float | None  # 1 - distance, when distance is available
    branch: BranchSpec  # branch that produced this hit
    rank: int  # 1-based rank within the branch's result list
320
+
321
+
322
@dataclass
class FusedHit:
    """A result after RRF fusion across branches (optionally reranked)."""

    key: str  # dedup key shared with RetrievedHit
    document: str  # text taken from the best-similarity source hit
    metadata: dict[str, object]  # metadata of the best-similarity source hit
    rrf_score: float  # accumulated reciprocal-rank contributions
    source_details: dict[str, dict[str, object]]  # per-branch rank/distance info
    rerank_score: float | None = None  # cross-encoder score, set by _apply_rerank
330
+
331
+
332
+ # ---------------------------------------------------------------------------
333
+ # Runtime caches (lazy loading para economizar RAM)
334
+ # ---------------------------------------------------------------------------
335
+
336
# Lazily-initialized singletons; nothing heavy is loaded until first use.
_chroma_client: chromadb.HttpClient | None = None
_collections: dict[str, chromadb.Collection] = {}  # name -> collection handle
_models: dict[str, SentenceTransformer] = {}  # model_choice -> loaded model
_model_load_errors: dict[str, str] = {}  # model_choice -> sticky failure message
_splitter: RecursiveCharacterTextSplitter | None = None
_reranker: CrossEncoder | None = None
_reranker_error: str | None = None  # sticky reranker failure (prevents retries)
343
+
344
+
345
+ # ---------------------------------------------------------------------------
346
+ # Chroma e modelos
347
+ # ---------------------------------------------------------------------------
348
+
349
+
350
+ def _model_cache_dir(base_dir: Path, model_id: str) -> Path:
351
+ safe_name = model_id.replace("/", "__").replace(":", "_")
352
+ return base_dir / safe_name
353
+
354
+
355
def _get_chroma_client() -> chromadb.HttpClient:
    """Return the shared ChromaDB HTTP client, connecting on first use.

    heartbeat() is called once so an unreachable server fails fast here
    instead of on the first real query.
    """
    global _chroma_client
    if _chroma_client is None:
        _chroma_client = chromadb.HttpClient(host=CHROMA_HOST, port=CHROMA_PORT)
        _chroma_client.heartbeat()
        log.info("Conectado ao ChromaDB em %s:%s", CHROMA_HOST, CHROMA_PORT)
    return _chroma_client
362
+
363
+
364
def get_chroma_collection(collection_name: str) -> chromadb.Collection:
    """Return (and memoize) a Chroma collection, creating it if absent.

    Collections use cosine distance so callers can read ``1 - distance`` as
    similarity.

    Raises:
        RuntimeError: when the ChromaDB server is unreachable or the
            collection cannot be created; chained to the original error.
    """
    if collection_name in _collections:
        return _collections[collection_name]

    try:
        client = _get_chroma_client()
        collection = client.get_or_create_collection(
            name=collection_name,
            metadata={"hnsw:space": "cosine"},
        )
    except Exception as e:
        # Chain the cause so the original Chroma/network traceback survives
        # (the previous version raised without `from e`).
        raise RuntimeError(
            f"Não foi possível acessar a coleção '{collection_name}' no ChromaDB "
            f"({CHROMA_HOST}:{CHROMA_PORT}). Erro: {e}"
        ) from e

    _collections[collection_name] = collection
    return collection
381
+
382
+
383
def _load_sentence_transformer_from_local(model_id: str, local_model_dir: Path) -> SentenceTransformer:
    """Instantiate a SentenceTransformer from a local directory, on CPU.

    For jinaai/ models (loaded with trust_remote_code) the Hugging Face
    loaders are temporarily monkey-patched to (a) forward the
    ``fix_mistral_regex`` tokenizer kwarg and (b) rename the deprecated
    ``torch_dtype`` kwarg to ``dtype``. If a stale dynamic-modules cache
    causes a FileNotFoundError, the cache is wiped and the load retried once.
    """
    trust_remote_code = model_id.startswith("jinaai/")
    tokenizer_kwargs = {"fix_mistral_regex": True}

    def _instantiate_model() -> SentenceTransformer:
        # Plain load from the local directory; CPU only.
        return SentenceTransformer(
            str(local_model_dir),
            device="cpu",
            trust_remote_code=trust_remote_code,
            tokenizer_kwargs=tokenizer_kwargs,
        )

    def _clear_hf_dynamic_modules_cache() -> None:
        # Removes HF's cached remote-code modules; they are re-fetched on load.
        cache_dir = Path.home() / ".cache" / "huggingface" / "modules" / "transformers_modules"
        if cache_dir.exists():
            log.warning("Limpando cache dinâmico do Hugging Face em %s", cache_dir)
            shutil.rmtree(cache_dir, ignore_errors=True)

    def _load_with_jina_patch() -> SentenceTransformer:
        # Non-remote-code models need no patching at all.
        if not trust_remote_code:
            return _instantiate_model()

        from transformers import AutoModel, AutoTokenizer
        from transformers.modeling_utils import PreTrainedModel

        # Keep originals so every patch can be undone in the finally block.
        original_from_pretrained = AutoTokenizer.from_pretrained
        original_model_from_pretrained = AutoModel.from_pretrained
        original_pretrained_model_from_pretrained = PreTrainedModel.from_pretrained
        original_pretrained_model_from_config = PreTrainedModel._from_config
        # Both spellings of the model dir, so either form of the path matches.
        model_refs = {str(local_model_dir), str(local_model_dir.resolve())}

        def _patched_from_pretrained(*args, **kwargs):
            # Inject fix_mistral_regex only for loads of *this* model dir.
            model_ref = args[0] if args else kwargs.get("pretrained_model_name_or_path")
            if model_ref is not None and str(model_ref) in model_refs:
                kwargs.setdefault("fix_mistral_regex", True)
            return original_from_pretrained(*args, **kwargs)

        def _patched_model_from_pretrained(*args, **kwargs):
            # Rename deprecated torch_dtype -> dtype for this model dir only.
            model_ref = args[0] if args else kwargs.get("pretrained_model_name_or_path")
            if model_ref is not None and str(model_ref) in model_refs and "torch_dtype" in kwargs:
                kwargs = dict(kwargs)
                if "dtype" not in kwargs:
                    kwargs["dtype"] = kwargs["torch_dtype"]
                kwargs.pop("torch_dtype", None)
            return original_model_from_pretrained(*args, **kwargs)

        # Unwrap the classmethod to get the plain function for re-dispatch.
        original_pretrained_model_from_pretrained_fn = original_pretrained_model_from_pretrained.__func__

        @classmethod
        def _patched_pretrained_model_from_pretrained(cls, *args, **kwargs):
            # Same torch_dtype -> dtype rename, applied unconditionally here.
            if "torch_dtype" in kwargs:
                kwargs = dict(kwargs)
                if "dtype" not in kwargs:
                    kwargs["dtype"] = kwargs["torch_dtype"]
                kwargs.pop("torch_dtype", None)
            return original_pretrained_model_from_pretrained_fn(cls, *args, **kwargs)

        original_pretrained_model_from_config_fn = original_pretrained_model_from_config.__func__

        @classmethod
        def _patched_pretrained_model_from_config(cls, *args, **kwargs):
            if "torch_dtype" in kwargs:
                kwargs = dict(kwargs)
                if "dtype" not in kwargs:
                    kwargs["dtype"] = kwargs["torch_dtype"]
                kwargs.pop("torch_dtype", None)
            return original_pretrained_model_from_config_fn(cls, *args, **kwargs)

        # Install the patches, load, then always restore the originals.
        AutoTokenizer.from_pretrained = _patched_from_pretrained
        AutoModel.from_pretrained = _patched_model_from_pretrained
        PreTrainedModel.from_pretrained = _patched_pretrained_model_from_pretrained
        PreTrainedModel._from_config = _patched_pretrained_model_from_config
        try:
            return _instantiate_model()
        finally:
            AutoTokenizer.from_pretrained = original_from_pretrained
            AutoModel.from_pretrained = original_model_from_pretrained
            PreTrainedModel.from_pretrained = original_pretrained_model_from_pretrained
            PreTrainedModel._from_config = original_pretrained_model_from_config

    try:
        return _load_with_jina_patch()
    except FileNotFoundError as e:
        # A missing transformers_modules file indicates a stale remote-code
        # cache: wipe it and retry exactly once.
        if trust_remote_code and "transformers_modules" in str(e):
            log.warning("Cache dinâmico inconsistente detectado: %s", e)
            _clear_hf_dynamic_modules_cache()
            return _load_with_jina_patch()
        raise
471
+
472
+
473
def _apply_jina_quantization_if_needed(model: SentenceTransformer, model_id: str) -> SentenceTransformer:
    """Apply dynamic-int8 quantization to Jina v3's linear layers, best-effort.

    Only runs for the Jina v3 model when JINA_QUANTIZATION is "dynamic-int8".
    Any failure is logged and the unmodified model is returned.
    """
    if model_id != JINA_V3_EMBEDDING_MODEL or JINA_QUANTIZATION == "default":
        return model

    try:
        import torch
        import warnings

        quantized_layers = 0
        for module in model.modules():
            # Matched by class name — presumably the class comes from the
            # model's remote code, so it cannot be imported directly here.
            if type(module).__name__ != "ParametrizedLinear":
                continue

            # Rebuild a plain float32 Linear from the module's current weights...
            float_linear = torch.nn.Linear(
                module.in_features,
                module.out_features,
                bias=module.bias is not None,
            )
            with torch.no_grad():
                float_linear.weight.copy_(module.weight.detach().to(torch.float32))
                if module.bias is not None:
                    float_linear.bias.copy_(module.bias.detach().to(torch.float32))

            # ...then quantize that copy to int8 (deprecation warning silenced).
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=DeprecationWarning)
                quantized_linear = torch.quantization.quantize_dynamic(
                    torch.nn.Sequential(float_linear),
                    {torch.nn.Linear},
                    dtype=torch.qint8,
                )[0]

            module._dynamic_int8_linear = quantized_linear

            # Replacement forward keeps the (task_id, residual) signature the
            # original module's callers appear to use.
            def _forward_dynamic_int8(self, input, task_id=None, residual=False):
                out = self._dynamic_int8_linear(input)
                if residual:
                    return out, input
                return out

            # Bind as an instance method on this specific module.
            module.forward = _forward_dynamic_int8.__get__(module, module.__class__)
            quantized_layers += 1

        if quantized_layers == 0:
            log.warning("Nenhuma camada ParametrizedLinear encontrada para dynamic-int8 no Jina.")
            return model

        log.info("Quantizacao Jina aplicada: dynamic-int8 (CPU, %s camadas).", quantized_layers)
        return model
    except Exception as quant_error:
        log.warning("Falha ao aplicar dynamic-int8 no Jina (%s); usando modelo padrao.", quant_error)
        return model
524
+
525
+
526
def get_embedding_model(model_choice: str) -> SentenceTransformer:
    """Return the (cached) embedding model for *model_choice*.

    Supported choices: "jina" (Jina v3, optionally int8-quantized),
    "jina_v2" and "bge". Load failures are memoized in _model_load_errors so
    repeated calls fail fast instead of re-attempting downloads.

    Raises:
        RuntimeError: for unsupported choices or load failures (chained).
    """
    if model_choice in _models:
        return _models[model_choice]

    # A previous attempt already failed: fail fast with the same message.
    if model_choice in _model_load_errors:
        raise RuntimeError(_model_load_errors[model_choice])

    # Dispatch table replaces the previous if/elif chain.
    model_ids = {
        "jina": JINA_V3_EMBEDDING_MODEL,
        "jina_v2": JINA_V2_EMBEDDING_MODEL,
        "bge": BGE_EMBEDDING_MODEL,
    }
    model_id = model_ids.get(model_choice)
    if model_id is None:
        raise RuntimeError(f"Modelo não suportado: {model_choice}")

    try:
        MODEL_DIR.mkdir(parents=True, exist_ok=True)
        preferred_model_cache_dir = _model_cache_dir(MODEL_DIR, model_id)
        log.info("Carregando embeddings '%s' em CPU (cache: %s)", model_id, preferred_model_cache_dir)

        selection = download_model_with_fallback(
            preferred_model_id=model_id,
            fallback_model_id=model_id,
            local_dir=MODEL_DIR,
        )
        model = _load_sentence_transformer_from_local(selection.model_id, selection.local_dir)
        if model_choice == "jina":
            # Only the Jina v3 choice opts into the dynamic-int8 path.
            model = _apply_jina_quantization_if_needed(model, selection.model_id)

        _models[model_choice] = model
        log.info(
            "Modelo de embeddings pronto: %s (provider=%s, path=%s)",
            selection.model_id,
            selection.provider,
            selection.local_dir,
        )
        return model
    except Exception as e:
        message = f"Falha ao carregar modelo '{model_choice}' ({model_id}): {e}"
        _model_load_errors[model_choice] = message
        # Chain the cause so the original traceback survives (was missing).
        raise RuntimeError(message) from e
568
+
569
+
570
def get_reranker() -> CrossEncoder | None:
    """Return the lazily-loaded cross-encoder reranker, or None if unavailable.

    None is returned when reranking is disabled (RERANK_ENABLED) or when a
    previous load attempt failed (_reranker_error is sticky, so there are no
    retry storms).
    """
    global _reranker, _reranker_error

    if not RERANK_ENABLED:
        return None
    if _reranker is not None:
        return _reranker
    if _reranker_error is not None:
        return None

    try:
        MODEL_DIR.mkdir(parents=True, exist_ok=True)
        selection = download_model_with_fallback(
            preferred_model_id=RERANK_MODEL_ID,
            fallback_model_id=RERANK_MODEL_ID,
            local_dir=MODEL_DIR,
        )

        reranker = CrossEncoder(
            str(selection.local_dir),
            device="cpu",
            max_length=RERANKER_MAX_LENGTH,
            trust_remote_code=False,
        )

        # Optional dynamic-int8 quantization; failure degrades to float model.
        if RERANKER_QUANTIZATION == "dynamic-int8":
            try:
                import torch

                reranker.model = torch.quantization.quantize_dynamic(
                    reranker.model,
                    {torch.nn.Linear},
                    dtype=torch.qint8,
                )
                log.info("Reranker com quantizacao dynamic-int8 habilitada.")
            except Exception as quant_error:
                log.warning("Falha ao quantizar reranker (%s). Seguindo sem quantizacao.", quant_error)

        _reranker = reranker
        log.info(
            "Reranker pronto: %s (provider=%s, path=%s)",
            selection.model_id,
            selection.provider,
            selection.local_dir,
        )
        return _reranker
    except Exception as e:
        # Record the failure so search degrades to RRF-only ordering.
        _reranker_error = str(e)
        log.warning("Reranker indisponível. Busca seguirá sem reranking. Erro: %s", e)
        return None
620
+
621
+
622
def get_splitter() -> RecursiveCharacterTextSplitter:
    """Lazily build and return the shared text splitter singleton."""
    global _splitter
    if _splitter is not None:
        return _splitter
    _splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
        length_function=len,
        separators=["\n\n", "\n", " ", ""],
    )
    return _splitter
632
+
633
+
634
+ # ---------------------------------------------------------------------------
635
+ # Indexação interna
636
+ # ---------------------------------------------------------------------------
637
+
638
+
639
+ def _make_chunk_id(file_path: str, chunk_index: int) -> str:
640
+ raw = f"{file_path}::chunk::{chunk_index}"
641
+ return hashlib.md5(raw.encode()).hexdigest()
642
+
643
+
644
+ def _make_result_key(metadata: dict[str, object], fallback_id: str) -> str:
645
+ file_path = str(metadata.get("file_path", ""))
646
+ chunk_index = str(metadata.get("chunk_index", ""))
647
+ if file_path and chunk_index:
648
+ return f"{file_path}::chunk::{chunk_index}"
649
+ return fallback_id
650
+
651
+
652
+ def _delete_file_chunks(collection: chromadb.Collection, file_path: str) -> int:
653
+ # Pede apenas IDs para não materializar documentos/metadata desnecessários em RAM.
654
+ results = collection.get(where={"file_path": file_path}, include=[])
655
+ ids = results.get("ids", []) if results else []
656
+ if ids:
657
+ collection.delete(ids=ids)
658
+ return len(ids)
659
+
660
+
661
+ def _read_file_safe(filepath: Path) -> str | None:
662
+ for encoding in ("utf-8", "latin-1", "cp1252"):
663
+ try:
664
+ return filepath.read_text(encoding=encoding)
665
+ except UnicodeDecodeError:
666
+ continue
667
+ except OSError:
668
+ return None
669
+ return None
670
+
671
+
672
def _scan_folder(folder_path: Path) -> Iterator[Path]:
    """Yield indexable files under *folder_path* in a deterministic order.

    Skips ignored/hidden directories, ignored extensions, unstat-able files,
    and files larger than MAX_FILE_SIZE_BYTES.
    """
    for dirpath, dirnames, filenames in os.walk(folder_path):
        # Prune in place so os.walk never descends into ignored/hidden dirs;
        # sorting keeps traversal order stable across runs.
        dirnames[:] = sorted(
            d for d in dirnames if not (d in IGNORED_DIRS or d.startswith("."))
        )
        for filename in sorted(filenames):
            candidate = Path(dirpath) / filename
            if candidate.suffix.lower() in IGNORED_EXTENSIONS:
                continue
            try:
                oversized = candidate.stat().st_size > MAX_FILE_SIZE_BYTES
            except OSError:
                continue
            if not oversized:
                yield candidate
689
+
690
+
691
def _classify_file_targets(filepath: Path) -> list[BranchSpec]:
    """Decide which embedding branches should index *filepath*, by extension."""
    suffix = filepath.suffix.lower()
    code_branch = BRANCH_SPECS["jina_code"]
    doc_branch = BRANCH_SPECS["bge_doc"]
    looks_like_code = suffix in CODE_EXTENSIONS
    looks_like_doc = suffix in DOC_EXTENSIONS

    if looks_like_code and not looks_like_doc:
        return [code_branch]
    if looks_like_doc and not looks_like_code:
        return [doc_branch]
    # Ambiguous or unknown extension: index into both branches to keep recall.
    return [code_branch, doc_branch]
703
+
704
+
705
def _index_single_file_for_branch(
    filepath: Path,
    branch: BranchSpec,
    splitter: RecursiveCharacterTextSplitter,
    *,
    delete_existing: bool = True,
) -> int:
    """Chunk, embed and upsert one file into *branch*'s collection.

    Returns the number of chunks written (0 for empty/unreadable files).
    When *delete_existing* is true, previously indexed chunks of the same
    file are removed first, making the per-file update idempotent.
    """
    content = _read_file_safe(filepath)
    if not content or not content.strip():
        return 0

    abs_path = str(filepath.resolve())
    model = get_embedding_model(branch.model_choice)
    collection = get_chroma_collection(branch.collection_name)

    chunks = splitter.split_text(content)
    if not chunks:
        return 0

    # Idempotent per-file update in each collection.
    if delete_existing:
        _delete_file_chunks(collection, abs_path)

    inserted_chunks = 0
    batch_ids: list[str] = []
    batch_docs: list[str] = []
    batch_metadatas: list[dict[str, object]] = []

    def _flush_batch() -> None:
        # Encode and upsert the pending batch, then clear it to cap RAM use.
        nonlocal inserted_chunks
        if not batch_ids:
            return
        embeddings = model.encode(
            batch_docs,
            show_progress_bar=False,
            batch_size=EMBEDDING_BATCH_SIZE,
        ).tolist()
        collection.upsert(
            ids=batch_ids,
            embeddings=embeddings,
            documents=batch_docs,
            metadatas=batch_metadatas,
        )
        inserted_chunks += len(batch_ids)
        # Drop the embeddings promptly before the next batch allocates.
        del embeddings
        batch_ids.clear()
        batch_docs.clear()
        batch_metadatas.clear()

    for i, chunk in enumerate(chunks):
        batch_ids.append(_make_chunk_id(abs_path, i))
        batch_docs.append(chunk)
        batch_metadatas.append(
            {
                "file_path": abs_path,
                "file_name": filepath.name,
                "chunk_index": i,
                "source_collection": branch.collection_name,
                "source_model_choice": branch.model_choice,
                "source_model_id": branch.model_id,
                "content_domain": branch.content_domain,
            }
        )
        if len(batch_ids) >= EMBEDDING_BATCH_SIZE:
            _flush_batch()

    # Flush the final partial batch, if any.
    _flush_batch()
    return inserted_chunks
773
+
774
+
775
def _remove_file_from_all_collections(abs_path: str) -> tuple[dict[str, int], list[str]]:
    """Remove every chunk of *abs_path* from each branch's collection.

    Returns (deleted count per branch key, error strings for failed branches).
    A failure in one branch does not stop the others.
    """
    deleted_counts: dict[str, int] = {}
    failures: list[str] = []

    for branch in BRANCH_SPECS.values():
        try:
            target = get_chroma_collection(branch.collection_name)
            deleted_counts[branch.key] = _delete_file_chunks(target, abs_path)
        except Exception as e:
            failures.append(f"{branch.key}: {e}")
    return deleted_counts, failures
787
+
788
+
789
+ # ---------------------------------------------------------------------------
790
+ # Busca semântica híbrida
791
+ # ---------------------------------------------------------------------------
792
+
793
+
794
def _query_branch(branch: BranchSpec, query: str, n_results: int) -> tuple[list[RetrievedHit], str | None]:
    """Run *query* against one branch; return (hits, error_message_or_None).

    Never raises: resource and query failures are reported via the second
    tuple element so ensemble search can continue with surviving branches.
    """
    try:
        collection = get_chroma_collection(branch.collection_name)
        model = get_embedding_model(branch.model_choice)
    except Exception as e:
        return [], f"{branch.key}: recurso indisponível ({e})"

    try:
        query_embedding = model.encode([query], show_progress_bar=False).tolist()
        results = collection.query(
            query_embeddings=query_embedding,
            n_results=n_results,
            include=["documents", "metadatas", "distances"],
        )
    except Exception as e:
        return [], f"{branch.key}: falha na query ({e})"

    # Chroma returns one inner list per query embedding; exactly one is sent.
    documents = results.get("documents", [[]])[0]
    metadatas = results.get("metadatas", [[]])[0]
    distances = results.get("distances", [[]])[0]
    ids = results.get("ids", [[]])[0]

    hits: list[RetrievedHit] = []
    for idx, (doc, meta, dist) in enumerate(zip(documents, metadatas, distances), start=1):
        metadata = meta or {}
        # ids may be shorter than documents; fall back to a synthetic id.
        fallback_id = ids[idx - 1] if idx - 1 < len(ids) else f"{branch.key}:{idx}"
        key = _make_result_key(metadata, fallback_id)

        # Collections use cosine space, so similarity = 1 - distance;
        # guard against non-numeric values coming back from the server.
        similarity = None
        if dist is not None:
            try:
                similarity = 1.0 - float(dist)
            except Exception:
                similarity = None

        hits.append(
            RetrievedHit(
                key=key,
                document=(doc or ""),
                metadata=metadata,
                distance=float(dist) if dist is not None else None,
                similarity=similarity,
                branch=branch,
                rank=idx,
            )
        )

    return hits, None
842
+
843
+
844
def _rrf_fuse(hits_by_branch: dict[str, list[RetrievedHit]], top_limit: int) -> list[FusedHit]:
    """Merge per-branch hit lists with Reciprocal Rank Fusion (RRF).

    Each hit contributes ``1 / (RRF_K + rank)`` to its key's fused score, so a
    chunk retrieved by several branches accumulates contributions. Returns the
    top *top_limit* fused hits sorted by descending RRF score.
    """
    fused: dict[str, FusedHit] = {}

    # The branch key was previously iterated and discarded (`_ = branch_key`);
    # iterate values directly instead.
    for hits in hits_by_branch.values():
        for rank, hit in enumerate(hits, start=1):
            contribution = 1.0 / (RRF_K + rank)
            entry = fused.get(hit.key)

            if entry is None:
                entry = FusedHit(
                    key=hit.key,
                    document=hit.document,
                    metadata=dict(hit.metadata),
                    rrf_score=0.0,
                    source_details={},
                )
                fused[hit.key] = entry

            entry.rrf_score += contribution
            entry.source_details[hit.branch.key] = {
                "rank": rank,
                "distance": hit.distance,
                "similarity": hit.similarity,
                "collection": hit.branch.collection_name,
                "model_choice": hit.branch.model_choice,
                "model_id": hit.branch.model_id,
                "content_domain": hit.branch.content_domain,
            }

            # Keep document/metadata from the hit with the best local similarity.
            # NOTE(review): the "_best_similarity" sentinel is stored inside the
            # returned metadata dict, so downstream consumers will see it —
            # confirm this is intended before stripping it.
            current_sim = entry.metadata.get("_best_similarity", -10.0)
            candidate_sim = hit.similarity if hit.similarity is not None else -10.0
            if candidate_sim > current_sim:
                entry.document = hit.document
                entry.metadata = dict(hit.metadata)
                entry.metadata["_best_similarity"] = candidate_sim

    fused_hits = list(fused.values())
    fused_hits.sort(key=lambda h: h.rrf_score, reverse=True)

    # Cap the candidate pool before reranking to bound CPU/RAM.
    return fused_hits[:top_limit]
887
+
888
+
889
def _apply_rerank(query: str, fused_hits: list[FusedHit], top_k: int) -> tuple[list[FusedHit], bool, str | None]:
    """Re-score fused candidates with the cross-encoder and truncate to top_k.

    Returns (hits, rerank_applied, error_reason). When the reranker is
    unavailable or fails, the RRF ordering is kept and the reason is reported.
    """
    if not fused_hits:
        return [], False, None

    reranker = get_reranker()
    if reranker is None:
        # Surface why the reranker is missing (load error or disabled by config).
        return fused_hits[:top_k], False, _reranker_error or "reranker_desabilitado"

    def _rerank_key(candidate: FusedHit) -> tuple[float, float]:
        # Rerank score first (missing scores sink to the bottom), RRF breaks ties.
        primary = -1e9 if candidate.rerank_score is None else candidate.rerank_score
        return (primary, candidate.rrf_score)

    try:
        scores = reranker.predict(
            [(query, candidate.document) for candidate in fused_hits],
            show_progress_bar=False,
            convert_to_numpy=True,
        )
        for candidate, score in zip(fused_hits, scores):
            candidate.rerank_score = float(score)
        fused_hits.sort(key=_rerank_key, reverse=True)
    except Exception as e:
        # Best-effort: fall back to the RRF ordering and report the failure.
        return fused_hits[:top_k], False, str(e)
    else:
        return fused_hits[:top_k], True, None
915
+
916
+
917
+ def _format_similarity(similarity: float | None) -> str:
918
+ if similarity is None:
919
+ return "n/a"
920
+ return f"{round(similarity * 100, 1)}%"
921
+
922
+
923
def _format_fused_results(
    *,
    query: str,
    mode: str,
    hits: list[FusedHit],
    branch_errors: list[str],
    rerank_applied: bool,
    rerank_error: str | None,
) -> str:
    """Render fused search results as a Markdown report for LLM consumption.

    Args:
        query: Original user query (echoed in the header).
        mode: Search mode label ("single" or "ensemble").
        hits: Fused hits, already sorted and truncated.
        branch_errors: Per-branch failure messages to surface as warnings.
        rerank_applied: Whether cross-encoder reranking ran (ensemble only).
        rerank_error: Reason reranking was skipped/failed, if any.

    Returns:
        A Markdown string; user-facing text is in Portuguese by design.
    """
    # No hits at all: report emptiness plus any collected branch failures.
    if not hits:
        msg = "Nenhum resultado encontrado. As coleções podem estar vazias."
        if branch_errors:
            msg += "\nFalhas detectadas: " + " | ".join(branch_errors)
        return msg

    lines: list[str] = [f"# Resultados para: '{query}'", f"**Modo:** {mode}"]

    if branch_errors:
        lines.append("**Avisos de branch:** " + " | ".join(branch_errors))

    # Rerank status is only meaningful (and only shown) in ensemble mode.
    if mode == "ensemble":
        if rerank_applied:
            lines.append(f"**Reranking:** ativo ({RERANK_MODEL_ID})")
        else:
            lines.append(f"**Reranking:** indisponível ({rerank_error or 'sem detalhes'})")

    lines.append("")

    for idx, hit in enumerate(hits, start=1):
        # '_best_similarity' is internal fusion bookkeeping; strip before display.
        metadata = dict(hit.metadata)
        metadata.pop("_best_similarity", None)

        file_path = str(metadata.get("file_path", "desconhecido"))
        chunk_index = metadata.get("chunk_index", "?")
        file_name = str(metadata.get("file_name", Path(file_path).name if file_path != "desconhecido" else "?"))

        # De-duplicated, sorted summaries of which models/collections retrieved this chunk.
        source_models = sorted({str(v.get("model_choice", "?")) for v in hit.source_details.values()})
        source_collections = sorted({str(v.get("collection", "?")) for v in hit.source_details.values()})

        # Per-branch detail, ordered by the branch's local rank (best first).
        source_parts: list[str] = []
        for source_key, details in sorted(
            hit.source_details.items(),
            key=lambda item: int(item[1].get("rank", 999999)),
        ):
            source_parts.append(
                f"{source_key}(rank={details.get('rank')}, sim={_format_similarity(details.get('similarity'))})"
            )

        # Trim long chunks so the report stays digestible.
        snippet = hit.document.strip()
        if len(snippet) > 800:
            snippet = snippet[:800] + "\n... [truncado]"

        score_line = f"RRF={hit.rrf_score:.4f}"
        if hit.rerank_score is not None:
            score_line += f" | rerank={hit.rerank_score:.4f}"

        lines.append(f"## [{idx}] {file_path}")
        lines.append(f"**Scores:** {score_line}")
        lines.append(f"**Fontes de recuperação:** {', '.join(source_parts)}")
        lines.append(
            "**Metadados unificados:** "
            f"file_name={file_name} | chunk_index={chunk_index} | "
            f"source_models={source_models} | source_collections={source_collections}"
        )
        lines.append("")
        lines.append(f"```\n{snippet}\n```")
        lines.append("")

    return "\n".join(lines)
992
+
993
+
994
def _run_single_mode(query: str, top_k: int) -> tuple[list[FusedHit], list[str], bool, str | None]:
    """Query only the default branch, with automatic fallback to its sibling.

    Returns (fused_hits, errors, rerank_applied=False, rerank_error=None) —
    the last two slots keep the signature parallel to the ensemble path.
    """
    collected_errors: list[str] = []
    primary = BRANCH_SPECS[DEFAULT_SINGLE_BRANCH_KEY]

    results, primary_error = _query_branch(primary, query, top_k)
    if primary_error:
        collected_errors.append(primary_error)

    # Availability fallback: when the primary branch yields nothing, try the other one.
    if not results:
        other_key = "bge_doc" if primary.key == "jina_code" else "jina_code"
        alt_results, alt_error = _query_branch(BRANCH_SPECS[other_key], query, top_k)
        if alt_error:
            collected_errors.append(alt_error)
        if alt_results:
            results = alt_results

    if not results:
        return [], collected_errors, False, None

    # Run through RRF even for one list so downstream sees uniform FusedHit objects.
    return _rrf_fuse({"single": results}, top_k), collected_errors, False, None
1016
+
1017
+
1018
def _run_ensemble_mode(query: str, top_k: int) -> tuple[list[FusedHit], list[str], bool, str | None]:
    """Query both branches in parallel, fuse with RRF, then rerank.

    Returns (hits, branch_errors, rerank_applied, rerank_error).
    """
    # Over-fetch per branch so fusion has enough candidates, capped by config.
    per_branch_k = min(MAX_QUERY_RESULTS, max(top_k * 2, top_k))
    active_branches = [BRANCH_SPECS["jina_code"], BRANCH_SPECS["bge_doc"]]

    branch_results: dict[str, list[RetrievedHit]] = {}
    warnings: list[str] = []

    # One worker per branch; each query runs independently.
    with ThreadPoolExecutor(max_workers=len(active_branches)) as pool:
        pending = {
            pool.submit(_query_branch, spec, query, per_branch_k): spec
            for spec in active_branches
        }
        for done in as_completed(pending):
            spec = pending[done]
            try:
                branch_hits, branch_error = done.result()
            except Exception as e:
                warnings.append(f"{spec.key}: falha inesperada ({e})")
                continue
            if branch_error:
                warnings.append(branch_error)
            if branch_hits:
                branch_results[spec.key] = branch_hits

    if not branch_results:
        return [], warnings, False, None

    # Keep a larger-than-top_k pool for the reranker, bounded by config limits.
    pool_size = min(RERANK_MAX_CANDIDATES, max(top_k, top_k * RERANK_CANDIDATE_MULTIPLIER))
    candidates = _rrf_fuse(branch_results, pool_size)
    final_hits, applied, rerank_error = _apply_rerank(query, candidates, top_k)
    return final_hits, warnings, applied, rerank_error
1048
+
1049
+
1050
+ # ---------------------------------------------------------------------------
1051
+ # Servidor MCP via FastMCP
1052
+ # ---------------------------------------------------------------------------
1053
+
1054
# Module-level MCP server instance; the @mcp.tool() functions below register
# themselves on it, and __main__ starts it over stdio. The `instructions`
# string is sent to MCP clients (kept in Portuguese by design).
mcp = FastMCP(
    name="rag-codebase",
    instructions=(
        "Servidor RAG para busca semântica em código-fonte local com suporte a ensemble híbrido. "
        "No modo hybrid, a branch de código usa Jina v2 e a de documentação usa BGE. "
        "Use semantic_search_code(query, top_k, mode='ensemble') para combinar Jina+BGE com RRF e reranking. "
        "Use update_file_index após editar um arquivo para manter as duas coleções sincronizadas. "
        "Use index_specific_folder para indexação recursiva sob demanda."
    ),
)
1064
+
1065
+
1066
+ # ---------------------------------------------------------------------------
1067
+ # Tool 1: semantic_search_code
1068
+ # ---------------------------------------------------------------------------
1069
+
1070
@mcp.tool()
def semantic_search_code(query: str, top_k: int = TOP_K_RESULTS, mode: str = SEARCH_MODE_DEFAULT) -> str:
    """Semantic search over the local vector index.

    Modes:
        - single: uses a single branch (Jina/BGE per MCP_EMBEDDING_MODEL; in
          hybrid setups, Jina v2), with automatic fallback to the sibling branch.
        - ensemble: queries code_vectors_jina + doc_vectors_bge in parallel,
          fuses via Reciprocal Rank Fusion (RRF) and applies light reranking.

    Args:
        query: Description of what to search for.
        top_k: Final number of results (clamped to 1..20).
        mode: "single" (default) or "ensemble".

    Returns:
        Formatted Markdown text for LLM consumption (Portuguese user-facing
        strings by design); error messages for invalid input or failures.
    """
    raw_query = (query or "").strip()
    search_mode = (mode or SEARCH_MODE_DEFAULT).strip().lower()

    # Audit log: record every call with a redacted preview of the query.
    _log_tool_usage(
        event="tool_call_start",
        tool_name="semantic_search_code",
        details={
            "query_preview": _safe_preview(raw_query),
            "query_len": len(raw_query),
            "top_k": top_k,
            "mode": search_mode,
        },
    )

    if not raw_query:
        _log_tool_usage(
            event="tool_call_end",
            tool_name="semantic_search_code",
            details={"status": "error", "reason": "empty_query"},
        )
        return "Erro: a query não pode ser vazia."

    # Clamp to a safe range before fan-out to the branches.
    top_k = max(1, min(top_k, 20))
    if search_mode not in {"single", "ensemble"}:
        _log_tool_usage(
            event="tool_call_end",
            tool_name="semantic_search_code",
            details={"status": "error", "reason": "invalid_mode", "mode": search_mode},
        )
        return "Erro: mode inválido. Use 'single' ou 'ensemble'."

    try:
        # Both runners share the same 4-tuple contract:
        # (hits, branch_errors, rerank_applied, rerank_error).
        if search_mode == "ensemble":
            hits, branch_errors, rerank_applied, rerank_error = _run_ensemble_mode(raw_query, top_k)
        else:
            hits, branch_errors, rerank_applied, rerank_error = _run_single_mode(raw_query, top_k)

        result_text = _format_fused_results(
            query=raw_query,
            mode=search_mode,
            hits=hits,
            branch_errors=branch_errors,
            rerank_applied=rerank_applied,
            rerank_error=rerank_error,
        )

        _log_tool_usage(
            event="tool_call_end",
            tool_name="semantic_search_code",
            details={
                "status": "ok",
                "mode": search_mode,
                "result_count": len(hits),
                "branch_errors": len(branch_errors),
                "rerank_applied": rerank_applied,
            },
        )
        return result_text
    except Exception as e:
        # Tool boundary: report failures as text instead of raising to the client.
        _log_tool_usage(
            event="tool_call_end",
            tool_name="semantic_search_code",
            details={"status": "error", "reason": "search_failed", "error": str(e), "mode": search_mode},
        )
        return f"Erro ao executar busca semântica ({search_mode}): {e}"
1153
+
1154
+
1155
+ # ---------------------------------------------------------------------------
1156
+ # Tool 2: update_file_index
1157
+ # ---------------------------------------------------------------------------
1158
+
1159
@mcp.tool()
def update_file_index(file_path: str) -> str:
    """Refresh the RAG index for a single file.

    The file is classified as code/doc and indexed into the appropriate
    collection; ambiguous extensions are indexed into both. Existing chunks
    for the file are first removed from all managed collections.

    Args:
        file_path: Path to the file; resolved to an absolute path.

    Returns:
        A human-readable status report (Portuguese user-facing strings by
        design), or an error message when validation/indexing fails.
    """
    filepath = Path(file_path).resolve()
    abs_path = str(filepath)

    _log_tool_usage(
        event="tool_call_start",
        tool_name="update_file_index",
        details={"file_path": abs_path},
    )

    # Input validation: existence, file-ness, and size limit, each logged.
    if not filepath.exists():
        _log_tool_usage(
            event="tool_call_end",
            tool_name="update_file_index",
            details={"status": "error", "reason": "file_not_found", "file_path": abs_path},
        )
        return f"Erro: arquivo não encontrado: {filepath}"

    if not filepath.is_file():
        _log_tool_usage(
            event="tool_call_end",
            tool_name="update_file_index",
            details={"status": "error", "reason": "not_a_file", "file_path": abs_path},
        )
        return f"Erro: o caminho não aponta para um arquivo: {filepath}"

    if filepath.stat().st_size > MAX_FILE_SIZE_BYTES:
        _log_tool_usage(
            event="tool_call_end",
            tool_name="update_file_index",
            details={"status": "error", "reason": "file_too_large", "file_path": abs_path},
        )
        return f"Erro: arquivo muito grande (>{MAX_FILE_SIZE_BYTES // 1024}KB): {filepath}"

    splitter = get_splitter()
    # Decide which branch(es) this file belongs to (code, doc, or both).
    targets = _classify_file_targets(filepath)

    # Purge stale chunks everywhere first so reclassified files leave no orphans.
    deleted_per_branch, deletion_errors = _remove_file_from_all_collections(abs_path)

    inserted_per_branch: dict[str, int] = {}
    index_errors: list[str] = []
    for branch in targets:
        try:
            inserted = _index_single_file_for_branch(
                filepath,
                branch,
                splitter,
                delete_existing=False,  # already removed from all collections above
            )
            inserted_per_branch[branch.key] = inserted
        except Exception as e:
            # Collect per-branch failures; other branches may still succeed.
            index_errors.append(f"{branch.key}: {e}")

    success_branches = [k for k, v in inserted_per_branch.items() if v > 0]

    details = {
        "status": "ok" if success_branches else "error",
        "file_path": abs_path,
        "targets": [b.key for b in targets],
        "deleted_per_branch": deleted_per_branch,
        "inserted_per_branch": inserted_per_branch,
        "deletion_errors": len(deletion_errors),
        "index_errors": len(index_errors),
    }
    _log_tool_usage(event="tool_call_end", tool_name="update_file_index", details=details)

    # Hard failure only when every branch failed to insert anything.
    if not success_branches and index_errors:
        return (
            "Erro: não foi possível reindexar o arquivo em nenhuma coleção.\n"
            f"Arquivo: {filepath}\n"
            "Falhas: " + " | ".join(index_errors)
        )

    lines = [
        "Arquivo reindexado.",
        f" Arquivo : {filepath}",
        f" Coleções alvo: {[b.collection_name for b in targets]}",
        f" Remoções por coleção: {deleted_per_branch}",
        f" Inserções por coleção: {inserted_per_branch}",
    ]
    if deletion_errors:
        lines.append(" Avisos na remoção: " + " | ".join(deletion_errors))
    if index_errors:
        lines.append(" Avisos na indexação: " + " | ".join(index_errors))
    return "\n".join(lines)
1251
+
1252
+
1253
+ # ---------------------------------------------------------------------------
1254
+ # Tool 3: delete_file_index
1255
+ # ---------------------------------------------------------------------------
1256
+
1257
@mcp.tool()
def delete_file_index(file_path: str) -> str:
    """Remove a file's chunks from every managed collection.

    Args:
        file_path: Path to the file; resolved to an absolute path.

    Returns:
        A human-readable status report (Portuguese user-facing strings by
        design); warns when no chunks were found.
    """
    resolved = Path(file_path).resolve()
    resolved_str = str(resolved)

    _log_tool_usage(
        event="tool_call_start",
        tool_name="delete_file_index",
        details={"file_path": resolved_str},
    )

    deleted_per_branch, errors = _remove_file_from_all_collections(resolved_str)
    total_deleted = sum(deleted_per_branch.values())

    _log_tool_usage(
        event="tool_call_end",
        tool_name="delete_file_index",
        details={
            "status": "ok" if total_deleted > 0 else "warning",
            "file_path": resolved_str,
            "deleted_per_branch": deleted_per_branch,
            "errors": len(errors),
        },
    )

    # Nothing removed anywhere: report that, plus any partial failures.
    if total_deleted == 0:
        message = f"Nenhum chunk encontrado para o arquivo: {resolved_str}"
        if errors:
            message += "\nFalhas parciais: " + " | ".join(errors)
        return message

    report = [
        "Removido do índice com sucesso.",
        f" Arquivo : {resolved_str}",
        f" Deleções por coleção: {deleted_per_branch}",
    ]
    if errors:
        report.append(" Avisos: " + " | ".join(errors))
    return "\n".join(report)
1299
+
1300
+
1301
+ # ---------------------------------------------------------------------------
1302
+ # Tool 4: index_specific_folder
1303
+ # ---------------------------------------------------------------------------
1304
+
1305
@mcp.tool()
def index_specific_folder(folder_path: str) -> str:
    """Recursively index a folder into the per-domain collections.

    Every indexable file found by _scan_folder is classified (code/doc/both)
    and chunked into the matching collection(s).

    Args:
        folder_path: Path to the folder; resolved to an absolute path.

    Returns:
        A summary report with per-branch file/chunk counts and up to 10
        sampled errors (Portuguese user-facing strings by design).
    """
    folder = Path(folder_path).resolve()

    _log_tool_usage(
        event="tool_call_start",
        tool_name="index_specific_folder",
        details={"folder_path": str(folder)},
    )

    # Input validation: the path must exist and be a directory.
    if not folder.exists():
        _log_tool_usage(
            event="tool_call_end",
            tool_name="index_specific_folder",
            details={"status": "error", "reason": "folder_not_found", "folder_path": str(folder)},
        )
        return f"Erro: pasta não encontrada: {folder}"

    if not folder.is_dir():
        _log_tool_usage(
            event="tool_call_end",
            tool_name="index_specific_folder",
            details={"status": "error", "reason": "not_a_folder", "folder_path": str(folder)},
        )
        return f"Erro: o caminho não é um diretório: {folder}"

    splitter = get_splitter()

    # Per-branch counters keyed by branch key; errors are capped at 10 samples.
    processed_files = 0
    branch_file_counts = {key: 0 for key in BRANCH_SPECS}
    branch_chunk_counts = {key: 0 for key in BRANCH_SPECS}
    error_count = 0
    error_samples: list[str] = []

    for filepath in _scan_folder(folder):
        processed_files += 1
        targets = _classify_file_targets(filepath)

        for branch in targets:
            try:
                n_chunks = _index_single_file_for_branch(filepath, branch, splitter)
                branch_file_counts[branch.key] += 1
                branch_chunk_counts[branch.key] += n_chunks
            except Exception as e:
                # A failure on one branch/file does not stop the scan.
                error_count += 1
                if len(error_samples) < 10:
                    error_samples.append(f"{filepath.name} [{branch.key}]: {e}")

    if processed_files == 0:
        _log_tool_usage(
            event="tool_call_end",
            tool_name="index_specific_folder",
            details={"status": "ok", "folder_path": str(folder), "files_processed": 0, "chunks": 0, "errors": 0},
        )
        return f"Nenhum arquivo indexável encontrado em: {folder}"

    total_chunks = sum(branch_chunk_counts.values())

    _log_tool_usage(
        event="tool_call_end",
        tool_name="index_specific_folder",
        details={
            "status": "ok",
            "folder_path": str(folder),
            "files_processed": processed_files,
            "chunks": total_chunks,
            "errors": error_count,
            "branch_file_counts": branch_file_counts,
            "branch_chunk_counts": branch_chunk_counts,
        },
    )

    report = [
        "Indexação da pasta concluída.",
        f" Pasta: {folder}",
        f" Arquivos processados: {processed_files}",
        f" Total de chunks: {total_chunks}",
        f" Arquivos por branch: {branch_file_counts}",
        f" Chunks por branch: {branch_chunk_counts}",
    ]

    if error_count:
        report.append(f" Erros ({error_count}):")
        for err in error_samples:
            report.append(f" - {err}")
        if error_count > len(error_samples):
            report.append(f" ... e mais {error_count - len(error_samples)} erros.")

    return "\n".join(report)
1397
+
1398
+
1399
+ # ---------------------------------------------------------------------------
1400
+ # Ponto de entrada
1401
+ # ---------------------------------------------------------------------------
1402
+
1403
if __name__ == "__main__":
    # Startup banner: log the effective configuration before serving.
    log.info("Iniciando servidor MCP RAG (stdio)...")
    log.info("ChromaDB: %s:%s", CHROMA_HOST, CHROMA_PORT)
    log.info(
        "Coleções: %s (%s), %s (%s)",
        COLLECTION_CODE_JINA,
        BRANCH_SPECS["jina_code"].model_id,
        COLLECTION_DOC_BGE,
        BRANCH_SPECS["bge_doc"].model_id,
    )
    log.info("Modo padrão de busca: %s", SEARCH_MODE_DEFAULT)
    log.info("Modelo single padrão: %s", BRANCH_SPECS[DEFAULT_SINGLE_BRANCH_KEY].model_id)
    log.info("Quantizacao Jina: %s", JINA_QUANTIZATION)
    log.info("Config de tuning carregada de: %s (found=%s)", INDEXER_CONFIG_PATH, bool(INDEXER_TUNING_CONFIG))
    log.info("Embedding batch size: %s", EMBEDDING_BATCH_SIZE)
    log.info("Chunk params: size=%s overlap=%s", CHUNK_SIZE, CHUNK_OVERLAP)
    log.info("Reranker: %s (enabled=%s, quant=%s)", RERANK_MODEL_ID, RERANK_ENABLED, RERANKER_QUANTIZATION)
    log.info("Pasta de modelos locais: %s", MODEL_DIR)
    log.info("Uso MCP será registrado em: %s", MCP_USAGE_LOG_PATH)

    # Pre-warm only the Chroma connection; models stay lazy-loaded to save RAM.
    try:
        _get_chroma_client()
        get_chroma_collection(COLLECTION_CODE_JINA)
        get_chroma_collection(COLLECTION_DOC_BGE)
        log.info("Conexão Chroma inicializada. Modelos serão carregados sob demanda.")
    except Exception as e:
        # Keep serving anyway: tools will return errors until Chroma is reachable.
        log.error("Falha ao inicializar ChromaDB: %s", e)
        log.error("O servidor continuará, mas as ferramentas retornarão erro até o ChromaDB estar disponível.")

    # Blocking call: serve MCP requests over stdin/stdout.
    mcp.run(transport="stdio")