own-rag-cli 0.0.1-snapshot

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1129 @@
1
+ #!/usr/bin/env bash
2
+ # =============================================================================
3
+ # rag-setup-macos.run — Instalador macOS auto-suficiente (ChromaDB + MCP + RAG)
4
+ # =============================================================================
5
+ # Gerado automaticamente por build_run_macos.sh
6
+ # Versão: 2026-03-06 17:03
7
+ # MCP checksum (payload sem shebang): 3246eeb57f901742d915e0bce37fa96f059e149a57bbce73095ff4e5ea51d8d4
8
+ #
9
+ # Uso:
10
+ # chmod +x rag-setup-macos.run
11
+ # ./rag-setup-macos.run [path/to/project] [--skip-index] [--only-index]
12
+ # ./rag-setup-macos.run --reinstall
13
+ # ./rag-setup-macos.run --change-model|-cm
14
+ # =============================================================================
15
+
16
+ set -euo pipefail
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Argumentos
20
+ # ---------------------------------------------------------------------------
21
# Option flags populated from the command line (defaults shown).
SKIP_INDEX=false
ONLY_INDEX=false
REINSTALL=false
CHANGE_MODEL=false
CUSTOM_PROJECT_DIR=""

#######################################
# Parse command-line arguments into the global option flags.
# Globals:   SKIP_INDEX, ONLY_INDEX, REINSTALL, CHANGE_MODEL,
#            CUSTOM_PROJECT_DIR (all written)
# Arguments: the script's positional parameters
# Outputs:   usage text on stdout for --help; diagnostics on stderr
# Returns:   exits 0 on --help and 1 on an unknown option
#######################################
parse_cli_args() {
  local arg
  local usage="Usage: $0 [path/to/project] [--skip-index] [--only-index] [--reinstall] [--change-model|-cm]"

  for arg in "$@"; do
    case "$arg" in
      --skip-index) SKIP_INDEX=true ;;
      --only-index) ONLY_INDEX=true ;;
      --reinstall) REINSTALL=true ;;
      # The misspelled aliases are accepted on purpose (backward compatible).
      --change-model | -cm | --chage-model | -cg) CHANGE_MODEL=true ;;
      --help | -h)
        echo "$usage"
        exit 0
        ;;
      -*)
        # Diagnostics belong on stderr so stdout stays clean for callers.
        echo "Unknown option: $arg" >&2
        echo "$usage" >&2
        exit 1
        ;;
      *)
        # Any non-option word is the project path; the last one wins.
        CUSTOM_PROJECT_DIR="$arg"
        ;;
    esac
  done
}

parse_cli_args "$@"
43
+
44
+ # ---------------------------------------------------------------------------
45
+ # Cores
46
+ # ---------------------------------------------------------------------------
47
# ANSI colour sequences, stored as literal backslash escapes.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
BOLD='\033[1m'
DIM='\033[2m'
NC='\033[0m'

# UI language comes from RAG_SETUP_LANG when set; empty means "not chosen yet".
UI_LANG="${RAG_SETUP_LANG:-}"
# Hint appended to yes/no prompts.
YES_NO_HINT="[s/N]"
52
+
53
#######################################
# Set the yes/no prompt hint that matches the active UI language.
# Globals:   UI_LANG (read), YES_NO_HINT (written)
#######################################
set_lang_defaults() {
  case "$UI_LANG" in
    en-us) YES_NO_HINT="[y/N]" ;;
    *) YES_NO_HINT="[s/N]" ;;
  esac
}
60
+
61
#######################################
# Decide the UI language and refresh the language-dependent defaults.
# A preset UI_LANG is lower-cased and mapped to "pt-br" or "en-us"
# (unknown values become "pt-br"). Without a preset, a non-terminal stdin
# selects "pt-br"; otherwise the user is prompted.
# Globals:   UI_LANG (read/written), LANG_CHOICE (written when prompting),
#            GREEN, NC (read)
#######################################
select_ui_language() {
  local lowered

  # Case 1: language supplied up front.
  if [[ -n "$UI_LANG" ]]; then
    lowered="$(echo "$UI_LANG" | tr '[:upper:]' '[:lower:]')"
    case "$lowered" in
      en | en-us) UI_LANG="en-us" ;;
      *) UI_LANG="pt-br" ;;
    esac
    set_lang_defaults
    return
  fi

  # Case 2: nobody to ask.
  if [[ ! -t 0 ]]; then
    UI_LANG="pt-br"
    set_lang_defaults
    return
  fi

  # Case 3: interactive prompt.
  echo ""
  echo -e "${GREEN}Idioma / Language: [1] PT-BR [2] EN-US (padrão/default: 1)${NC}"
  read -r -p "> " LANG_CHOICE
  case "$LANG_CHOICE" in
    2 | en | EN | en-us | EN-US | english | English)
      UI_LANG="en-us"
      ;;
    *)
      UI_LANG="pt-br"
      ;;
  esac
  set_lang_defaults
}
89
+
90
#######################################
# Translate a message key into the active UI language.
# Globals:   UI_LANG (read), YES_NO_HINT (read, embedded in prompt texts)
# Arguments: $1 - message key
# Outputs:   the localized text on stdout; unknown keys are echoed unchanged
#######################################
t() {
  local key="$1"
  local msg=""

  if [[ "$UI_LANG" == "en-us" ]]; then
    case "$key" in
      err_prefix) msg="ERROR" ;;
      title) msg="RAG Local Setup (macOS) — ChromaDB + MCP" ;;
      project) msg="Project" ;;
      platform_only) msg="This installer is only for macOS (Darwin)." ;;
      extracting) msg="Extracting embedded files" ;;
      extracted_to) msg="Extracted to" ;;
      checking_prereq) msg="Checking prerequisites" ;;
      python_missing) msg="Python 3 not found. Install with Homebrew: brew install python" ;;
      python_min) msg="Python 3.10+ required. Current" ;;
      py_venv_missing) msg="Python venv module not found." ;;
      docker_missing) msg="Docker not found. Install Docker Desktop: brew install --cask docker" ;;
      docker_daemon) msg="Docker is not running. Open Docker Desktop and wait until it is ready." ;;
      compose_missing) msg="Docker Compose not available (docker compose)." ;;
      prereq_ok) msg="Prerequisites OK." ;;
      no_curl) msg="curl not found. Chroma healthcheck will be skipped." ;;
      reset_start) msg="Resetting previous environment" ;;
      reset_done) msg="Reset completed." ;;
      op_cancelled) msg="Operation canceled by user." ;;
      change_model_msg) msg="Model change requested. Chroma reset + full reindex required." ;;
      change_model_confirm) msg="Confirm Chroma reset and full reindex?" ;;
      change_model_noninteractive) msg="Non-interactive mode: proceeding with reset for --change-model." ;;
      only_index_mode) msg="Mode: only index" ;;
      venv_not_found) msg="Venv not found at" ;;
      path_not_found) msg="Path not found" ;;
      indexing) msg="Indexing" ;;
      indexing_done) msg="Indexing completed." ;;
      index_failed_code) msg="Indexer failed. Exit code" ;;
      index_oom_title) msg="Indexing stopped due to out-of-memory (OOM killer, exit 137)." ;;
      section_venv) msg="Setting up Python venv (~/.rag_venv)" ;;
      deps_ok) msg="Dependencies already installed. Skipping." ;;
      deps_reinstall) msg="Installing/updating dependencies..." ;;
      creating_venv) msg="Creating venv at" ;;
      venv_created) msg="Venv created." ;;
      upgrading_pip) msg="Upgrading pip..." ;;
      deps_installed) msg="Dependencies installed." ;;
      section_chroma) msg="Setting up ChromaDB (Docker Desktop)" ;;
      compose_installed) msg="docker-compose.yml installed at" ;;
      compose_keep) msg="docker-compose.yml already exists. Keeping current file." ;;
      chroma_port_prompt) msg="Choose ChromaDB host port [1-65535] (default: 8000): " ;;
      chroma_port_selected) msg="Using ChromaDB host port:" ;;
      chroma_port_invalid) msg="Invalid port. Using default 8000." ;;
      chroma_port_in_use) msg="Port is already in use:" ;;
      chroma_port_try_another) msg="Choose another port." ;;
      chroma_port_auto_fallback) msg="Non-interactive mode: using next free port:" ;;
      chroma_port_no_free) msg="Could not find a free TCP port for ChromaDB." ;;
      chroma_running) msg="chromadb-rag container already running." ;;
      chroma_start) msg="Starting ChromaDB..." ;;
      chroma_wait) msg="Waiting ChromaDB to initialize..." ;;
      chroma_ready) msg="ChromaDB is responding at" ;;
      chroma_timeout) msg="ChromaDB did not respond in time. Check Docker Desktop and container logs." ;;
      section_install_mcp) msg="Installing mcp-rag-server globally" ;;
      mcp_keep) msg="mcp-rag-server already up to date. Keeping." ;;
      mcp_outdated) msg="mcp-rag-server is outdated." ;;
      mcp_prompt_update) msg="Update mcp-rag-server now? $YES_NO_HINT " ;;
      mcp_prompt_reinstall) msg="mcp-rag-server is current. Reinstall anyway? $YES_NO_HINT " ;;
      mcp_skip) msg="Keeping current mcp-rag-server." ;;
      mcp_installed) msg="mcp-rag-server installed" ;;
      mod_hf_installed) msg="Download module installed" ;;
      mod_ms_installed) msg="Optional provider module installed" ;;
      shebang_set) msg="Shebang" ;;
      path_added) msg="Added ~/.local/bin to PATH in" ;;
      section_mcp_cfg) msg="Optional MCP configuration (Claude/Cursor)" ;;
      no_cfg_files) msg="No config files detected for automatic MCP update." ;;
      cfg_all_current) msg="MCP 'rag-codebase' already up to date in detected files. Skipping." ;;
      cfg_detected) msg="Config files pending update" ;;
      ask_apply_cfg) msg="Apply MCP 'rag-codebase' update in these files? $YES_NO_HINT " ;;
      noninteractive_cfg_skip) msg="Non-interactive mode: skipping automatic MCP config update." ;;
      cannot_update_cfg) msg="Could not update" ;;
      already_updated) msg="MCP 'rag-codebase' already up to date. Skipping." ;;
      updated_cfg) msg="MCP 'rag-codebase' updated to version" ;;
      replaced_cfg) msg="Old RAG key replaced by 'rag-codebase' (version" ;;
      added_cfg) msg="MCP 'rag-codebase' added (version" ;;
      setup_done) msg="Setup completed!" ;;
      next) msg="Next" ;;
      next_1) msg="Restart Claude Code CLI" ;;
      next_2) msg="Use semantic_search_code" ;;
      next_3) msg="Reindex: ./rag-setup-macos.run --only-index" ;;
      restart_tools) msg="Restart the tools that use your MCP rag." ;;
      hf_detected) msg="HF token detected in environment." ;;
      hf_noninteractive) msg="Non-interactive mode: continuing without HF token prompt." ;;
      hf_title) msg="Hugging Face token (optional)" ;;
      hf_desc) msg="Speeds up model download/rate limits." ;;
      hf_prompt_now) msg="Provide HF token now? $YES_NO_HINT " ;;
      hf_prompt_paste) msg="Paste HF_TOKEN (hidden): " ;;
      hf_set) msg="HF token set for this run." ;;
      hf_empty) msg="Empty token. Continuing without auth." ;;
      hf_skip) msg="Continuing without HF token." ;;
      invalid_option) msg="Invalid option. Use allowed answers." ;;
      *) msg="$key" ;;
    esac
  else
    case "$key" in
      err_prefix) msg="ERRO" ;;
      title) msg="RAG Local Setup (macOS) — ChromaDB + MCP" ;;
      project) msg="Projeto" ;;
      platform_only) msg="Este instalador é apenas para macOS (Darwin)." ;;
      extracting) msg="Extraindo arquivos embutidos" ;;
      extracted_to) msg="Extraído em" ;;
      checking_prereq) msg="Verificando pré-requisitos" ;;
      python_missing) msg="Python 3 não encontrado. Instale via Homebrew: brew install python" ;;
      python_min) msg="Python 3.10+ necessário. Atual" ;;
      py_venv_missing) msg="Módulo venv do Python não encontrado." ;;
      docker_missing) msg="Docker não encontrado. Instale Docker Desktop: brew install --cask docker" ;;
      docker_daemon) msg="Docker não está rodando. Abra o Docker Desktop e aguarde ficar pronto." ;;
      compose_missing) msg="Docker Compose indisponível (docker compose)." ;;
      prereq_ok) msg="Pré-requisitos OK." ;;
      no_curl) msg="curl não encontrado. Healthcheck do Chroma será pulado." ;;
      reset_start) msg="Zerando ambiente anterior" ;;
      reset_done) msg="Reset concluído." ;;
      op_cancelled) msg="Operação cancelada pelo usuário." ;;
      change_model_msg) msg="Troca de modelo solicitada. Reset do Chroma + reindexação total." ;;
      change_model_confirm) msg="Confirmar reset do Chroma e reindexação total?" ;;
      change_model_noninteractive) msg="Modo não interativo: seguindo com reset por --change-model." ;;
      only_index_mode) msg="Modo: apenas indexação" ;;
      venv_not_found) msg="Venv não encontrado em" ;;
      path_not_found) msg="Caminho não encontrado" ;;
      indexing) msg="Indexando" ;;
      indexing_done) msg="Indexação concluída." ;;
      index_failed_code) msg="Indexador falhou. Código de saída" ;;
      index_oom_title) msg="Indexação interrompida por falta de memória (OOM killer, exit 137)." ;;
      section_venv) msg="Configurando venv Python (~/.rag_venv)" ;;
      deps_ok) msg="Dependências já instaladas. Pulando." ;;
      deps_reinstall) msg="Instalando/atualizando dependências..." ;;
      creating_venv) msg="Criando venv em" ;;
      venv_created) msg="Venv criado." ;;
      upgrading_pip) msg="Atualizando pip..." ;;
      deps_installed) msg="Dependências instaladas." ;;
      section_chroma) msg="Configurando ChromaDB (Docker Desktop)" ;;
      compose_installed) msg="docker-compose.yml instalado em" ;;
      compose_keep) msg="docker-compose.yml já existe. Mantendo arquivo atual." ;;
      chroma_port_prompt) msg="Porta do ChromaDB [1-65535] (padrão: 8000): " ;;
      chroma_port_selected) msg="Porta escolhida do ChromaDB:" ;;
      chroma_port_invalid) msg="Porta inválida. Usando padrão 8000." ;;
      chroma_port_in_use) msg="Porta já está em uso:" ;;
      chroma_port_try_another) msg="Escolha outra porta." ;;
      chroma_port_auto_fallback) msg="Modo não interativo: usando próxima porta livre:" ;;
      chroma_port_no_free) msg="Não foi possível encontrar porta TCP livre para o ChromaDB." ;;
      chroma_running) msg="Container chromadb-rag já está rodando." ;;
      chroma_start) msg="Iniciando ChromaDB..." ;;
      chroma_wait) msg="Aguardando ChromaDB inicializar..." ;;
      chroma_ready) msg="ChromaDB respondendo em" ;;
      chroma_timeout) msg="ChromaDB não respondeu no tempo esperado. Verifique Docker Desktop e logs." ;;
      section_install_mcp) msg="Instalando mcp-rag-server globalmente" ;;
      mcp_keep) msg="mcp-rag-server já está atualizado. Mantendo." ;;
      mcp_outdated) msg="mcp-rag-server está desatualizado." ;;
      mcp_prompt_update) msg="Atualizar mcp-rag-server agora? $YES_NO_HINT " ;;
      mcp_prompt_reinstall) msg="mcp-rag-server já atualizado. Reinstalar mesmo assim? $YES_NO_HINT " ;;
      mcp_skip) msg="Mantendo mcp-rag-server atual." ;;
      mcp_installed) msg="mcp-rag-server instalado" ;;
      mod_hf_installed) msg="Módulo de download instalado" ;;
      mod_ms_installed) msg="Módulo de provider opcional instalado" ;;
      shebang_set) msg="Shebang" ;;
      path_added) msg="Adicionado ~/.local/bin ao PATH em" ;;
      section_mcp_cfg) msg="Configuração opcional de MCP (Claude/Cursor)" ;;
      no_cfg_files) msg="Nenhum arquivo de config detectado para atualização automática do MCP." ;;
      cfg_all_current) msg="MCP 'rag-codebase' já está atualizado nos arquivos detectados. Pulando." ;;
      cfg_detected) msg="Arquivos de config pendentes de atualização" ;;
      ask_apply_cfg) msg="Aplicar atualização do MCP 'rag-codebase' nesses arquivos? $YES_NO_HINT " ;;
      noninteractive_cfg_skip) msg="Modo não interativo: pulando atualização automática de config MCP." ;;
      cannot_update_cfg) msg="Não foi possível atualizar" ;;
      already_updated) msg="MCP 'rag-codebase' já atualizado. Ignorando." ;;
      updated_cfg) msg="MCP 'rag-codebase' atualizado para versão" ;;
      replaced_cfg) msg="Chave RAG antiga substituída por 'rag-codebase' (versão" ;;
      added_cfg) msg="MCP 'rag-codebase' adicionado (versão" ;;
      setup_done) msg="Setup concluído!" ;;
      next) msg="Próximos" ;;
      next_1) msg="Reinicie o Claude Code CLI" ;;
      next_2) msg="Use semantic_search_code" ;;
      next_3) msg="Reindexar: ./rag-setup-macos.run --only-index" ;;
      restart_tools) msg="Reinicie as ferramentas que usam seu MCP rag." ;;
      hf_detected) msg="Token HF detectado no ambiente." ;;
      hf_noninteractive) msg="Modo não interativo: seguindo sem prompt de token HF." ;;
      hf_title) msg="Token Hugging Face (opcional)" ;;
      hf_desc) msg="Acelera download de modelos e limites de taxa." ;;
      hf_prompt_now) msg="Informar HF token agora? $YES_NO_HINT " ;;
      hf_prompt_paste) msg="Cole o HF_TOKEN (oculto): " ;;
      hf_set) msg="HF token definido para este run." ;;
      hf_empty) msg="Token vazio. Seguindo sem autenticação." ;;
      hf_skip) msg="Seguindo sem HF token." ;;
      invalid_option) msg="Opção inválida. Use respostas permitidas." ;;
      *) msg="$key" ;;
    esac
  fi

  echo "$msg"
}
278
+
279
# Logging helpers. Each prints a coloured tag followed by the joined arguments;
# backslash escapes in the whole line are interpreted, as with `echo -e`.

# Informational message on stdout.
log_info() {
  printf '%b\n' "${GREEN}[+]${NC} $*"
}

# Warning message on stdout.
log_warn() {
  printf '%b\n' "${YELLOW}[!]${NC} $*"
}

# Error message on stderr, tagged with the localized error prefix.
log_error() {
  printf '%b\n' "${RED}[$(t err_prefix)]${NC} $*" >&2
}
282
+
283
#######################################
# Tell whether an answer means "yes" in the active UI language.
# "y"/"yes" are accepted in every language; "s"/"sim" only when the UI
# language is not en-us. Matching is case-insensitive.
# Globals:   UI_LANG (read)
# Arguments: $1 - raw answer (may be empty or missing)
# Returns:   0 if the answer is affirmative, 1 otherwise
#######################################
is_yes_answer() {
  local ans
  # printf (not echo) so answers such as "-n" or "-e" are never eaten as
  # echo options; tr keeps this working on the Bash 3.2 shipped with macOS.
  ans="$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')"

  case "$ans" in
    y | yes) return 0 ;;
    s | sim) [[ "$UI_LANG" != "en-us" ]] ;;
    *) return 1 ;;
  esac
}
291
+
292
#######################################
# Tell whether an answer means "no" in the active UI language.
# An empty answer counts as "no". "n"/"no" are accepted in every language;
# "nao"/"não" only when the UI language is not en-us. Matching is
# case-insensitive.
# Globals:   UI_LANG (read)
# Arguments: $1 - raw answer (may be empty or missing)
# Returns:   0 if the answer is negative, 1 otherwise
#######################################
is_no_answer() {
  local ans
  # printf (not echo): `echo "-n"` prints nothing, which would turn the
  # invalid answer "-n" into an empty string and therefore into "no".
  ans="$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')"

  case "$ans" in
    "" | n | no) return 0 ;;
    nao | não) [[ "$UI_LANG" != "en-us" ]] ;;
    *) return 1 ;;
  esac
}
303
+
304
#######################################
# Keep asking a yes/no question until a recognised answer is given.
# Arguments: $1 - prompt text passed to `read -p`
# Returns:   0 for a yes answer, 1 for a no answer
#######################################
ask_yes_no_loop() {
  local prompt="$1"
  local reply=""

  while :; do
    read -r -p "$prompt" reply
    if is_yes_answer "$reply"; then
      return 0
    elif is_no_answer "$reply"; then
      return 1
    fi
    # Neither yes nor no: warn and ask again.
    log_warn "$(t invalid_option)"
  done
}
318
+
319
#######################################
# Validate a TCP port number and print it in canonical decimal form.
# Arguments: $1 - candidate port (may be empty or missing)
# Outputs:   the port without leading zeros on stdout when valid
# Returns:   0 if the value is an integer in 1..65535, 1 otherwise
#######################################
normalize_port() {
  local raw_port="${1:-}"
  local digits

  [[ "${raw_port}" =~ ^[0-9]+$ ]] || return 1

  # Strip leading zeros first: Bash arithmetic would read "0100" as octal 64
  # and abort on "08000".
  digits="${raw_port}"
  while [[ "${digits}" == 0?* ]]; do
    digits="${digits#0}"
  done

  # More than five digits can never be a valid port; rejecting them here also
  # stops huge inputs from wrapping around in 64-bit arithmetic.
  (( ${#digits} <= 5 )) || return 1
  (( digits >= 1 && digits <= 65535 )) || return 1

  echo "${digits}"
  return 0
}
327
+
328
#######################################
# Read the host port mapped to container port 8000 from a compose file.
# Only the first list item of the form `- "HOST:8000"` (quotes optional) is
# considered.
# Arguments: $1 - path to the docker-compose file
# Outputs:   the host port on stdout when found and valid
# Returns:   0 on success; 1 if the file is missing, has no such mapping,
#            or the mapped port fails normalize_port
#######################################
extract_chroma_port_from_compose() {
  local compose_file="$1"
  local parsed_port=""
  local line
  # ERE matched by Bash itself: the BRE `\?` used with sed is a GNU extension
  # that BSD/macOS sed does not honour, and a `sed | head` pipeline can fail
  # under `pipefail`.
  local mapping_re='^[[:space:]]*-[[:space:]]*"?([0-9]{1,5}):8000"?'

  if [[ ! -f "${compose_file}" ]]; then
    return 1
  fi

  # `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    if [[ "${line}" =~ ${mapping_re} ]]; then
      parsed_port="${BASH_REMATCH[1]}"
      break
    fi
  done < "${compose_file}"

  if normalize_port "${parsed_port}" >/dev/null; then
    echo "${parsed_port}"
    return 0
  fi
  return 1
}
343
+
344
#######################################
# Check whether a local TCP port has a listener.
# Uses the first available tool for the platform: lsof then netstat on
# Darwin; ss, lsof, then netstat elsewhere.
# Arguments: $1 - port number
# Returns:   the probing tool's status (0 when a listener was found);
#            1 when no probing tool is available
#######################################
is_port_in_use() {
  local port="$1"
  local kernel
  # If uname itself fails, behave as on Darwin.
  kernel="$(uname -s 2>/dev/null || echo Darwin)"

  case "${kernel}" in
    Darwin)
      if command -v lsof >/dev/null 2>&1; then
        lsof -nP -iTCP:"${port}" -sTCP:LISTEN >/dev/null 2>&1
        return
      fi
      if command -v netstat >/dev/null 2>&1; then
        netstat -an -p tcp 2>/dev/null | grep -E "[\\.:]${port}[[:space:]].*LISTEN" >/dev/null 2>&1
        return
      fi
      ;;
    *)
      if command -v ss >/dev/null 2>&1; then
        ss -ltnH "( sport = :${port} )" 2>/dev/null | grep -q .
        return
      fi
      if command -v lsof >/dev/null 2>&1; then
        lsof -nP -iTCP:"${port}" -sTCP:LISTEN >/dev/null 2>&1
        return
      fi
      if command -v netstat >/dev/null 2>&1; then
        netstat -ltn 2>/dev/null | awk '{print $4}' | grep -E "(^|:)${port}$" >/dev/null 2>&1
        return
      fi
      ;;
  esac

  # No tool available: report the port as free.
  return 1
}
375
+
376
#######################################
# Find the first port, from a starting point up to 65535, that is not in use.
# Arguments: $1 - first port to try
# Outputs:   the free port on stdout
# Returns:   0 when a free port was found, 1 when the range is exhausted
#######################################
find_next_free_port() {
  local start_port="$1"
  local candidate=$((start_port))

  while (( candidate <= 65535 )); do
    if ! is_port_in_use "${candidate}"; then
      echo "${candidate}"
      return 0
    fi
    candidate=$((candidate + 1))
  done

  return 1
}
387
+
388
#######################################
# Choose the host port for ChromaDB.
# Starts from the preferred port when given; otherwise prompts on a terminal
# or takes the default. With a terminal on stdin, invalid or busy ports lead
# to a new prompt; without one, an invalid port becomes the default and a
# busy port is replaced by the next free one.
# Arguments: $1 - default port
#            $2 - preferred port (optional)
# Outputs:   ONLY the chosen port on stdout; warnings go to stderr so that a
#            caller capturing the output receives just the number
# Returns:   0 with the port printed; exits 1 when no free port exists
#######################################
choose_chroma_port_for_install() {
  local default_port="$1"
  local preferred_port="${2:-}"
  local chosen_port=""
  local normalized=""
  local fallback_port=""

  chosen_port="${preferred_port}"

  while true; do
    if [[ -z "${chosen_port}" ]]; then
      if [[ ! -t 0 ]]; then
        chosen_port="${default_port}"
      else
        read -r -p "$(t chroma_port_prompt)" chosen_port
        if [[ -z "${chosen_port}" ]]; then
          chosen_port="${default_port}"
        fi
      fi
    fi

    if ! normalized="$(normalize_port "${chosen_port}")"; then
      # log_warn prints on stdout; redirect so it cannot mix with the result.
      log_warn "$(t chroma_port_invalid)" >&2
      if [[ -t 0 ]]; then
        chosen_port=""
        continue
      fi
      normalized="${default_port}"
    fi

    if is_port_in_use "${normalized}"; then
      log_warn "$(t chroma_port_in_use) ${normalized}. $(t chroma_port_try_another)" >&2
      if [[ -t 0 ]]; then
        chosen_port=""
        continue
      fi
      # Non-interactive: pick the next free port automatically.
      if fallback_port="$(find_next_free_port "$((normalized + 1))")"; then
        log_warn "$(t chroma_port_auto_fallback) ${fallback_port}" >&2
        echo "${fallback_port}"
        return 0
      fi
      log_error "$(t chroma_port_no_free)"
      exit 1
    fi

    echo "${normalized}"
    return 0
  done
}
437
+
438
#######################################
# Decode a base64 payload and write the resulting bytes to a file.
# Arguments: $1 - base64 text (ASCII)
#            $2 - destination file path
# Returns:   the pipeline status (non-zero if decoding or writing fails)
#######################################
decode_to_file() {
  local payload="$1"
  local destination="$2"
  # The decoder is passed with -c so that python's stdin stays connected to
  # the pipe. Feeding the script through a here-document would replace the
  # pipe as stdin, leaving nothing to decode and producing an empty file.
  local decoder='import base64, pathlib, sys; pathlib.Path(sys.argv[1]).write_bytes(base64.b64decode(sys.stdin.read().encode("ascii")))'

  printf '%s' "$payload" | python3 -c "$decoder" "$destination"
}
449
+
450
#######################################
# Replace the first line of a file with a new shebang.
# Arguments: $1 - file to rewrite
#            $2 - interpreter path (written after "#!")
# Returns:   0 on success; 1 on failure, with the file left untouched
#######################################
replace_shebang() {
  local file="$1"
  local new_shebang="$2"
  local tmp_file
  tmp_file="$(mktemp "${TMPDIR:-/tmp}/rag-shebang.XXXXXX")"

  # cp -p seeds the temp file with the original's mode. mktemp creates it as
  # 0600, so moving it over the target directly would drop the executable bit.
  # The redirection then only truncates, keeping that mode.
  if cp -p "$file" "$tmp_file" \
    && {
      printf '%s\n' "#!${new_shebang}"
      tail -n +2 "$file"
    } > "$tmp_file" \
    && mv "$tmp_file" "$file"; then
    return 0
  fi

  # Do not leave the temp file behind on failure.
  rm -f "$tmp_file"
  return 1
}
461
+
462
#######################################
# Report the highest "rag-codebase" MCP version found in the user's known
# Claude/Cursor config files.
# Globals:   HOME (read)
# Outputs:   "MAJOR.MINOR" on stdout; "1.0" when no usable version is found
#######################################
detect_current_mcp_version() {
  python3 - "${HOME}" <<'PYEOF'
import json
import re
import sys
from pathlib import Path

VERSION_RE = re.compile(r"^(\d+)\.(\d+)$")

home = Path(sys.argv[1]).expanduser()
candidates = [
    home / ".claude.json",
    home / ".cursor" / "mcp.json",
    home / "Library" / "Application Support" / "Cursor" / "User" / "mcp.json",
    home / ".config" / "Cursor" / "User" / "mcp.json",
]


def read_version(config_path):
    """Return (major, minor) from one config file, or None if unusable."""
    if not config_path.exists():
        return None
    try:
        data = json.loads(config_path.read_text(encoding="utf-8"))
    except Exception:
        # Unreadable or malformed files are skipped.
        return None
    servers = data.get("mcpServers") if isinstance(data, dict) else None
    entry = servers.get("rag-codebase") if isinstance(servers, dict) else None
    version = entry.get("version") if isinstance(entry, dict) else None
    if not isinstance(version, str):
        return None
    match = VERSION_RE.match(version.strip())
    if match is None:
        return None
    return int(match.group(1)), int(match.group(2))


found = [v for v in map(read_version, candidates) if v is not None]
major, minor = max(found) if found else (1, 0)
print(f"{major}.{minor}")
PYEOF
}
510
+
511
select_ui_language

# Install locations and runtime settings, all derived from the user's home.
USER_HOME="${HOME}"
VENV_DIR="${USER_HOME}/.rag_venv"
VENV_PYTHON="${VENV_DIR}/bin/python3"
VENV_PIP="${VENV_DIR}/bin/pip"
DOCKER_COMPOSE_DIR="${USER_HOME}/docker-chromadb"
RAG_DB_DIR="${USER_HOME}/.rag_db"
CHROMA_HOST="localhost"
CHROMA_PORT_DEFAULT="8000"
CHROMA_PORT_FROM_ENV=false
# A valid MCP_CHROMA_PORT from the environment overrides the default port.
if CHROMA_PORT_NORMALIZED="$(normalize_port "${MCP_CHROMA_PORT:-}")"; then
  CHROMA_PORT="${CHROMA_PORT_NORMALIZED}"
  CHROMA_PORT_FROM_ENV=true
else
  CHROMA_PORT="${CHROMA_PORT_DEFAULT}"
fi
BIN_DIR="${USER_HOME}/.local/bin"
MCP_SERVER_DEST="${BIN_DIR}/mcp-rag-server"
MODEL_DL_HF_DEST="${BIN_DIR}/download_model_from_hugginface.py"
MODEL_DL_MS_DEST="${BIN_DIR}/download_model_from_modelscope.py"
MODEL_CACHE_DIR="${USER_HOME}/.cache/my-custom-rag-python/models"
EXTRACT_DIR="$(mktemp -d "${TMPDIR:-/tmp}/rag-setup-macos.XXXXXX")"
# Register cleanup right after creating the temp dir, so that an early exit
# below (bad project path, wrong platform) cannot leak it.
trap 'rm -rf "$EXTRACT_DIR"' EXIT
DOCKER_COMPOSE_FILE_PATH="${DOCKER_COMPOSE_DIR}/docker-compose.yml"

# Resolve the project directory to an absolute path.
if [[ -n "$CUSTOM_PROJECT_DIR" ]]; then
  if ! PROJECT_DIR="$(cd "$CUSTOM_PROJECT_DIR" 2>/dev/null && pwd)"; then
    # Report a localized error instead of dying silently under `set -e`.
    log_error "$(t path_not_found): ${CUSTOM_PROJECT_DIR}"
    exit 1
  fi
else
  PROJECT_DIR="$(pwd)"
fi

echo ""
echo -e "${BOLD}${BLUE}================================================================${NC}"
echo -e "${BOLD}${BLUE} $(t title)${NC}"
echo -e "${BOLD}${BLUE}================================================================${NC}"
echo -e " $(t project): ${BOLD}${PROJECT_DIR}${NC}"
echo ""

# This installer supports macOS only.
if [[ "$(uname -s)" != "Darwin" ]]; then
  log_error "$(t platform_only)"
  exit 1
fi
555
+
556
+ # ---------------------------------------------------------------------------
557
+ # Extração
558
+ # ---------------------------------------------------------------------------
559
+ log_info "$(t extracting)"
560
+
561
+ B64_COMPOSE="c2VydmljZXM6CiAgY2hyb21hZGI6CiAgICBpbWFnZTogY2hyb21hZGIvY2hyb21hOmxhdGVzdAogICAgY29udGFpbmVyX25hbWU6IGNocm9tYWRiLXJhZwogICAgcG9ydHM6CiAgICAgIC0gIjgwMDA6ODAwMCIKICAgIHZvbHVtZXM6CiAgICAgICMgUGVyc2lzdGUgbyBiYW5jbyBkaXJldGFtZW50ZSBuYSBwYXN0YSBkbyB1c3XDoXJpbyBubyBob3N0CiAgICAgIC0gJHtIT01FfS8ucmFnX2RiOi9jaHJvbWEvY2hyb21hCiAgICBlbnZpcm9ubWVudDoKICAgICAgIyBIYWJpbGl0YSBhdXRlbnRpY2HDp8OjbyBhbsO0bmltYSAoc2VtIHRva2VuKSBwYXJhIHVzbyBsb2NhbAogICAgICAtIEFOT05ZTUlaRURfVEVMRU1FVFJZPWZhbHNlCiAgICAgIC0gQ0hST01BX1NFUlZFUl9BVVRITl9DUkVERU5USUFMU19GSUxFPSIiCiAgICAgIC0gQ0hST01BX1NFUlZFUl9BVVRITl9QUk9WSURFUj0iIgogICAgcmVzdGFydDogYWx3YXlzCiAgICBoZWFsdGhjaGVjazoKICAgICAgdGVzdDogWyJDTUQiLCAiY3VybCIsICItZiIsICJodHRwOi8vbG9jYWxob3N0OjgwMDAvYXBpL3YxL2hlYXJ0YmVhdCJdCiAgICAgIGludGVydmFsOiAzMHMKICAgICAgdGltZW91dDogMTBzCiAgICAgIHJldHJpZXM6IDMKICAgICAgc3RhcnRfcGVyaW9kOiAxMHMK"
562
+ B64_REQUIREMENTS="IyBDbGllbnRlIEhUVFAgZG8gQ2hyb21hREIgKGNvbmVjdGEgYW8gc2Vydmlkb3IgRG9ja2VyKQpjaHJvbWFkYj49MC41LjAKCiMgTW9kZWxvIGRlIGVtYmVkZGluZ3MgbG9jYWwgcm9kYW5kbyBuYSBDUFUKc2VudGVuY2UtdHJhbnNmb3JtZXJzPj0zLjAuMAojIERlcGVuZMOqbmNpYSB1c2FkYSBwb3IgbW9kZWxvcyBKaW5hIHYzIGNvbSBjw7NkaWdvIHJlbW90bwplaW5vcHM+PTAuNy4wCiMgRXZpdGEgaW5jb21wYXRpYmlsaWRhZGVzIGNvbmhlY2lkYXMgZG8gSmluYSB2MyBjb20gQVBJcyBub3ZhcyBkbyB0cmFuc2Zvcm1lcnMgNS54CnRyYW5zZm9ybWVyczw1CgojIFNwbGl0dGVyIGRlIHRleHRvIHBhcmEgY2h1bmtpbmcgaW50ZWxpZ2VudGUKbGFuZ2NoYWluLXRleHQtc3BsaXR0ZXJzPj0wLjIuMAoKIyBCYXJyYSBkZSBwcm9ncmVzc28gdmlzdWFsIG5vIHRlcm1pbmFsCnRxZG0+PTQuNjYuMAoKIyBNb25pdG9yIGRlIG1lbcOzcmlhIGVtIHRlbXBvIHJlYWwgZHVyYW50ZSBpbmRleGHDp8Ojbwpwc3V0aWw+PTUuOS4wCgojIFNESyBvZmljaWFsIGRvIE1DUCAoTW9kZWwgQ29udGV4dCBQcm90b2NvbCkgZGEgQW50aHJvcGljCm1jcD49MS4wLjAK"
563
+ B64_INDEXER="#!/usr/bin/env python3
"""
indexer_full.py — Script standalone de indexação do RAG local.

Uso:
    python indexer_full.py [caminho_do_projeto]

Se nenhum caminho for passado, usa o diretório atual.
O ChromaDB deve estar rodando via Docker em localhost:8000.
"""

import os
import sys
import hashlib
import argparse
import shutil
import logging
import gc
import json
from time import perf_counter, time
from collections.abc import Iterator
from pathlib import Path
from dataclasses import dataclass
from datetime import datetime

# Evita avisos "advisory" ruidosos do transformers no fluxo interativo.
os.environ.setdefault("TRANSFORMERS_NO_ADVISORY_WARNINGS", "1")


class _TorchDtypeWarningFilter(logging.Filter):
    def filter(self, record: logging.LogRecord) -> bool:
        return "`torch_dtype` is deprecated! Use `dtype` instead!" not in record.getMessage()


for _logger_name in ("transformers.configuration_utils", "transformers.modeling_utils"):
    logging.getLogger(_logger_name).addFilter(_TorchDtypeWarningFilter())

import chromadb
from sentence_transformers import SentenceTransformer
from langchain_text_splitters import RecursiveCharacterTextSplitter
from tqdm import tqdm
from download_model_from_hugginface import download_model_with_fallback

# ---------------------------------------------------------------------------
# Configurações globais
# ---------------------------------------------------------------------------


def _env_int(name: str, default: int, *, min_value: int = 1) -> int:
    raw = os.environ.get(name)
    if raw is None:
        return max(min_value, default)
    try:
        return max(min_value, int(raw))
    except ValueError:
        return max(min_value, default)

CHROMA_HOST = "localhost"
CHROMA_PORT = _env_int("MCP_CHROMA_PORT", 8000, min_value=1)
COLLECTION_CODE_JINA = "code_vectors_jina"
COLLECTION_DOC_BGE = "doc_vectors_bge"

# Pastas e extensões ignoradas durante a varredura
IGNORED_DIRS = {
    ".git", "node_modules", "__pycache__", ".venv", "venv", "env",
    "dist", "build", "out", ".next", ".nuxt", ".cache", "coverage",
    ".pytest_cache", ".mypy_cache", ".ruff_cache", "target", "bin", "obj",
    ".idea", ".vscode", ".DS_Store", "vendor", "tmp", "temp", "logs",
    ".rag_db",
}

IGNORED_EXTENSIONS = {
    # Binários e imagens
    ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", ".webp", ".bmp",
    ".mp4", ".mp3", ".wav", ".ogg", ".avi", ".mov",
    # Pacotes e compilados
    ".zip", ".tar", ".gz", ".rar", ".7z", ".jar", ".war", ".ear",
    ".pyc", ".pyo", ".so", ".dll", ".exe", ".bin",
    # Lockfiles e gerados
    ".lock", ".sum",
    # Banco de dados
    ".sqlite", ".db", ".sqlite3",
    # Fontes
    ".ttf", ".woff", ".woff2", ".eot",
    # PDF/Documentos binários
    ".pdf", ".docx", ".xlsx", ".pptx",
}

CODE_EXTENSIONS = {
    ".py", ".js", ".ts", ".tsx", ".jsx", ".java", ".c", ".h", ".cpp", ".hpp",
    ".go", ".rs", ".rb", ".php", ".cs", ".swift", ".kt", ".kts", ".scala", ".sql",
    ".sh", ".bash", ".zsh", ".ps1", ".yaml", ".yml", ".toml", ".ini", ".conf",
    ".json", ".xml", ".html", ".css", ".scss", ".sass", ".vue", ".svelte", ".dart",
    ".lua", ".r", ".m", ".mm",
}

DOC_EXTENSIONS = {
    ".md", ".mdx", ".rst", ".txt", ".adoc", ".org", ".tex", ".csv",
}

# Maximum file size (avoids indexing huge generated files)
MAX_FILE_SIZE_BYTES = 500 * 1024  # 500 KB

# Splitter and batch parameters (low-memory profile by default).
CHUNK_SIZE = _env_int("MCP_CHUNK_SIZE", 3000, min_value=256)
CHUNK_OVERLAP = min(CHUNK_SIZE - 1, _env_int("MCP_CHUNK_OVERLAP", 400, min_value=0))
EMBEDDING_BATCH_SIZE = _env_int("MCP_EMBEDDING_BATCH_SIZE", 4, min_value=1)
DEFAULT_PERF_PROFILE = "autotune"
INDEXER_CONFIG_PATH = Path(
    os.environ.get("MCP_INDEXER_CONFIG_FILE", str(Path.home() / ".rag_db" / "indexer_tuning.json"))
).expanduser()
INDEXER_CONFIG_FALLBACK_PATH = Path.home() / ".cache" / "my-custom-rag-python" / "indexer_tuning.json"

# Embedding model (runs on the CPU)
JINA_V3_EMBEDDING_MODEL = "jinaai/jina-embeddings-v3"
JINA_V2_EMBEDDING_MODEL = "jinaai/jina-embeddings-v2-base-code"
BGE_EMBEDDING_MODEL = "BAAI/bge-m3"
DEFAULT_EMBEDDING_MODEL_CHOICE = "jina"
DEFAULT_JINA_QUANTIZATION = "dynamic-int8"
MODEL_CACHE_BASE_DIR = Path(
    os.environ.get("MCP_MODEL_DIR", str(Path.home() / ".cache" / "my-custom-rag-python" / "models"))
).expanduser()
JINA_RECOMMENDED_RAM_GB_DEFAULT = 64
JINA_RECOMMENDED_RAM_GB_DYNAMIC_INT8 = 48
JINA_RECOMMENDED_SWAP_GB = 16
JINA_MIN_AVAILABLE_RAM_GB_HINT = 12


def _env_bool(name: str, default: bool = False) -> bool:
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in {"1", "true", "yes", "on"}


def _clamp(value: float, low: float, high: float) -> float:
    return max(low, min(high, value))


def _is_memory_related_error(exc: Exception) -> bool:
    if isinstance(exc, MemoryError):
        return True
    msg = str(exc).lower()
    memory_markers = (
        "out of memory",
        "oom",
        "cannot allocate memory",
        "std::bad_alloc",
        "bad alloc",
        "insufficient memory",
    )
    return any(marker in msg for marker in memory_markers)


def _is_dimension_mismatch_error(exc: Exception) -> bool:
    msg = str(exc).lower()
    return (
        "expecting embedding with dimension" in msg
        or ("embedding" in msg and "dimension" in msg and "got" in msg)
    )


def _format_exception(exc: Exception) -> str:
    message = str(exc).strip()
    if message:
        return message
    return repr(exc)


@dataclass(frozen=True)
class IndexTarget:
    """Immutable description of one indexing destination."""

    # Model key for this target, e.g. "jina", "jina-v2" or "bge".
    model_choice: str
    # Name of the collection that receives the vectors
    # (presumably a ChromaDB collection; confirm against the caller).
    collection_name: str
    # Short display label, e.g. "Code/Jina" or "Doc/BGE".
    label: str


def _resolve_model_id(model_choice: str) -> str:
    """Map a model choice key to its embedding model identifier.

    Args:
        model_choice: One of "jina", "jina-v2" or "bge".

    Returns:
        The model identifier configured for that choice.

    Raises:
        ValueError: If ``model_choice`` is not a supported key.
    """
    model_ids = {
        "jina": JINA_V3_EMBEDDING_MODEL,
        "jina-v2": JINA_V2_EMBEDDING_MODEL,
        "bge": BGE_EMBEDDING_MODEL,
    }
    resolved = model_ids.get(model_choice)
    if resolved is None:
        raise ValueError(f"Modelo não suportado: {model_choice}")
    return resolved


def _resolve_fallback_model_id(model_choice: str) -> str:
    """Return the fallback embedding model identifier.

    ``model_choice`` is accepted but not consulted: every choice falls back
    to ``BGE_EMBEDDING_MODEL``.
    """
    return BGE_EMBEDDING_MODEL


def _describe_embedding_choice(model_choice: str) -> str:
    """Build a display string for an embedding choice.

    "jina", "bge" and "hybrid" are rendered as ``"<choice> (<model id(s)>)"``;
    any other value is returned unchanged.
    """
    descriptions = {
        "jina": f"jina ({JINA_V3_EMBEDDING_MODEL})",
        "bge": f"bge ({BGE_EMBEDDING_MODEL})",
        "hybrid": f"hybrid ({JINA_V2_EMBEDDING_MODEL} + {BGE_EMBEDDING_MODEL})",
    }
    return descriptions.get(model_choice, model_choice)


def _resolve_index_targets(model_choice: str) -> list[IndexTarget]:
    """List the collections to index for the selected embedding mode.

    "jina" and "bge" yield a single target each. "hybrid" yields two: the
    code collection embedded with the "jina-v2" model key, followed by the
    doc collection embedded with "bge".

    Raises:
        ValueError: If 'model_choice' is not "jina", "bge" or "hybrid".
    """

    def _doc_target() -> IndexTarget:
        # Shared by the "bge" and "hybrid" modes.
        return IndexTarget(
            model_choice="bge",
            collection_name=COLLECTION_DOC_BGE,
            label="Doc/BGE",
        )

    if model_choice == "jina":
        code_only = IndexTarget(
            model_choice="jina",
            collection_name=COLLECTION_CODE_JINA,
            label="Code/Jina",
        )
        return [code_only]

    if model_choice == "bge":
        return [_doc_target()]

    if model_choice == "hybrid":
        code_v2 = IndexTarget(
            model_choice="jina-v2",
            collection_name=COLLECTION_CODE_JINA,
            label="Code/Jina v2",
        )
        return [code_v2, _doc_target()]

    raise ValueError(f"Modelo não suportado: {model_choice}")


def _classify_file_targets(filepath: Path, model_choice: str) -> set[str]:
    if model_choice != "hybrid":
        return {model_choice}

    suffix = filepath.suffix.lower()
    is_code = suffix in CODE_EXTENSIONS
    is_doc = suffix in DOC_EXTENSIONS

    if is_code and not is_doc:
        return {"jina-v2"}
    if is_doc and not is_code:
        return {"bge"}

    # Extensão desconhecida/ambígua: indexa nos dois ramos para manter recall.
    return {"jina-v2", "bge"}


def _model_cache_dir(base_dir: Path, model_id: str) -> Path:
    safe_name = model_id.replace("/", "__").replace(":", "_")
    return base_dir / safe_name


def _pick_with_prompt(
    *,
    current_value: str | None,
    default_value: str,
    title: str,
    options: list[tuple[str, str]],
) -> str:
    if current_value:
        return current_value
    if not sys.stdin.isatty():
        return default_value

    print(f"\n[CONFIG] {title}")
    for index, (_, description) in enumerate(options, start=1):
        print(f"  {index}) {description}")
    print(f"  Enter = padrão ({default_value})")

    answer = input("> Escolha: ").strip()
    if not answer:
        return default_value
    if answer.isdigit():
        idx = int(answer) - 1
        if 0 <= idx < len(options):
            return options[idx][0]
    lowered = answer.lower()
    valid_keys = {k for k, _ in options}
    if lowered in valid_keys:
        return lowered
    print(f"[AVISO] Opção inválida '{answer}'. Usando padrão: {default_value}")
    return default_value


def resolve_embedding_config(
    model_choice_arg: str | None,
    jina_quantization_arg: str | None,
    persisted_config: dict[str, object] | None = None,
) -> tuple[str, str]:
    """Resolve the embedding model choice and the Jina quantization mode.

    Each setting is taken from the first non-empty source among: the explicit
    argument, its environment variable (MCP_EMBEDDING_MODEL or
    MCP_JINA_QUANTIZATION), and a string value in 'persisted_config'. When
    none provides a value, _pick_with_prompt decides. Values outside the
    accepted set produce a warning and are replaced by the module default.

    Returns:
        (model_choice, jina_quantization). The quantization is always
        "default" unless the model choice is "jina".
    """
    saved = persisted_config or {}

    # --- embedding model -------------------------------------------------
    saved_model = saved.get("embedding_model")
    requested_model = model_choice_arg or os.environ.get("MCP_EMBEDDING_MODEL")
    if not requested_model and isinstance(saved_model, str):
        requested_model = saved_model
    if requested_model:
        requested_model = requested_model.strip().lower()

    model_menu = [
        ("jina", f"jina ({JINA_V3_EMBEDDING_MODEL}) - foco em código."),
        ("bge", f"bge ({BGE_EMBEDDING_MODEL}) - conteúdo misto."),
        ("hybrid", f"hybrid (Jina v2 {JINA_V2_EMBEDDING_MODEL} + BGE) - duas coleções."),
    ]
    model_choice = _pick_with_prompt(
        current_value=requested_model,
        default_value=DEFAULT_EMBEDDING_MODEL_CHOICE,
        title="Escolha do modelo de embeddings",
        options=model_menu,
    )
    if model_choice not in ("jina", "bge", "hybrid"):
        print(f"[AVISO] MCP_EMBEDDING_MODEL inválido '{model_choice}'. Usando '{DEFAULT_EMBEDDING_MODEL_CHOICE}'.")
        model_choice = DEFAULT_EMBEDDING_MODEL_CHOICE

    # --- Jina quantization -----------------------------------------------
    saved_quantization = saved.get("jina_quantization")
    requested_quantization = jina_quantization_arg or os.environ.get("MCP_JINA_QUANTIZATION")
    if not requested_quantization and isinstance(saved_quantization, str):
        requested_quantization = saved_quantization
    if requested_quantization:
        # Underscores are accepted as an alternative spelling of hyphens.
        requested_quantization = requested_quantization.strip().lower().replace("_", "-")

    if model_choice != "jina":
        return model_choice, "default"

    quantization_menu = [
        ("default", "default (sem quantizacao) - maior qualidade, indexacao mais lenta."),
        ("dynamic-int8", "dynamic-int8 - indexacao mais rapida e menor uso de RAM, com pequena perda de qualidade."),
    ]
    jina_quantization = _pick_with_prompt(
        current_value=requested_quantization,
        default_value=DEFAULT_JINA_QUANTIZATION,
        title="Quantizacao do Jina (apenas para CPU)",
        options=quantization_menu,
    )
    if jina_quantization not in ("default", "dynamic-int8"):
        print(
            f"[AVISO] MCP_JINA_QUANTIZATION inválido '{jina_quantization}'. "
            f"Usando '{DEFAULT_JINA_QUANTIZATION}'."
        )
        jina_quantization = DEFAULT_JINA_QUANTIZATION

    return model_choice, jina_quantization


def _indexer_config_candidates() -> list[Path]:
    """Return the config file locations in priority order.

    The primary path comes first; the fallback path follows unless it equals
    the primary one.
    """
    if INDEXER_CONFIG_FALLBACK_PATH == INDEXER_CONFIG_PATH:
        return [INDEXER_CONFIG_PATH]
    return [INDEXER_CONFIG_PATH, INDEXER_CONFIG_FALLBACK_PATH]


def load_indexer_tuning_config(force_reconfigure: bool) -> dict[str, object]:
    """Load the persisted indexer configuration, if any.

    Args:
        force_reconfigure: When true, nothing is read and an empty dict is
            returned.

    Returns:
        The first candidate file (see _indexer_config_candidates) that exists
        and parses as a JSON object; an empty dict when there is none.
        Unreadable or malformed files are skipped silently.
    """
    if force_reconfigure:
        return {}

    for config_path in _indexer_config_candidates():
        try:
            if config_path.exists():
                parsed = json.loads(config_path.read_text(encoding="utf-8"))
                if isinstance(parsed, dict):
                    return parsed
        except Exception:
            # Best effort: a broken candidate must not block the next one.
            pass
    return {}


def save_indexer_tuning_config(config: dict[str, object]) -> None:
    """Persist 'config' as JSON, stamped with an "updated_at" epoch second.

    Candidate paths from _indexer_config_candidates are tried in order and the
    function stops at the first successful write. If every candidate fails, a
    single warning listing each path and its error is printed; no exception is
    raised.
    """
    record = dict(config)
    record["updated_at"] = int(time())

    failures: list[tuple[Path, Exception]] = []
    for target in _indexer_config_candidates():
        try:
            target.parent.mkdir(parents=True, exist_ok=True)
            serialized = json.dumps(record, ensure_ascii=False, indent=2) + "\n"
            target.write_text(serialized, encoding="utf-8")
            if target == INDEXER_CONFIG_PATH:
                message = f"[CONFIG] Configuração persistida em: {target}"
            else:
                message = (
                    f"[CONFIG] Configuração persistida em fallback: {target} "
                    f"(destino primário sem permissão: {INDEXER_CONFIG_PATH})"
                )
            print(message)
            return
        except Exception as err:
            failures.append((target, err))

    summary = " | ".join(
        f"{failed_path}: {_format_exception(failure)}" for failed_path, failure in failures
    )
    print(f"[AVISO] Não foi possível persistir configuração: {summary}")


def resolve_perf_profile(perf_profile_arg: str | None, persisted_config: dict[str, object]) -> str:
    """Resolve the indexing performance profile.

    The first non-empty source wins: the explicit argument, the
    MCP_PERF_PROFILE environment variable, then a string "perf_profile" entry
    in 'persisted_config'. With no value, _pick_with_prompt decides.

    Returns:
        "autotune" or "max-performance"; any other value triggers a warning
        and DEFAULT_PERF_PROFILE is returned instead.
    """
    saved_profile = persisted_config.get("perf_profile")
    chosen = perf_profile_arg or os.environ.get("MCP_PERF_PROFILE")
    if not chosen and isinstance(saved_profile, str):
        chosen = saved_profile
    if chosen:
        chosen = chosen.strip().lower()

    profile_menu = [
        ("autotune", "autotune - equilíbrio (recomendado)."),
        ("max-performance", "max-performance - máximo throughput (mais RAM)."),
    ]
    chosen = _pick_with_prompt(
        current_value=chosen,
        default_value=DEFAULT_PERF_PROFILE,
        title="Perfil de performance da indexação",
        options=profile_menu,
    )
    if chosen in ("autotune", "max-performance"):
        return chosen

    print(f"[AVISO] Perfil inválido '{chosen}'. Usando '{DEFAULT_PERF_PROFILE}'.")
    return DEFAULT_PERF_PROFILE


def _parse_config_int(config: dict[str, object], key: str) -> int | None:
    raw = config.get(key)
    if isinstance(raw, int):
        return raw
    if isinstance(raw, str) and raw.isdigit():
        return int(raw)
    return None


def _read_meminfo_gib() -> tuple[float | None, float | None, float | None]:
    """Retorna (mem_total, mem_available, swap_total) em GiB, quando disponível."""
    mem_total_kib: int | None = None
    mem_available_kib: int | None = None
    swap_total_kib: int | None = None

    try:
        for line in Path("/proc/meminfo").read_text(encoding="utf-8").splitlines():
            if line.startswith("MemTotal:"):
                mem_total_kib = int(line.split()[1])
            elif line.startswith("MemAvailable:"):
                mem_available_kib = int(line.split()[1])
            elif line.startswith("SwapTotal:"):
                swap_total_kib = int(line.split()[1])
    except (OSError, ValueError, IndexError):
        return None, None, None

    to_gib = lambda kib: (kib / (1024 * 1024)) if kib is not None else None
    return to_gib(mem_total_kib), to_gib(mem_available_kib), to_gib(swap_total_kib)


def warn_if_jina_memory_risk(model_choice: str, jina_quantization: str) -> None:
    """Print an OOM-risk warning when memory looks too small for Jina.

    Nothing happens unless 'model_choice' is "jina" or "hybrid" and
    _read_meminfo_gib reports a total RAM figure. Total RAM, total swap and
    currently available RAM are each compared with their JINA_* thresholds;
    every shortfall becomes one bullet in the printed warning.
    """
    if model_choice != "jina" and model_choice != "hybrid":
        return

    total_gib, available_gib, swap_gib = _read_meminfo_gib()
    if total_gib is None:
        return

    if jina_quantization == "default":
        ram_floor_gib = JINA_RECOMMENDED_RAM_GB_DEFAULT
    else:
        ram_floor_gib = JINA_RECOMMENDED_RAM_GB_DYNAMIC_INT8

    findings: list[str] = []
    if total_gib < ram_floor_gib:
        findings.append(
            f"RAM total detectada: {total_gib:.1f} GiB (recomendado >= {ram_floor_gib} GiB para Jina/{jina_quantization})."
        )
    if swap_gib is not None and swap_gib < JINA_RECOMMENDED_SWAP_GB:
        findings.append(
            f"Swap detectada: {swap_gib:.1f} GiB (recomendado >= {JINA_RECOMMENDED_SWAP_GB} GiB)."
        )
    if available_gib is not None and available_gib < JINA_MIN_AVAILABLE_RAM_GB_HINT:
        findings.append(
            f"RAM livre atual: {available_gib:.1f} GiB (baixo para a carga inicial do Jina)."
        )

    if not findings:
        return

    # The detected shortfalls are followed by two fixed remediation hints.
    bullets = [
        *findings,
        "Se ocorrer 'Killed' (exit 137), use BGE: --embedding-model bge",
        "Ou rode o Jina em máquina com mais RAM/swap e menos processos concorrentes.",
    ]
    print("[AVISO] Alto risco de OOM com Jina nesta máquina/carga.")
    for bullet in bullets:
        print(f"        - {bullet}")


@dataclass(frozen=True)
class RuntimeIndexingParams:
    """Indexing parameters produced by the performance-profile resolvers.

    Returned by _resolve_max_performance_params and _resolve_autotuned_params.
    The dataclass is frozen, so fields cannot be reassigned; the 'reasons'
    list object itself remains mutable.
    """

    # Chunk size handed to the text splitter.
    chunk_size: int
    # Overlap between consecutive chunks.
    chunk_overlap: int
    # Batch size used for embedding; the resolvers never return less than 1.
    embedding_batch_size: int
    # Messages describing the selected profile and each adjustment made.
    reasons: list[str]


def _resolve_max_performance_params(
    *,
    chunk_size_locked: bool,
    chunk_overlap_locked: bool,
    batch_size_locked: bool,
    chunk_size: int,
    chunk_overlap: int,
    embedding_batch_size: int,
) -> RuntimeIndexingParams:
    """Choose indexing parameters for the "max-performance" profile.

    A parameter whose '*_locked' flag is true keeps the value passed in. The
    others are derived from the memory figures reported by _read_meminfo_gib:
    hosts with at least 64 GiB total and 16 GiB available get the largest
    values, and the overlap is 15% of the chunk size (at least 300, always
    below the chunk size).

    Returns:
        RuntimeIndexingParams with the final values and the list of messages
        explaining each adjustment.
    """
    total_gib, available_gib, _swap_gib = _read_meminfo_gib()
    notes = [
        "Perfil selecionado: max-performance.",
        "Modo pode elevar consideravelmente o consumo de memória e causar encerramento por OOM (exit 137).",
    ]

    # Same memory test drives both the chunk size and the top batch tier.
    high_memory_host = (
        total_gib is not None
        and total_gib >= 64
        and (available_gib or 0) >= 16
    )

    if chunk_size_locked:
        final_chunk_size = chunk_size
    else:
        final_chunk_size = 7000 if high_memory_host else 6000
        notes.append(f"chunk_size ajustado para {final_chunk_size} no perfil max-performance.")

    if chunk_overlap_locked:
        final_overlap = chunk_overlap
    else:
        proportional_overlap = max(300, int(final_chunk_size * 0.15))
        final_overlap = min(final_chunk_size - 1, proportional_overlap)
        notes.append(f"chunk_overlap ajustado para {final_overlap}.")

    if batch_size_locked:
        final_batch = embedding_batch_size
    else:
        if high_memory_host:
            final_batch = 24
        elif total_gib is not None and total_gib >= 32:
            final_batch = 16
        else:
            final_batch = 12
        notes.append(f"embedding_batch_size ajustado para {final_batch}.")

    return RuntimeIndexingParams(
        chunk_size=final_chunk_size,
        chunk_overlap=final_overlap,
        embedding_batch_size=max(1, final_batch),
        reasons=notes,
    )


def _resolve_autotuned_params(
    *,
    model: SentenceTransformer,
    chunk_size_locked: bool,
    chunk_overlap_locked: bool,
    batch_size_locked: bool,
    chunk_size: int,
    chunk_overlap: int,
    embedding_batch_size: int,
) -> RuntimeIndexingParams:
    """Choose indexing parameters for the "autotune" profile.

    Memory figures come from psutil; when psutil cannot be imported the
    incoming values are returned untouched. A parameter whose '*_locked' flag
    is true keeps the value passed in. Chunk size and overlap are picked from
    memory tiers, and the batch size is picked by a short encode benchmark
    over a memory-limited list of candidate sizes.

    Environment:
        MCP_AUTOTUNE_TARGET_RAM_PERCENT: target system memory usage, clamped
            to 60-75 (default 68). An unparsable value is reported in the
            reasons and replaced by 68.
        MCP_AUTOTUNE_VERBOSE: when true, every benchmark line is added to the
            reasons instead of only the selected one.

    Args:
        model: Embedding model used for the benchmark encode calls.
        chunk_size_locked / chunk_overlap_locked / batch_size_locked: keep the
            corresponding incoming value unchanged.
        chunk_size / chunk_overlap / embedding_batch_size: incoming values.

    Returns:
        RuntimeIndexingParams with the final values and explanatory messages.
    """
    reasons: list[str] = ["Perfil selecionado: autotune (custo-benefício)."]
    verbose_autotune = _env_bool("MCP_AUTOTUNE_VERBOSE", default=False)

    try:
        import psutil  # type: ignore
    except Exception:
        reasons.append("psutil indisponível; mantendo parâmetros atuais sem benchmark.")
        return RuntimeIndexingParams(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            embedding_batch_size=embedding_batch_size,
            reasons=reasons,
        )

    vm = psutil.virtual_memory()
    swap = psutil.swap_memory()
    mem_total_gib = vm.total / (1024**3)
    mem_available_gib = vm.available / (1024**3)
    swap_total_gib = swap.total / (1024**3)

    # A malformed or empty override must not abort indexing: fall back to the
    # default target and record why.
    raw_target_percent = os.environ.get("MCP_AUTOTUNE_TARGET_RAM_PERCENT", "68")
    try:
        requested_target_percent = float(raw_target_percent)
    except ValueError:
        reasons.append(
            f"MCP_AUTOTUNE_TARGET_RAM_PERCENT inválido '{raw_target_percent}'; usando 68."
        )
        requested_target_percent = 68.0

    target_ram_percent = _clamp(requested_target_percent, 60.0, 75.0)
    if mem_available_gib < 6 or swap_total_gib < 4:
        # Little headroom: cap the target lower.
        target_ram_percent = min(target_ram_percent, 63.0)
    reasons.append(
        f"Memória detectada: total={mem_total_gib:.1f} GiB, livre={mem_available_gib:.1f} GiB, "
        f"swap={swap_total_gib:.1f} GiB, alvo={target_ram_percent:.1f}%."
    )

    tuned_chunk_size = chunk_size
    tuned_chunk_overlap = chunk_overlap
    tuned_batch = embedding_batch_size

    if not chunk_size_locked:
        if mem_total_gib < 8 or mem_available_gib < 3:
            tuned_chunk_size = 1800
        elif mem_total_gib < 16 or mem_available_gib < 6:
            tuned_chunk_size = 2400
        elif mem_total_gib < 32 or mem_available_gib < 12:
            tuned_chunk_size = 3200
        else:
            tuned_chunk_size = 4200
        reasons.append(f"chunk_size autotunado para {tuned_chunk_size}.")

    if not chunk_overlap_locked:
        # 15% of the chunk size, at least 120, always below the chunk size.
        tuned_chunk_overlap = min(tuned_chunk_size - 1, max(120, int(tuned_chunk_size * 0.15)))
        reasons.append(f"chunk_overlap autotunado para {tuned_chunk_overlap}.")

    if not batch_size_locked:
        # Largest batch size worth benchmarking for this memory tier.
        max_candidate = 16
        if mem_total_gib < 8 or mem_available_gib < 3 or swap_total_gib < 2:
            max_candidate = 2
        elif mem_total_gib < 16 or mem_available_gib < 6:
            max_candidate = 4
        elif mem_total_gib < 32 or mem_available_gib < 10:
            max_candidate = 8

        candidates = [2, 4, 6, 8, 12, 16]
        candidates = [c for c in candidates if c <= max_candidate]
        if not candidates:
            candidates = [2]

        process = psutil.Process()
        sample_size = min(max(512, tuned_chunk_size), 3000)
        sample_text = ("# autotune-sample\n" + ("x" * sample_size))

        best_batch = candidates[0]
        best_score = -1.0
        best_memory_pct = 100.0
        selected_benchmark_line: str | None = None
        benchmark_lines: list[str] = []

        # Short warm-up to stabilise internal caches; failures are ignored
        # because the benchmark loop below reports encode errors itself.
        try:
            _ = model.encode([sample_text], show_progress_bar=False, batch_size=1)
        except Exception:
            pass

        for candidate in candidates:
            docs = [sample_text] * candidate
            gc.collect()
            before_vm = psutil.virtual_memory().percent
            before_rss = process.memory_info().rss / (1024**2)
            started = perf_counter()
            try:
                embeddings = model.encode(
                    docs,
                    show_progress_bar=False,
                    batch_size=candidate,
                )
            except Exception as e:
                benchmark_lines.append(f"batch={candidate}: erro ({e})")
                continue

            elapsed = max(perf_counter() - started, 1e-6)
            after_vm = psutil.virtual_memory().percent
            after_rss = process.memory_info().rss / (1024**2)
            del embeddings
            gc.collect()

            throughput = candidate / elapsed
            safe = after_vm <= (target_ram_percent + 3.0)
            benchmark_lines.append(
                f"batch={candidate}: {throughput:.2f} itens/s, vm={after_vm:.1f}%, rss_delta={after_rss - before_rss:+.1f} MiB"
            )

            if safe and throughput > best_score:
                best_score = throughput
                best_batch = candidate
                best_memory_pct = after_vm
                selected_benchmark_line = benchmark_lines[-1]
            elif best_score < 0 and after_vm < best_memory_pct:
                # No candidate has been "safe" so far: keep the one with the
                # lowest memory usage.
                best_batch = candidate
                best_memory_pct = after_vm
                selected_benchmark_line = benchmark_lines[-1]

            # Already far above the limit: do not try larger batches.
            if after_vm > target_ram_percent + 8.0:
                break

            # Memory was above the limit even before this run: stop probing
            # larger batches (the selection made above is kept).
            if before_vm > target_ram_percent + 5.0:
                break

        tuned_batch = max(1, best_batch)
        if verbose_autotune:
            reasons.extend(benchmark_lines)
        elif selected_benchmark_line:
            reasons.append(f"Micro-benchmark: {selected_benchmark_line}")
        reasons.append(
            f"embedding_batch_size autotunado para {tuned_batch} (alvo de memória: {target_ram_percent:.1f}%)."
        )

    return RuntimeIndexingParams(
        chunk_size=tuned_chunk_size,
        chunk_overlap=tuned_chunk_overlap,
        embedding_batch_size=max(1, tuned_batch),
        reasons=reasons,
    )


# ---------------------------------------------------------------------------
# Funções auxiliares
# ---------------------------------------------------------------------------

def get_text_splitter(chunk_size: int, chunk_overlap: int) -> RecursiveCharacterTextSplitter:
    """Build the text splitter used by the indexer.

    Length is measured with len, and the separators are passed in this order:
    blank line, newline, space, empty string.
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
        "separators": ["\n\n", "\n", " ", ""],
    }
    return RecursiveCharacterTextSplitter(**splitter_options)


def load_embedding_model(model_choice: str, jina_quantization: str) -> SentenceTransformer:
    """Load the embedding model on CPU, with download and load fallbacks.

    The model id for 'model_choice' is resolved and fetched through
    download_model_with_fallback, then instantiated from the returned local
    directory. If loading fails and the selected model was not already the
    fallback, the fallback model is downloaded and loaded instead.

    Args:
        model_choice: Model key accepted by _resolve_model_id.
        jina_quantization: "default" leaves the model untouched; any other
            value requests dynamic int8 quantization, applied only when the
            loaded model is the Jina v3 model.

    Returns:
        The loaded SentenceTransformer.

    Raises:
        RuntimeError: When the model that failed to load is the fallback
            model itself.
        ValueError: Propagated from _resolve_model_id for an unknown key.
    """
    embedding_model_id = _resolve_model_id(model_choice)
    fallback_model_id = _resolve_fallback_model_id(model_choice)

    model_base_dir = MODEL_CACHE_BASE_DIR
    model_base_dir.mkdir(parents=True, exist_ok=True)
    # Only used for the log line below; the download helper receives the base dir.
    preferred_model_cache_dir = _model_cache_dir(model_base_dir, embedding_model_id)

    print(f"[+] Baixando modelo preferido: {embedding_model_id}")
    print(f"[+] Diretório de download/cache do modelo: {preferred_model_cache_dir}")
    # NOTE(review): download_model_with_fallback is defined elsewhere; this code
    # relies on its result exposing .model_id, .provider and .local_dir.
    selection = download_model_with_fallback(
        preferred_model_id=embedding_model_id,
        fallback_model_id=fallback_model_id,
        local_dir=model_base_dir,
    )
    # Read by the nested loaders through the closure; rebound further down
    # when the load fallback kicks in.
    selected_model_dir = selection.local_dir
    print(
        f"[+] Modelo selecionado: {selection.model_id} "
        f"(provider={selection.provider}, path={selected_model_dir})"
    )

    def _clear_hf_dynamic_modules_cache() -> None:
        """Delete the Hugging Face dynamic-modules cache under the home directory."""
        cache_dir = Path.home() / ".cache" / "huggingface" / "modules" / "transformers_modules"
        if cache_dir.exists():
            print(f"[!] Limpando cache de módulos dinâmicos do Hugging Face: {cache_dir}")
            shutil.rmtree(cache_dir, ignore_errors=True)

    def _load_from_local_dir(model_id: str) -> SentenceTransformer:
        """Instantiate the model stored in selected_model_dir on CPU."""
        # The Jina model depends on remote code; the fallback normally does not.
        trust_remote_code = model_id.startswith("jinaai/")
        tokenizer_kwargs = {"fix_mistral_regex": True}

        def _instantiate_model() -> SentenceTransformer:
            return SentenceTransformer(
                str(selected_model_dir),
                device="cpu",
                trust_remote_code=trust_remote_code,
                tokenizer_kwargs=tokenizer_kwargs,
            )

        def _load_with_mistral_regex_patch() -> SentenceTransformer:
            """Instantiate the model with transformers loaders temporarily patched."""
            # Jina's remote code instantiates an internal tokenizer without
            # forwarding tokenizer_kwargs.
            if not trust_remote_code:
                return _instantiate_model()

            from transformers import AutoModel, AutoTokenizer
            from transformers.modeling_utils import PreTrainedModel

            # Originals are kept so the finally block can restore them.
            original_from_pretrained = AutoTokenizer.from_pretrained
            original_model_from_pretrained = AutoModel.from_pretrained
            original_pretrained_model_from_pretrained = PreTrainedModel.from_pretrained
            original_pretrained_model_from_config = PreTrainedModel._from_config
            # Both spellings of the local model path are treated as "our" model.
            model_refs = {str(selected_model_dir), str(selected_model_dir.resolve())}

            def _patched_from_pretrained(*args, **kwargs):
                # Add fix_mistral_regex=True for tokenizer loads of our model only.
                model_ref = args[0] if args else kwargs.get("pretrained_model_name_or_path")
                if model_ref is not None and str(model_ref) in model_refs:
                    kwargs.setdefault("fix_mistral_regex", True)
                return original_from_pretrained(*args, **kwargs)

            def _patched_model_from_pretrained(*args, **kwargs):
                # For our model, rename the torch_dtype keyword to dtype
                # (an explicitly passed dtype takes precedence).
                model_ref = args[0] if args else kwargs.get("pretrained_model_name_or_path")
                if model_ref is not None and str(model_ref) in model_refs and "torch_dtype" in kwargs:
                    kwargs = dict(kwargs)
                    if "dtype" not in kwargs:
                        kwargs["dtype"] = kwargs["torch_dtype"]
                    kwargs.pop("torch_dtype", None)
                return original_model_from_pretrained(*args, **kwargs)

            # Unwrap the bound classmethod so the patched version can pass cls through.
            original_pretrained_model_from_pretrained_fn = original_pretrained_model_from_pretrained.__func__

            @classmethod
            def _patched_pretrained_model_from_pretrained(cls, *args, **kwargs):
                # Same torch_dtype -> dtype rename, applied to every call.
                if "torch_dtype" in kwargs:
                    kwargs = dict(kwargs)
                    if "dtype" not in kwargs:
                        kwargs["dtype"] = kwargs["torch_dtype"]
                    kwargs.pop("torch_dtype", None)
                return original_pretrained_model_from_pretrained_fn(cls, *args, **kwargs)

            original_pretrained_model_from_config_fn = original_pretrained_model_from_config.__func__

            @classmethod
            def _patched_pretrained_model_from_config(cls, *args, **kwargs):
                # Same torch_dtype -> dtype rename for the config-based constructor.
                if "torch_dtype" in kwargs:
                    kwargs = dict(kwargs)
                    if "dtype" not in kwargs:
                        kwargs["dtype"] = kwargs["torch_dtype"]
                    kwargs.pop("torch_dtype", None)
                return original_pretrained_model_from_config_fn(cls, *args, **kwargs)

            # The patches are process-wide while the model is being instantiated.
            AutoTokenizer.from_pretrained = _patched_from_pretrained
            AutoModel.from_pretrained = _patched_model_from_pretrained
            PreTrainedModel.from_pretrained = _patched_pretrained_model_from_pretrained
            PreTrainedModel._from_config = _patched_pretrained_model_from_config
            try:
                return _instantiate_model()
            finally:
                # Always restore the saved attributes, even when loading fails.
                AutoTokenizer.from_pretrained = original_from_pretrained
                AutoModel.from_pretrained = original_model_from_pretrained
                PreTrainedModel.from_pretrained = original_pretrained_model_from_pretrained
                PreTrainedModel._from_config = original_pretrained_model_from_config

        print(f"[+] Carregando modelo de embeddings a partir de: {selected_model_dir} (CPU)...")
        try:
            return _load_with_mistral_regex_patch()
        except FileNotFoundError as e:
            # Recover from a corrupt/incomplete transformers dynamic-module
            # cache: clear it and retry once.
            if trust_remote_code and "transformers_modules" in str(e):
                print(f"[!] Cache dinâmico inconsistente detectado: {e}")
                _clear_hf_dynamic_modules_cache()
                return _load_with_mistral_regex_patch()
            raise

    def _apply_jina_quantization_if_needed(model: SentenceTransformer, model_id: str) -> SentenceTransformer:
        """Apply dynamic int8 quantization to the Jina v3 model when requested.

        The same model object is returned in every case; any failure is
        reported with a warning and leaves the model as it is at that point.
        """
        if model_id != JINA_V3_EMBEDDING_MODEL or jina_quantization == "default":
            return model
        try:
            import torch
            import warnings

            quantized_layers = 0
            for module in model.modules():
                # Only layers whose class is named ParametrizedLinear are replaced.
                if type(module).__name__ != "ParametrizedLinear":
                    continue

                # Copy weight and bias into a plain float32 Linear layer.
                float_linear = torch.nn.Linear(
                    module.in_features,
                    module.out_features,
                    bias=module.bias is not None,
                )
                with torch.no_grad():
                    float_linear.weight.copy_(module.weight.detach().to(torch.float32))
                    if module.bias is not None:
                        float_linear.bias.copy_(module.bias.detach().to(torch.float32))

                # Quantize that copy; DeprecationWarning is silenced for this call.
                with warnings.catch_warnings():
                    warnings.filterwarnings("ignore", category=DeprecationWarning)
                    quantized_linear = torch.quantization.quantize_dynamic(
                        torch.nn.Sequential(float_linear),
                        {torch.nn.Linear},
                        dtype=torch.qint8,
                    )[0]

                module._dynamic_int8_linear = quantized_linear

                def _forward_dynamic_int8(self, input, task_id=None, residual=False):
                    # task_id is accepted but not used by the quantized path.
                    out = self._dynamic_int8_linear(input)
                    if residual:
                        return out, input
                    return out

                # Bind the replacement forward to this module instance only.
                module.forward = _forward_dynamic_int8.__get__(module, module.__class__)
                quantized_layers += 1

            if quantized_layers == 0:
                print(
                    "[AVISO] Nenhuma camada ParametrizedLinear encontrada para dynamic-int8; usando modelo padrao."
                )
                return model

            print(f"[+] Quantizacao Jina aplicada: dynamic-int8 (CPU, {quantized_layers} camadas).")
            return model
        except Exception as quant_error:
            print(f"[AVISO] Falha ao aplicar dynamic-int8 ({quant_error}); usando modelo padrao.")
            return model

    try:
        model = _load_from_local_dir(selection.model_id)
        model = _apply_jina_quantization_if_needed(model, selection.model_id)
        print("[+] Modelo carregado com sucesso.")
        return model
    except Exception as first_error:
        # Nothing left to fall back to when the fallback itself failed.
        if selection.model_id == fallback_model_id:
            raise RuntimeError(
                f"Falha ao carregar o modelo fallback '{fallback_model_id}': {first_error}"
            ) from first_error

        print(
            f"[!] Falha ao carregar '{selection.model_id}': {_format_exception(first_error)}\n"
            f"    Tentando fallback de carregamento: {fallback_model_id}"
        )
        fallback_selection = download_model_with_fallback(
            preferred_model_id=fallback_model_id,
            fallback_model_id=fallback_model_id,
            local_dir=model_base_dir,
        )
        # Rebinding this local redirects the nested loaders to the fallback directory.
        selected_model_dir = fallback_selection.local_dir
        print(
            f"[+] Modelo selecionado: {fallback_selection.model_id} "
            f"(provider={fallback_selection.provider}, path={selected_model_dir})"
        )
        model = _load_from_local_dir(fallback_selection.model_id)
        model = _apply_jina_quantization_if_needed(model, fallback_selection.model_id)
        print("[+] Modelo fallback carregado com sucesso.")
        return model


def connect_to_chroma() -> chromadb.HttpClient:
    """Open an HTTP client to ChromaDB and verify it with a heartbeat.

    Returns:
        The connected client.

    On any failure the error and a hint about starting the Docker container
    are printed, and the process exits with status 1.
    """
    try:
        chroma = chromadb.HttpClient(host=CHROMA_HOST, port=CHROMA_PORT)
        # A heartbeat confirms that the server is actually up.
        chroma.heartbeat()
        print(f"[+] Conectado ao ChromaDB em {CHROMA_HOST}:{CHROMA_PORT}")
        return chroma
    except Exception as err:
        failure_lines = (
            f"[ERRO] Não foi possível conectar ao ChromaDB: {err}",
            "       Verifique se o container Docker está rodando:",
            "       docker compose up -d",
        )
        for failure_line in failure_lines:
            print(failure_line)
        sys.exit(1)


def scan_files(root_path: Path) -> Iterator[Path]:
    """Yield the files under 'root_path' that are candidates for indexing.

    Directories named in IGNORED_DIRS or starting with "." are not entered.
    Files are skipped when their lower-cased suffix is in IGNORED_EXTENSIONS,
    when they are larger than MAX_FILE_SIZE_BYTES, or when their size cannot
    be read. Directories and files are visited in sorted name order.
    """
    for current_dir, subdirs, names in os.walk(root_path):
        # Assign through the slice so os.walk sees the pruned, ordered list
        # and does not descend into the removed directories.
        subdirs[:] = sorted(
            d for d in subdirs
            if not (d in IGNORED_DIRS or d.startswith("."))
        )

        base = Path(current_dir)
        for name in sorted(names):
            candidate = base / name

            # Skip by extension.
            if candidate.suffix.lower() in IGNORED_EXTENSIONS:
                continue

            # Skip files whose size cannot be read, and files that are too large.
            try:
                size_bytes = candidate.stat().st_size
            except OSError:
                continue
            if size_bytes > MAX_FILE_SIZE_BYTES:
                continue

            yield candidate


def make_chunk_id(file_path: str, chunk_index: int) -> str:
    """Return a deterministic id for a chunk.

    The id is the MD5 hex digest of "<file_path>::chunk::<chunk_index>", so
    the same path and index always produce the same id.
    """
    digest = hashlib.md5()
    digest.update("{}::chunk::{}".format(file_path, chunk_index).encode())
    return digest.hexdigest()


def read_file_safe(filepath: Path) -> str | None:
    """Lê um arquivo de texto, tentando múltiplos encodings."""
    for encoding in ("utf-8", "latin-1", "cp1252"):
        try:
            return filepath.read_text(encoding=encoding)
        except UnicodeDecodeError:
            continue
        except OSError as e:
            print(f"  [AVISO] Não foi possível ler {filepath}: {e}")
            return None
    # Se nenhum encoding funcionou, é provavelmente binário disfarçado
    return None


def delete_file_chunks(collection: chromadb.Collection, file_path: str) -> None:
    """Delete every chunk whose "file_path" metadata equals 'file_path'.

    Errors are reported with a warning and never raised.
    """
    try:
        # Ask for ids only, so documents and metadata are not loaded into memory.
        found = collection.get(where={"file_path": file_path}, include=[])
        if not found:
            return
        if found["ids"]:
            collection.delete(ids=found["ids"])
    except Exception as err:
        print(f"  [AVISO] Erro ao deletar chunks de {file_path}: {_format_exception(err)}")


# ---------------------------------------------------------------------------
# Indexação de um único arquivo
# ---------------------------------------------------------------------------

def index_file(
    filepath: Path,
    collection: chromadb.Collection,
    model: SentenceTransformer,
    splitter: RecursiveCharacterTextSplitter,
    root_path: Path,
    embedding_batch_size: int,
) -> int:
    """
    Index a single file: read it, split it into chunks, embed and upsert them.

    Chunks previously stored for the file are removed first so re-indexing is
    idempotent. That now includes files that have become empty or
    whitespace-only, whose stale chunks used to be left behind.

    Args:
        filepath: File to index.
        collection: Target ChromaDB collection.
        model: Embedding model used to encode chunk texts.
        splitter: Text splitter that produces the chunks.
        root_path: Project root; used to derive the "relative_path" metadata.
        embedding_batch_size: Number of chunks encoded and upserted per batch.

    Returns:
        The number of chunks upserted (0 when the file is unreadable or has
        no content).
    """
    content = read_file_safe(filepath)
    if content is None:
        # Unreadable/binary file: leave whatever is already indexed untouched.
        return 0

    # The absolute path is the metadata key that identifies the file's chunks.
    abs_path = str(filepath.resolve())

    # Do all fallible, side-effect-free work before deleting anything, so a
    # failure here cannot leave the file without any chunks.
    chunks = splitter.split_text(content) if content.strip() else []
    if not chunks:
        # Nothing to index any more: drop chunks left over from an older version.
        delete_file_chunks(collection, abs_path)
        return 0

    relative_path = str(filepath.relative_to(root_path))

    # Remove the file's old chunks (idempotent update).
    delete_file_chunks(collection, abs_path)

    inserted_chunks = 0
    batch_ids: list[str] = []
    batch_docs: list[str] = []
    batch_metadatas: list[dict[str, object]] = []

    def _flush_batch() -> None:
        """Encode and upsert the pending batch, then release its memory."""
        nonlocal inserted_chunks
        if not batch_ids:
            return

        embeddings = model.encode(
            batch_docs,
            show_progress_bar=False,
            batch_size=embedding_batch_size,
        ).tolist()
        collection.upsert(
            ids=batch_ids,
            embeddings=embeddings,
            documents=batch_docs,
            metadatas=batch_metadatas,
        )
        inserted_chunks += len(batch_ids)
        del embeddings
        batch_ids.clear()
        batch_docs.clear()
        batch_metadatas.clear()
        gc.collect()

    for i, chunk in enumerate(chunks):
        batch_ids.append(make_chunk_id(abs_path, i))
        batch_docs.append(chunk)
        batch_metadatas.append(
            {
                "file_path": abs_path,
                "chunk_index": i,
                "file_name": filepath.name,
                # Path relative to the project root, for compact display.
                "relative_path": relative_path,
            }
        )
        if len(batch_ids) >= embedding_batch_size:
            _flush_batch()

    # Flush the final, possibly partial, batch.
    _flush_batch()
    return inserted_chunks


# ---------------------------------------------------------------------------
# Ponto de entrada principal
# ---------------------------------------------------------------------------

def main():
    """
    Command-line entry point of the indexer.

    Steps, in order:
      1. Parse CLI arguments and validate the project path.
      2. Resolve the embedding model, quantization, performance profile and
         chunk/batch parameters from CLI, environment and persisted config.
      3. Connect to ChromaDB and get (or recreate, with --clear) the target
         collections.
      4. Load the primary model, tune the parameters and persist them.
      5. Scan the project and index every file into each applicable target,
         retrying with a smaller batch on memory errors and recreating a
         collection once on an embedding-dimension mismatch.
      6. Print per-target warnings and a final summary.

    Exits with status 1 when the project path is not a directory and with
    status 0 when no files are found.
    """
    parser = argparse.ArgumentParser(
        description="Indexa um projeto de código no ChromaDB para RAG local."
    )
    parser.add_argument(
        "project_path",
        nargs="?",
        default=".",
        help="Caminho raiz do projeto a indexar (padrão: diretório atual)",
    )
    parser.add_argument(
        "--clear",
        action="store_true",
        help="Limpa toda a coleção antes de reindexar",
    )
    parser.add_argument(
        "--embedding-model",
        choices=["jina", "bge", "hybrid"],
        help=(
            "Modelo de embeddings: 'jina' (codigo), "
            "'bge' (conteudo misto) ou 'hybrid' (duas colecoes: Jina v2 + BGE)."
        ),
    )
    parser.add_argument(
        "--jina-quantization",
        choices=["default", "dynamic-int8"],
        help="Quantizacao para Jina: 'default' (mais qualidade) ou 'dynamic-int8' (mais velocidade).",
    )
    parser.add_argument(
        "--perf-profile",
        choices=["autotune", "max-performance"],
        help=(
            "Perfil de performance da indexação: "
            "'autotune' (custo-benefício) ou 'max-performance' (mais throughput, maior uso de RAM)."
        ),
    )
    args = parser.parse_args()

    root_path = Path(args.project_path).resolve()
    if not root_path.is_dir():
        print(f"[ERRO] Caminho não existe ou não é um diretório: {root_path}")
        sys.exit(1)

    print(f"\n{'='*60}")
    print(f"  RAG Indexer — Projeto: {root_path}")
    print(f"{'='*60}\n")
    index_started_at = datetime.now()
    print(f"[INFO] Início: {index_started_at.strftime('%Y-%m-%d %H:%M:%S')}")

    force_model_reconfigure = _env_bool("MCP_FORCE_MODEL_RECONFIG", default=False)
    persisted_config = load_indexer_tuning_config(force_model_reconfigure)
    model_choice, jina_quantization = resolve_embedding_config(
        args.embedding_model,
        args.jina_quantization,
        persisted_config=persisted_config,
    )
    perf_profile = resolve_perf_profile(args.perf_profile, persisted_config)

    # A parameter set through the environment is "locked": neither the
    # persisted config nor the memory fallback below overrides it.
    chunk_size_locked = "MCP_CHUNK_SIZE" in os.environ
    chunk_overlap_locked = "MCP_CHUNK_OVERLAP" in os.environ
    batch_size_locked = "MCP_EMBEDDING_BATCH_SIZE" in os.environ

    persisted_chunk_size = _parse_config_int(persisted_config, "chunk_size")
    persisted_chunk_overlap = _parse_config_int(persisted_config, "chunk_overlap")
    persisted_batch_size = _parse_config_int(persisted_config, "embedding_batch_size")

    effective_chunk_size = CHUNK_SIZE
    if not chunk_size_locked and persisted_chunk_size is not None:
        effective_chunk_size = max(256, persisted_chunk_size)

    effective_chunk_overlap = CHUNK_OVERLAP
    if not chunk_overlap_locked and persisted_chunk_overlap is not None:
        effective_chunk_overlap = max(0, min(effective_chunk_size - 1, persisted_chunk_overlap))

    effective_batch_size = EMBEDDING_BATCH_SIZE
    if not batch_size_locked and persisted_batch_size is not None:
        effective_batch_size = max(1, persisted_batch_size)

    print(
        f"[CONFIG] Modelo escolhido: {model_choice} "
        f"({_describe_embedding_choice(model_choice)})"
    )
    if model_choice == "jina":
        print(f"[CONFIG] Quantizacao Jina: {jina_quantization}")
    elif model_choice == "hybrid":
        print("[CONFIG] Quantizacao Jina: nao aplicavel no hybrid (Jina v2 + BGE)")
    else:
        print("[CONFIG] Quantizacao Jina: nao aplicavel (modelo BGE selecionado)")
    print(f"[CONFIG] Perfil de performance: {perf_profile}")
    if perf_profile == "max-performance":
        print(
            "[AVISO] Este modo pode elevar consideravelmente o consumo de memória "
            "e causar encerramento por OOM (exit 137)."
        )
    warn_if_jina_memory_risk(model_choice, jina_quantization)

    # Initialise components.
    client = connect_to_chroma()
    targets = _resolve_index_targets(model_choice)

    # Get or recreate the collections involved.
    collections: dict[str, chromadb.Collection] = {}
    collection_dimension_reset_done: dict[str, bool] = {}
    for target in targets:
        if args.clear:
            try:
                client.delete_collection(target.collection_name)
                print(f"[!] Coleção '{target.collection_name}' removida para reindexação limpa.")
            except Exception:
                # Best effort: the collection may simply not exist yet.
                pass
        collections[target.collection_name] = client.get_or_create_collection(
            name=target.collection_name,
            metadata={"hnsw:space": "cosine"},
        )
        collection_dimension_reset_done[target.collection_name] = False

    # Models are loaded lazily and reused per target.
    loaded_models: dict[str, SentenceTransformer] = {}
    total_chunks = 0
    errors = 0
    files_scanned = 0
    files_processed_total = 0
    chunks_by_collection = {target.collection_name: 0 for target in targets}
    files_by_collection = {target.collection_name: 0 for target in targets}
    files_eligible_by_collection = {target.collection_name: 0 for target in targets}
    errors_by_collection = {target.collection_name: 0 for target in targets}
    error_samples_by_collection: dict[str, list[str]] = {target.collection_name: [] for target in targets}
    target_by_model = {target.model_choice: target for target in targets}

    # Load the first model up front so autotune can run its micro-benchmark.
    primary_target = targets[0]
    primary_quantization = jina_quantization if primary_target.model_choice == "jina" else "default"
    loaded_models[primary_target.model_choice] = load_embedding_model(primary_target.model_choice, primary_quantization)
    primary_model = loaded_models[primary_target.model_choice]

    if perf_profile == "autotune":
        tuned = _resolve_autotuned_params(
            model=primary_model,
            chunk_size_locked=chunk_size_locked,
            chunk_overlap_locked=chunk_overlap_locked,
            batch_size_locked=batch_size_locked,
            chunk_size=effective_chunk_size,
            chunk_overlap=effective_chunk_overlap,
            embedding_batch_size=effective_batch_size,
        )
    else:
        tuned = _resolve_max_performance_params(
            chunk_size_locked=chunk_size_locked,
            chunk_overlap_locked=chunk_overlap_locked,
            batch_size_locked=batch_size_locked,
            chunk_size=effective_chunk_size,
            chunk_overlap=effective_chunk_overlap,
            embedding_batch_size=effective_batch_size,
        )

    # Drop the extra reference: otherwise clearing `loaded_models` in the
    # hybrid memory fallback below could never release the primary model.
    del primary_model

    effective_chunk_size = max(256, tuned.chunk_size)
    effective_chunk_overlap = max(0, min(effective_chunk_size - 1, tuned.chunk_overlap))
    effective_batch_size = max(1, tuned.embedding_batch_size)

    for reason in tuned.reasons:
        print(f"[CONFIG] {reason}")

    print(
        f"[CONFIG] Parâmetros finais: "
        f"chunk_size={effective_chunk_size}, chunk_overlap={effective_chunk_overlap}, "
        f"embedding_batch={effective_batch_size}"
    )

    save_indexer_tuning_config(
        {
            "embedding_model": model_choice,
            "jina_quantization": jina_quantization,
            "perf_profile": perf_profile,
            "chunk_size": effective_chunk_size,
            "chunk_overlap": effective_chunk_overlap,
            "embedding_batch_size": effective_batch_size,
        }
    )

    splitter = get_text_splitter(effective_chunk_size, effective_chunk_overlap)

    print(f"\n[+] Varrendo e indexando arquivos em: {root_path}")
    files = list(scan_files(root_path))
    files_scanned = len(files)
    if files_scanned == 0:
        print("[AVISO] Nenhum arquivo encontrado. Verifique o caminho e os filtros.")
        sys.exit(0)

    print(f"[+] {files_scanned} arquivo(s) elegível(is) para indexação.")
    with tqdm(
        total=files_scanned,
        desc="Indexando",
        unit="arquivo",
        bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}, {rate_fmt}]",
    ) as pbar:
        for filepath in files:
            target_models = _classify_file_targets(filepath, model_choice)

            for target_model in target_models:
                target = target_by_model.get(target_model)
                if target is None:
                    continue

                if target.model_choice not in loaded_models:
                    target_quantization = jina_quantization if target.model_choice == "jina" else "default"
                    try:
                        loaded_models[target.model_choice] = load_embedding_model(
                            target.model_choice,
                            target_quantization,
                        )
                    except Exception as load_error:
                        # In hybrid mode there may not be enough RAM to keep two large models loaded.
                        if model_choice == "hybrid" and loaded_models and _is_memory_related_error(load_error):
                            print(
                                "[AVISO] Falha ao carregar modelo adicional no hybrid por memória. "
                                "Liberando modelo anterior e tentando novamente."
                            )
                            # `model` may still point at the model used for the
                            # previous file; release it so gc can free the memory.
                            model = None
                            loaded_models.clear()
                            gc.collect()
                            loaded_models[target.model_choice] = load_embedding_model(
                                target.model_choice,
                                target_quantization,
                            )
                        else:
                            raise

                model = loaded_models[target.model_choice]
                collection = collections[target.collection_name]
                files_eligible_by_collection[target.collection_name] += 1

                # Retry loop: each `continue` re-attempts the same file after an adjustment.
                while True:
                    try:
                        n_chunks = index_file(
                            filepath,
                            collection,
                            model,
                            splitter,
                            root_path,
                            embedding_batch_size=effective_batch_size,
                        )
                        total_chunks += n_chunks
                        files_processed_total += 1
                        chunks_by_collection[target.collection_name] += n_chunks
                        files_by_collection[target.collection_name] += 1
                        break
                    except Exception as e:
                        # Automatic fallback so machines at their RAM limit do not fail outright.
                        if (
                            not batch_size_locked
                            and effective_batch_size > 1
                            and _is_memory_related_error(e)
                        ):
                            new_batch = max(1, effective_batch_size // 2)
                            if new_batch < effective_batch_size:
                                tqdm.write(
                                    f"  [AJUSTE] Memória alta em {target.label}. "
                                    f"Batch reduzido {effective_batch_size} -> {new_batch}."
                                )
                                effective_batch_size = new_batch
                                gc.collect()
                                continue

                        # Recreate the collection at most once per target.
                        if (
                            _is_dimension_mismatch_error(e)
                            and not collection_dimension_reset_done[target.collection_name]
                        ):
                            tqdm.write(
                                f"  [AJUSTE] Dimensão incompatível detectada em '{target.collection_name}'. "
                                "Recriando coleção e tentando novamente."
                            )
                            try:
                                client.delete_collection(target.collection_name)
                            except Exception:
                                pass

                            collections[target.collection_name] = client.get_or_create_collection(
                                name=target.collection_name,
                                metadata={"hnsw:space": "cosine"},
                            )
                            # Retry against the new handle; the old one refers
                            # to the collection that was just deleted.
                            collection = collections[target.collection_name]
                            collection_dimension_reset_done[target.collection_name] = True
                            gc.collect()
                            continue

                        errors += 1
                        errors_by_collection[target.collection_name] += 1
                        # Keep at most three sample messages per collection for the summary.
                        if len(error_samples_by_collection[target.collection_name]) < 3:
                            error_samples_by_collection[target.collection_name].append(
                                f"{filepath.name}: {_format_exception(e)}"
                            )
                        tqdm.write(f"  [ERRO] {filepath} [{target.label}]: {_format_exception(e)}")
                        break

            pbar.set_postfix({"chunks": total_chunks, "atual": filepath.name[:20]})
            pbar.update(1)

    # Per-target warnings.
    for target in targets:
        collection_name = target.collection_name
        eligible = files_eligible_by_collection[collection_name]
        processed = files_by_collection[collection_name]
        target_errors = errors_by_collection[collection_name]

        if eligible == 0:
            print(f"[AVISO] Nenhum arquivo elegível para {target.label}; etapa ignorada.")
        elif processed == 0 and target_errors > 0:
            print(
                f"[AVISO] {eligible} arquivo(s) elegível(is) para {target.label}, "
                "mas todos falharam."
            )

        if target_errors:
            print(f"[AVISO] {target_errors} erro(s) durante a indexação do target {target.label}.")
            for sample in error_samples_by_collection[collection_name]:
                print(f"        - {sample}")

    # Final summary.
    index_finished_at = datetime.now()
    elapsed_seconds = int((index_finished_at - index_started_at).total_seconds())
    elapsed_h = elapsed_seconds // 3600
    elapsed_m = (elapsed_seconds % 3600) // 60
    elapsed_s = elapsed_seconds % 60
    print(f"\n{'='*60}")
    print(f"  Indexação concluída!")
    print(f"  Início               : {index_started_at.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"  Fim                  : {index_finished_at.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"  Duração              : {elapsed_h:02d}:{elapsed_m:02d}:{elapsed_s:02d}")
    print(f"  Arquivos varridos    : {files_scanned}")
    print(f"  Arquivos processados : {files_processed_total}")
    print(f"  Total de chunks      : {total_chunks}")
    print(f"  Erros                : {errors}")
    for target in targets:
        collection_name = target.collection_name
        print(
            f"  Coleção ChromaDB     : '{collection_name}' "
            f"(elegíveis={files_eligible_by_collection.get(collection_name, 0)}, "
            f"arquivos={files_by_collection.get(collection_name, 0)}, "
            f"chunks={chunks_by_collection.get(collection_name, 0)})"
        )
    print(f"{'='*60}\n")


# Script entry point: run the indexer and turn an out-of-memory failure into
# a readable hint plus exit status 1.
if __name__ == "__main__":
    try:
        main()
    except MemoryError:
        print(
            "[ERRO] Falha de memória durante a indexação. "
            "Use --embedding-model bge ou execute o Jina em máquina com mais RAM/swap."
        )
        raise SystemExit(1)
"
564
+ B64_MCP="#!/usr/bin/env python3
from __future__ import annotations
"""
mcp_server.py — Servidor MCP para RAG local de codebase.

Expõe ferramentas de busca semântica e indexação via stdio para o Claude Code CLI.
Conecta-se ao ChromaDB rodando em Docker (localhost:8000).

Novidade: modo híbrido ensemble com duas coleções separadas + RRF + reranking leve.
"""

import sys
import os
import hashlib
import json
import logging
import getpass
import shutil
from collections.abc import Iterator
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path

# Evita mensagens advisory do transformers em stderr durante a carga do modelo.
os.environ.setdefault("TRANSFORMERS_NO_ADVISORY_WARNINGS", "1")


class _TorchDtypeWarningFilter(logging.Filter):
    def filter(self, record: logging.LogRecord) -> bool:
        return "`torch_dtype` is deprecated! Use `dtype` instead!" not in record.getMessage()


# Attach a fresh filter instance to each transformers logger named below.
for _logger_name in (
    "transformers.configuration_utils",
    "transformers.modeling_utils",
):
    logging.getLogger(name=_logger_name).addFilter(filter=_TorchDtypeWarningFilter())

import chromadb
from sentence_transformers import CrossEncoder, SentenceTransformer
from langchain_text_splitters import RecursiveCharacterTextSplitter
from mcp.server.fastmcp import FastMCP
from download_model_from_hugginface import download_model_with_fallback

# ---------------------------------------------------------------------------
# Configuração de logging (stderr para não poluir o protocolo stdio)
# ---------------------------------------------------------------------------

# All log output goes to stderr; stdout is reserved for the stdio protocol.
logging.basicConfig(
    stream=sys.stderr,
    format="[MCP-RAG] %(asctime)s %(levelname)s: %(message)s",
    level=logging.INFO,
)
log = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Log estruturado de uso MCP (JSONL)
# ---------------------------------------------------------------------------

# JSONL usage log location: $MCP_USAGE_LOG, or ~/.rag_db/mcp_usage.log by default.
MCP_USAGE_LOG_PATH = Path(
    os.getenv("MCP_USAGE_LOG", str(Path.home().joinpath(".rag_db", "mcp_usage.log")))
).expanduser()


def _safe_preview(value: str, limit: int = 120) -> str:
    if len(value) <= limit:
        return value
    return value[:limit] + "...[truncated]"


def _get_parent_cmdline() -> str:
    ppid = os.getppid()
    cmdline_path = Path(f"/proc/{ppid}/cmdline")
    try:
        raw = cmdline_path.read_bytes()
        if not raw:
            return "unknown"
        parts = [p.decode("utf-8", errors="ignore") for p in raw.split(b"\x00") if p]
        return " ".join(parts) if parts else "unknown"
    except Exception:
        return "unknown"


def _infer_actor() -> dict[str, str]:
    """
    Work out who is calling the server.

    The actor is the first non-empty value among $MCP_CLIENT_NAME,
    $CLAUDE_USER and the current system user; "actor_source" records which
    one was used. "client_process" is the parent process command line.
    """
    client_name = os.environ.get("MCP_CLIENT_NAME")
    claude_user = os.environ.get("CLAUDE_USER")

    if client_name:
        actor, source = client_name, "MCP_CLIENT_NAME"
    elif claude_user:
        actor, source = claude_user, "CLAUDE_USER"
    else:
        actor, source = getpass.getuser(), "system_user"

    identity = {"actor": actor, "actor_source": source}
    identity["client_process"] = _get_parent_cmdline()
    return identity


def _log_tool_usage(event: str, tool_name: str, details: dict[str, object] | None = None) -> None:
    """
    Append one JSON line describing a tool event to MCP_USAGE_LOG_PATH.

    The record holds a UTC timestamp, the event, the tool name, this process's
    pid and the actor fields from _infer_actor(); `details` is added only when
    non-empty. Failures are logged as a warning and never raised.
    """
    try:
        MCP_USAGE_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)

        record: dict[str, object] = dict(
            timestamp=datetime.now(timezone.utc).isoformat(),
            event=event,
            tool=tool_name,
            pid=os.getpid(),
        )
        record.update(_infer_actor())
        if details:
            record["details"] = details

        with MCP_USAGE_LOG_PATH.open("a", encoding="utf-8") as sink:
            sink.write(json.dumps(record, ensure_ascii=True) + "\n")
    except Exception as e:
        log.warning("Falha ao registrar uso MCP em %s: %s", MCP_USAGE_LOG_PATH, e)


# ---------------------------------------------------------------------------
# Configurações
# ---------------------------------------------------------------------------


# Persisted indexer tuning file: $MCP_INDEXER_CONFIG_FILE, or
# ~/.rag_db/indexer_tuning.json by default.
INDEXER_CONFIG_PATH = Path(
    os.getenv(
        "MCP_INDEXER_CONFIG_FILE",
        str(Path.home().joinpath(".rag_db", "indexer_tuning.json")),
    )
).expanduser()


def _load_indexer_tuning_config() -> dict[str, object]:
    """
    Load the JSON object stored at INDEXER_CONFIG_PATH.

    Returns an empty dict when the file is missing, cannot be read or parsed,
    or does not contain a JSON object.
    """
    try:
        if INDEXER_CONFIG_PATH.exists():
            parsed = json.loads(INDEXER_CONFIG_PATH.read_text(encoding="utf-8"))
            if isinstance(parsed, dict):
                return parsed
    except Exception:
        pass
    return {}


# Loaded once at import time; read by the _config_* helpers.
INDEXER_TUNING_CONFIG = _load_indexer_tuning_config()


def _config_str(env_name: str, config_key: str, default: str) -> str:
    """
    Resolve a string setting.

    Precedence: the environment variable `env_name`, then the persisted tuning
    config entry `config_key`, then `default`. A source is used only when it
    holds a string that is not blank; the value is returned unstripped.
    """
    candidates = (
        os.environ.get(env_name),
        INDEXER_TUNING_CONFIG.get(config_key),
    )
    for candidate in candidates:
        if isinstance(candidate, str) and candidate.strip():
            return candidate
    return default


def _config_int(env_name: str, config_key: str, default: int, *, min_value: int = 1) -> int:
    """
    Resolve an integer setting, never returning less than `min_value`.

    Precedence: the environment variable `env_name` (when non-blank and a
    valid integer), then the persisted tuning config entry `config_key` (an
    int, or a string holding a valid integer), then `default`.
    """
    candidates: list[object] = []

    from_env = os.environ.get(env_name)
    if from_env is not None and from_env.strip():
        candidates.append(from_env)

    from_cfg = INDEXER_TUNING_CONFIG.get(config_key)
    if isinstance(from_cfg, (int, str)):
        candidates.append(from_cfg)

    for candidate in candidates:
        if isinstance(candidate, int):
            return max(min_value, candidate)
        try:
            return max(min_value, int(candidate))
        except ValueError:
            # Not a valid integer: fall through to the next source.
            continue

    return max(min_value, default)


# ChromaDB server address, overridable through the environment.
CHROMA_HOST = os.getenv("CHROMA_HOST", "localhost")
CHROMA_PORT = int(os.getenv("CHROMA_PORT", "8000"))

# Separate collections, one per embedding specialisation.
COLLECTION_CODE_JINA = "code_vectors_jina"
COLLECTION_DOC_BGE = "doc_vectors_bge"

# Embedding model identifiers.
JINA_V3_EMBEDDING_MODEL = "jinaai/jina-embeddings-v3"
JINA_V2_EMBEDDING_MODEL = "jinaai/jina-embeddings-v2-base-code"
BGE_EMBEDDING_MODEL = "BAAI/bge-m3"

# Defaults applied when neither the environment nor the config says otherwise.
DEFAULT_EMBEDDING_MODEL_CHOICE = "jina"
DEFAULT_JINA_QUANTIZATION = "dynamic-int8"
DEFAULT_SEARCH_MODE = "single"  # single | ensemble
DEFAULT_SEARCH_MODE = "single"  # single | ensemble

# Embedding model selection (env, then config, then default), normalised to
# lowercase; anything unrecognised falls back to the default with a warning.
_embedding_model_choice = _config_str(
    "MCP_EMBEDDING_MODEL", "embedding_model", DEFAULT_EMBEDDING_MODEL_CHOICE
)
_embedding_model_choice = _embedding_model_choice.strip().lower()
if _embedding_model_choice not in ("jina", "bge", "hybrid"):
    log.warning(
        "MCP_EMBEDDING_MODEL invalido '%s'. Usando '%s'.",
        _embedding_model_choice,
        DEFAULT_EMBEDDING_MODEL_CHOICE,
    )
    _embedding_model_choice = DEFAULT_EMBEDDING_MODEL_CHOICE

# Jina quantization mode: lowercased, with "_" accepted as an alias for "-".
# Unknown values fall back to the default with a warning.
_raw_jina_quantization = _config_str(
    "MCP_JINA_QUANTIZATION", "jina_quantization", DEFAULT_JINA_QUANTIZATION
)
JINA_QUANTIZATION = "-".join(_raw_jina_quantization.strip().lower().split("_"))
if JINA_QUANTIZATION not in ("default", "dynamic-int8"):
    log.warning(
        "MCP_JINA_QUANTIZATION invalido '%s'. Usando '%s'.",
        JINA_QUANTIZATION,
        DEFAULT_JINA_QUANTIZATION,
    )
    JINA_QUANTIZATION = DEFAULT_JINA_QUANTIZATION

# Default search mode from $MCP_SEARCH_MODE; invalid values fall back silently.
SEARCH_MODE_DEFAULT = os.getenv("MCP_SEARCH_MODE", DEFAULT_SEARCH_MODE).strip().lower()
if SEARCH_MODE_DEFAULT != "single" and SEARCH_MODE_DEFAULT != "ensemble":
    SEARCH_MODE_DEFAULT = DEFAULT_SEARCH_MODE

if "MCP_SEARCH_MODE" not in os.environ and _embedding_model_choice == "hybrid":
    # With the hybrid model and no explicit choice, ensemble is the expected default.
    SEARCH_MODE_DEFAULT = "ensemble"

# Reranker settings, all overridable through MCP_RERANK_* environment variables.
RERANK_MODEL_ID = os.getenv("MCP_RERANK_MODEL", "cross-encoder/ms-marco-MiniLM-L-6-v2")
RERANK_ENABLED = os.getenv("MCP_RERANK_ENABLED", "true").strip().lower() in (
    "1",
    "true",
    "yes",
    "on",
)
RERANK_CANDIDATE_MULTIPLIER = int(os.getenv("MCP_RERANK_CANDIDATE_MULTIPLIER", "3"))
RERANK_MAX_CANDIDATES = int(os.getenv("MCP_RERANK_MAX_CANDIDATES", "40"))
RERANKER_MAX_LENGTH = int(os.getenv("MCP_RERANK_MAX_LENGTH", "512"))
RERANKER_QUANTIZATION = os.getenv("MCP_RERANK_QUANTIZATION", "dynamic-int8").strip().lower()
if RERANKER_QUANTIZATION not in ("default", "dynamic-int8"):
    # Unknown quantization value: use the default mode.
    RERANKER_QUANTIZATION = "dynamic-int8"

# RRF constant from $MCP_RRF_K (default 60).
RRF_K = int(os.getenv("MCP_RRF_K", "60"))
# Embedding batch size: env, then persisted config, then 4; never below 1.
EMBEDDING_BATCH_SIZE = _config_int(
    "MCP_EMBEDDING_BATCH_SIZE",
    "embedding_batch_size",
    4,
    min_value=1,
)

_env_model_dir = os.environ.get("MCP_MODEL_DIR")
MODEL_DIR = (
    Path(_env_model_dir).expanduser()
    if _env_model_dir
    else Path.home() / ".cache" / "my-custom-rag-python" / "models"
)

# Splitter parameters (aligned with indexer_full.py, low-memory profile).
CHUNK_SIZE = _config_int("MCP_CHUNK_SIZE", "chunk_size", 3000, min_value=256)
CHUNK_OVERLAP = _config_int("MCP_CHUNK_OVERLAP", "chunk_overlap", 400, min_value=0)
# The overlap must stay strictly smaller than the chunk size.
CHUNK_OVERLAP = min(CHUNK_SIZE - 1, CHUNK_OVERLAP)

MAX_FILE_SIZE_BYTES = 1024 * 500  # 500 KB
TOP_K_RESULTS = 7
MAX_QUERY_RESULTS = 30

# Scan filters
# Directory names excluded from scanning.
IGNORED_DIRS = set(
    """
    .git node_modules __pycache__ .venv venv env
    dist build out .next .nuxt .cache coverage
    .pytest_cache .mypy_cache .ruff_cache target bin obj
    .idea .vscode vendor tmp temp logs .rag_db
    """.split()
)

# File extensions excluded from scanning (media, archives, binaries, data,
# fonts and office documents).
IGNORED_EXTENSIONS = set(
    """
    .png .jpg .jpeg .gif .svg .ico .webp .bmp
    .mp4 .mp3 .wav .ogg .avi .mov
    .zip .tar .gz .rar .7z .jar .war
    .pyc .pyo .so .dll .exe .bin
    .lock .sum .sqlite .db .sqlite3
    .ttf .woff .woff2 .eot
    .pdf .docx .xlsx .pptx
    """.split()
)

# Extensions classified as code.
CODE_EXTENSIONS = set(
    """
    .py .js .ts .tsx .jsx .java .c .h .cpp .hpp
    .go .rs .rb .php .cs .swift .kt .kts .scala .sql
    .sh .bash .zsh .ps1 .yaml .yml .toml .ini .conf
    .json .xml .html .css .scss .sass .vue .svelte .dart
    .lua .r .m .mm
    """.split()
)

# Extensions classified as documentation.
DOC_EXTENSIONS = set(".md .mdx .rst .txt .adoc .org .tex .csv".split())


@dataclass(frozen=True)
class BranchSpec:
    """Immutable description of one retrieval branch: an embedding model paired with a collection."""

    # Branch identifier; matches the key under which the spec is stored in BRANCH_SPECS.
    key: str
    # Short model selector, e.g. "jina", "jina_v2" or "bge".
    model_choice: str
    # Full embedding model identifier (one of the *_EMBEDDING_MODEL constants).
    model_id: str
    # Name of the ChromaDB collection used by this branch.
    collection_name: str
    # Kind of content the branch covers: "code" or "doc".
    content_domain: str
    # Human-readable branch name, e.g. "BGE Docs".
    label: str


# In hybrid mode the code branch uses Jina v2 (code); otherwise Jina v3.
if _embedding_model_choice == "hybrid":
    JINA_CODE_BRANCH_MODEL_CHOICE = "jina_v2"
    JINA_CODE_BRANCH_MODEL_ID = JINA_V2_EMBEDDING_MODEL
else:
    JINA_CODE_BRANCH_MODEL_CHOICE = "jina"
    JINA_CODE_BRANCH_MODEL_ID = JINA_V3_EMBEDDING_MODEL

# Available retrieval branches, keyed by each spec's own key.
BRANCH_SPECS: dict[str, BranchSpec] = {
    spec.key: spec
    for spec in (
        BranchSpec(
            key="jina_code",
            model_choice=JINA_CODE_BRANCH_MODEL_CHOICE,
            model_id=JINA_CODE_BRANCH_MODEL_ID,
            collection_name=COLLECTION_CODE_JINA,
            content_domain="code",
            label="Jina v2 Code" if _embedding_model_choice == "hybrid" else "Jina v3 Code",
        ),
        BranchSpec(
            key="bge_doc",
            model_choice="bge",
            model_id=BGE_EMBEDDING_MODEL,
            collection_name=COLLECTION_DOC_BGE,
            content_domain="doc",
            label="BGE Docs",
        ),
    )
}

# Branch used for single-branch operation: BGE only when "bge" is the selected model.
DEFAULT_SINGLE_BRANCH_KEY = "jina_code" if _embedding_model_choice != "bge" else "bge_doc"


@dataclass
class RetrievedHit:
    """One retrieved document chunk together with the branch it is associated with."""

    # NOTE(review): presumably a stable identifier used to match the same chunk
    # across branches — confirm against the code that builds these objects.
    key: str
    # Text of the retrieved chunk.
    document: str
    # Metadata stored with the chunk.
    metadata: dict[str, object]
    # Distance and similarity for the hit; either may be None.
    distance: float | None
    similarity: float | None
    # Spec of the branch associated with this hit.
    branch: BranchSpec
    # NOTE(review): presumably the position within the branch's result list;
    # whether it is 0- or 1-based is not visible here — confirm.
    rank: int


@dataclass
class FusedHit:
    """A result entry carrying a fused (RRF) score and an optional rerank score."""

    # NOTE(review): presumably the same identifier as RetrievedHit.key — confirm.
    key: str
    # Text of the chunk.
    document: str
    # Metadata stored with the chunk.
    metadata: dict[str, object]
    # Score produced by the RRF fusion step.
    rrf_score: float
    # Per-source details; NOTE(review): keys are presumably branch keys — confirm.
    source_details: dict[str, dict[str, object]]
    # Reranker score; None until (or unless) one is assigned.
    rerank_score: float | None = None


# ---------------------------------------------------------------------------
# Runtime caches (lazy loading para economizar RAM)
# ---------------------------------------------------------------------------

# Shared ChromaDB HTTP client; created on first use by _get_chroma_client().
_chroma_client: chromadb.HttpClient | None = None
# Collection handles keyed by collection name; filled by get_chroma_collection().
_collections: dict[str, chromadb.Collection] = {}
# NOTE(review): presumably loaded embedding models and their load-failure
# messages, keyed by model choice — the code that fills them is not shown here.
_models: dict[str, SentenceTransformer] = {}
_model_load_errors: dict[str, str] = {}
# NOTE(review): presumably the lazily created text splitter — confirm.
_splitter: RecursiveCharacterTextSplitter | None = None
# NOTE(review): presumably the lazily loaded reranker and the error recorded
# if loading it failed — confirm.
_reranker: CrossEncoder | None = None
_reranker_error: str | None = None


# ---------------------------------------------------------------------------
# Chroma e modelos
# ---------------------------------------------------------------------------


def _model_cache_dir(base_dir: Path, model_id: str) -> Path:
    safe_name = model_id.replace("/", "__").replace(":", "_")
    return base_dir / safe_name


def _get_chroma_client() -> chromadb.HttpClient:
    """
    Return the shared ChromaDB HTTP client, creating it on first use.

    The client is cached only after heartbeat() succeeds. A failed first
    attempt therefore leaves nothing cached, and the next call creates and
    checks a client again instead of returning one that was never verified.

    Raises:
        Whatever chromadb raises when the client cannot be created or the
        heartbeat fails.
    """
    global _chroma_client
    if _chroma_client is None:
        client = chromadb.HttpClient(host=CHROMA_HOST, port=CHROMA_PORT)
        client.heartbeat()
        _chroma_client = client
        log.info("Conectado ao ChromaDB em %s:%s", CHROMA_HOST, CHROMA_PORT)
    return _chroma_client


def get_chroma_collection(collection_name: str) -> chromadb.Collection:
    """
    Return the named ChromaDB collection, creating it if it does not exist.

    Handles are cached per name, so each collection is requested from the
    server at most once. New collections are created with cosine distance
    ("hnsw:space": "cosine").

    Raises:
        RuntimeError: when the collection cannot be obtained; the original
            error is attached as the cause.
    """
    if collection_name in _collections:
        return _collections[collection_name]

    try:
        client = _get_chroma_client()
        collection = client.get_or_create_collection(
            name=collection_name,
            metadata={"hnsw:space": "cosine"},
        )
        _collections[collection_name] = collection
        return collection
    except Exception as e:
        # Chain explicitly so the traceback shows the underlying failure as the cause.
        raise RuntimeError(
            f"Não foi possível acessar a coleção '{collection_name}' no ChromaDB "
            f"({CHROMA_HOST}:{CHROMA_PORT}). Erro: {e}"
        ) from e


def _load_sentence_transformer_from_local(model_id: str, local_model_dir: Path) -> SentenceTransformer:
    """Load a SentenceTransformer on CPU from an already-downloaded local directory.

    For ``jinaai/`` models (loaded with ``trust_remote_code=True``) several Hugging Face
    entry points are wrapped for the duration of the load:

    * ``AutoTokenizer.from_pretrained`` defaults ``fix_mistral_regex=True`` when the
      target is this model directory;
    * ``AutoModel.from_pretrained`` (for this model directory only),
      ``PreTrainedModel.from_pretrained`` and ``PreTrainedModel._from_config``
      forward a ``torch_dtype`` keyword as ``dtype``.

    The wrappers are removed again whether or not the load succeeds. If the load
    fails with a ``FileNotFoundError`` mentioning ``transformers_modules``, the
    Hugging Face dynamic-module cache is deleted and the load is retried once.

    Args:
        model_id: Model identifier; only its ``jinaai/`` prefix is inspected.
        local_model_dir: Directory holding the model files.

    Returns:
        The loaded model.

    Raises:
        FileNotFoundError: Re-raised when it is not the dynamic-cache case, or when
            the retry fails the same way. Other loader exceptions propagate unchanged.
    """
    trust_remote_code = model_id.startswith("jinaai/")
    tokenizer_kwargs = {"fix_mistral_regex": True}

    def _instantiate_model() -> SentenceTransformer:
        return SentenceTransformer(
            str(local_model_dir),
            device="cpu",
            trust_remote_code=trust_remote_code,
            tokenizer_kwargs=tokenizer_kwargs,
        )

    def _clear_hf_dynamic_modules_cache() -> None:
        cache_dir = Path.home() / ".cache" / "huggingface" / "modules" / "transformers_modules"
        if cache_dir.exists():
            log.warning("Limpando cache dinâmico do Hugging Face em %s", cache_dir)
            shutil.rmtree(cache_dir, ignore_errors=True)

    def _load_with_jina_patch() -> SentenceTransformer:
        if not trust_remote_code:
            return _instantiate_model()

        from transformers import AutoModel, AutoTokenizer
        from transformers.modeling_utils import PreTrainedModel

        # Bound originals: these are what the wrappers delegate to.
        original_from_pretrained = AutoTokenizer.from_pretrained
        original_model_from_pretrained = AutoModel.from_pretrained
        original_pretrained_model_from_pretrained_fn = PreTrainedModel.from_pretrained.__func__
        original_pretrained_model_from_config_fn = PreTrainedModel._from_config.__func__
        model_refs = {str(local_model_dir), str(local_model_dir.resolve())}

        # Raw class-dict entries: these are what gets put back afterwards.
        # Re-assigning the *bound* method (``Cls.attr = Cls.attr``) would freeze
        # ``cls`` to the owner class: a bound method does not rebind on attribute
        # access, so every subclass calling the classmethod would then receive the
        # base class as ``cls``. Attributes that were only inherited are recorded
        # as missing and removed again instead of being shadowed permanently.
        missing = object()
        saved_attrs = [
            (owner, attr, vars(owner).get(attr, missing))
            for owner, attr in (
                (AutoTokenizer, "from_pretrained"),
                (AutoModel, "from_pretrained"),
                (PreTrainedModel, "from_pretrained"),
                (PreTrainedModel, "_from_config"),
            )
        ]

        def _patched_from_pretrained(*args, **kwargs):
            model_ref = args[0] if args else kwargs.get("pretrained_model_name_or_path")
            if model_ref is not None and str(model_ref) in model_refs:
                kwargs.setdefault("fix_mistral_regex", True)
            return original_from_pretrained(*args, **kwargs)

        def _patched_model_from_pretrained(*args, **kwargs):
            model_ref = args[0] if args else kwargs.get("pretrained_model_name_or_path")
            if model_ref is not None and str(model_ref) in model_refs and "torch_dtype" in kwargs:
                kwargs = dict(kwargs)
                if "dtype" not in kwargs:
                    kwargs["dtype"] = kwargs["torch_dtype"]
                kwargs.pop("torch_dtype", None)
            return original_model_from_pretrained(*args, **kwargs)

        @classmethod
        def _patched_pretrained_model_from_pretrained(cls, *args, **kwargs):
            if "torch_dtype" in kwargs:
                kwargs = dict(kwargs)
                if "dtype" not in kwargs:
                    kwargs["dtype"] = kwargs["torch_dtype"]
                kwargs.pop("torch_dtype", None)
            return original_pretrained_model_from_pretrained_fn(cls, *args, **kwargs)

        @classmethod
        def _patched_pretrained_model_from_config(cls, *args, **kwargs):
            if "torch_dtype" in kwargs:
                kwargs = dict(kwargs)
                if "dtype" not in kwargs:
                    kwargs["dtype"] = kwargs["torch_dtype"]
                kwargs.pop("torch_dtype", None)
            return original_pretrained_model_from_config_fn(cls, *args, **kwargs)

        AutoTokenizer.from_pretrained = _patched_from_pretrained
        AutoModel.from_pretrained = _patched_model_from_pretrained
        PreTrainedModel.from_pretrained = _patched_pretrained_model_from_pretrained
        PreTrainedModel._from_config = _patched_pretrained_model_from_config
        try:
            return _instantiate_model()
        finally:
            for owner, attr, raw_attr in saved_attrs:
                if raw_attr is missing:
                    # The attribute was inherited; drop our override so lookup
                    # falls through to the base class again.
                    delattr(owner, attr)
                else:
                    setattr(owner, attr, raw_attr)

    try:
        return _load_with_jina_patch()
    except FileNotFoundError as e:
        if trust_remote_code and "transformers_modules" in str(e):
            log.warning("Cache dinâmico inconsistente detectado: %s", e)
            _clear_hf_dynamic_modules_cache()
            return _load_with_jina_patch()
        raise


def _apply_jina_quantization_if_needed(model: SentenceTransformer, model_id: str) -> SentenceTransformer:
    """Swap the Jina v3 ``ParametrizedLinear`` layers for dynamic int8 linears, in place.

    Nothing is done unless ``model_id`` equals ``JINA_V3_EMBEDDING_MODEL`` and
    ``JINA_QUANTIZATION`` is something other than ``"default"``.

    Args:
        model: Loaded model; its submodules are modified in place.
        model_id: Identifier of the loaded model.

    Returns:
        The same ``model`` object in every case. Failures are logged as warnings and
        never raised. NOTE(review): layers patched before a failure stay patched.
    """
    if model_id != JINA_V3_EMBEDDING_MODEL or JINA_QUANTIZATION == "default":
        return model

    try:
        import torch
        import warnings

        quantized_layers = 0
        for module in model.modules():
            # Matched by class name; the class itself is not imported in this block.
            if type(module).__name__ != "ParametrizedLinear":
                continue

            # Plain float32 Linear carrying a copy of the layer's weight and bias.
            float_linear = torch.nn.Linear(
                module.in_features,
                module.out_features,
                bias=module.bias is not None,
            )
            with torch.no_grad():
                float_linear.weight.copy_(module.weight.detach().to(torch.float32))
                if module.bias is not None:
                    float_linear.bias.copy_(module.bias.detach().to(torch.float32))

            # quantize_dynamic is applied to a one-element Sequential and the
            # quantized layer is taken back out with [0].
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=DeprecationWarning)
                quantized_linear = torch.quantization.quantize_dynamic(
                    torch.nn.Sequential(float_linear),
                    {torch.nn.Linear},
                    dtype=torch.qint8,
                )[0]

            module._dynamic_int8_linear = quantized_linear

            # Replacement forward: accepts task_id but does not use it, and returns
            # (output, input) when residual is true.
            def _forward_dynamic_int8(self, input, task_id=None, residual=False):
                out = self._dynamic_int8_linear(input)
                if residual:
                    return out, input
                return out

            # Bind to this instance only; the class is left untouched.
            module.forward = _forward_dynamic_int8.__get__(module, module.__class__)
            quantized_layers += 1

        if quantized_layers == 0:
            log.warning("Nenhuma camada ParametrizedLinear encontrada para dynamic-int8 no Jina.")
            return model

        log.info("Quantizacao Jina aplicada: dynamic-int8 (CPU, %s camadas).", quantized_layers)
        return model
    except Exception as quant_error:
        # Best effort: quantization problems must not prevent the model from being used.
        log.warning("Falha ao aplicar dynamic-int8 no Jina (%s); usando modelo padrao.", quant_error)
        return model


def get_embedding_model(model_choice: str) -> SentenceTransformer:
    """Return the embedding model for ``model_choice``, loading it on first use.

    Successful loads are memoised in ``_models``. A failed load is memoised in
    ``_model_load_errors`` and re-raised on every later call without retrying.

    Args:
        model_choice: ``"jina"``, ``"jina_v2"`` or ``"bge"``.

    Returns:
        The loaded model. For ``"jina"`` it has been passed through
        ``_apply_jina_quantization_if_needed``.

    Raises:
        RuntimeError: Unknown ``model_choice``, or the download/load failed. For a
            fresh failure the original exception is attached as ``__cause__``.
    """
    if model_choice in _models:
        return _models[model_choice]

    if model_choice in _model_load_errors:
        raise RuntimeError(_model_load_errors[model_choice])

    if model_choice == "jina":
        model_id = JINA_V3_EMBEDDING_MODEL
    elif model_choice == "jina_v2":
        model_id = JINA_V2_EMBEDDING_MODEL
    elif model_choice == "bge":
        model_id = BGE_EMBEDDING_MODEL
    else:
        raise RuntimeError(f"Modelo não suportado: {model_choice}")

    try:
        MODEL_DIR.mkdir(parents=True, exist_ok=True)
        preferred_model_cache_dir = _model_cache_dir(MODEL_DIR, model_id)
        log.info("Carregando embeddings '%s' em CPU (cache: %s)", model_id, preferred_model_cache_dir)

        # Preferred and fallback ids are the same model here.
        selection = download_model_with_fallback(
            preferred_model_id=model_id,
            fallback_model_id=model_id,
            local_dir=MODEL_DIR,
        )
        model = _load_sentence_transformer_from_local(selection.model_id, selection.local_dir)
        if model_choice == "jina":
            model = _apply_jina_quantization_if_needed(model, selection.model_id)

        _models[model_choice] = model
        log.info(
            "Modelo de embeddings pronto: %s (provider=%s, path=%s)",
            selection.model_id,
            selection.provider,
            selection.local_dir,
        )
        return model
    except Exception as e:
        message = f"Falha ao carregar modelo '{model_choice}' ({model_id}): {e}"
        _model_load_errors[model_choice] = message
        # Chain explicitly so the traceback shows the underlying failure as the cause.
        raise RuntimeError(message) from e


def get_reranker() -> CrossEncoder | None:
    """Return the shared cross-encoder reranker, or None when it cannot be used.

    None is returned when ``RERANK_ENABLED`` is false, or when loading failed (the
    failure text is kept in ``_reranker_error`` and no retry is attempted). A
    quantization failure alone is only logged; the unquantized reranker is used.
    """
    global _reranker, _reranker_error

    if not RERANK_ENABLED:
        return None
    # Either already loaded (return it) or already failed (``_reranker`` is still None).
    if _reranker is not None or _reranker_error is not None:
        return _reranker

    try:
        MODEL_DIR.mkdir(parents=True, exist_ok=True)
        picked = download_model_with_fallback(
            preferred_model_id=RERANK_MODEL_ID,
            fallback_model_id=RERANK_MODEL_ID,
            local_dir=MODEL_DIR,
        )

        cross_encoder = CrossEncoder(
            str(picked.local_dir),
            device="cpu",
            max_length=RERANKER_MAX_LENGTH,
            trust_remote_code=False,
        )

        if RERANKER_QUANTIZATION == "dynamic-int8":
            try:
                import torch

                int8_model = torch.quantization.quantize_dynamic(
                    cross_encoder.model,
                    {torch.nn.Linear},
                    dtype=torch.qint8,
                )
                cross_encoder.model = int8_model
                log.info("Reranker com quantizacao dynamic-int8 habilitada.")
            except Exception as quant_error:
                log.warning("Falha ao quantizar reranker (%s). Seguindo sem quantizacao.", quant_error)

        _reranker = cross_encoder
        log.info(
            "Reranker pronto: %s (provider=%s, path=%s)",
            picked.model_id,
            picked.provider,
            picked.local_dir,
        )
        return _reranker
    except Exception as load_error:
        _reranker_error = str(load_error)
        log.warning("Reranker indisponível. Busca seguirá sem reranking. Erro: %s", load_error)
        return None


def get_splitter() -> RecursiveCharacterTextSplitter:
    """Return the shared text splitter, building it on the first call."""
    global _splitter
    if _splitter is not None:
        return _splitter

    _splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", " ", ""],
        length_function=len,
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
    )
    return _splitter


# ---------------------------------------------------------------------------
# Indexação interna
# ---------------------------------------------------------------------------


def _make_chunk_id(file_path: str, chunk_index: int) -> str:
    raw = f"{file_path}::chunk::{chunk_index}"
    return hashlib.md5(raw.encode()).hexdigest()


def _make_result_key(metadata: dict[str, object], fallback_id: str) -> str:
    file_path = str(metadata.get("file_path", ""))
    chunk_index = str(metadata.get("chunk_index", ""))
    if file_path and chunk_index:
        return f"{file_path}::chunk::{chunk_index}"
    return fallback_id


def _delete_file_chunks(collection: chromadb.Collection, file_path: str) -> int:
    """Delete every chunk whose ``file_path`` metadata equals ``file_path``.

    Returns the number of ids that matched.
    """
    # include=[] asks for ids only, so documents and metadata are not loaded into RAM.
    matched = collection.get(where={"file_path": file_path}, include=[])
    stale_ids = matched.get("ids", []) if matched else []
    if not stale_ids:
        return len(stale_ids)
    collection.delete(ids=stale_ids)
    return len(stale_ids)


def _read_file_safe(filepath: Path) -> str | None:
    for encoding in ("utf-8", "latin-1", "cp1252"):
        try:
            return filepath.read_text(encoding=encoding)
        except UnicodeDecodeError:
            continue
        except OSError:
            return None
    return None


def _scan_folder(folder_path: Path) -> Iterator[Path]:
    """Yield indexable files under ``folder_path`` in sorted order.

    Directories in ``IGNORED_DIRS`` or starting with "." are not descended into.
    Files are skipped when their lowercased suffix is in ``IGNORED_EXTENSIONS``,
    when they exceed ``MAX_FILE_SIZE_BYTES``, or when they cannot be stat'ed.
    """
    for current_dir, subdirs, files in os.walk(folder_path):
        # Assign through the slice so os.walk sees the pruned, ordered list.
        subdirs[:] = sorted(
            name
            for name in subdirs
            if not (name in IGNORED_DIRS or name.startswith("."))
        )
        for name in sorted(files):
            candidate = Path(current_dir) / name
            if candidate.suffix.lower() in IGNORED_EXTENSIONS:
                continue
            try:
                oversized = candidate.stat().st_size > MAX_FILE_SIZE_BYTES
            except OSError:
                continue
            if oversized:
                continue
            yield candidate


def _classify_file_targets(filepath: Path) -> list[BranchSpec]:
    """Choose the branch(es) a file is indexed into, based on its lowercased suffix.

    A suffix found only in ``CODE_EXTENSIONS`` maps to ``jina_code``; one found only
    in ``DOC_EXTENSIONS`` maps to ``bge_doc``. A suffix found in both or in neither
    maps to both branches so that recall is not lost.
    """
    ext = filepath.suffix.lower()
    code_like = ext in CODE_EXTENSIONS
    doc_like = ext in DOC_EXTENSIONS

    # Exactly one of the two sets matched: the choice is unambiguous.
    if code_like != doc_like:
        return [BRANCH_SPECS["jina_code" if code_like else "bge_doc"]]

    return [BRANCH_SPECS["jina_code"], BRANCH_SPECS["bge_doc"]]


def _index_single_file_for_branch(
    filepath: Path,
    branch: BranchSpec,
    splitter: RecursiveCharacterTextSplitter,
    *,
    delete_existing: bool = True,
) -> int:
    """Chunk, embed and upsert one file into the collection of ``branch``.

    Args:
        filepath: File to index.
        branch: Supplies the model choice, collection name and metadata labels.
        splitter: Splits the file content into chunks.
        delete_existing: When true, chunks stored for this path in the branch's
            collection are removed before the new ones are written.

    Returns:
        Number of chunks written; 0 when the file is unreadable, blank, or yields
        no chunks.
    """
    text = _read_file_safe(filepath)
    if not text or not text.strip():
        return 0

    abs_path = str(filepath.resolve())
    model = get_embedding_model(branch.model_choice)
    collection = get_chroma_collection(branch.collection_name)

    chunks = splitter.split_text(text)
    if not chunks:
        return 0

    # Idempotent per-file refresh within this collection.
    if delete_existing:
        _delete_file_chunks(collection, abs_path)

    def _batched_chunks():
        """Yield lists of (chunk_index, chunk_text), closed once EMBEDDING_BATCH_SIZE is reached."""
        pending = []
        for position, piece in enumerate(chunks):
            pending.append((position, piece))
            if len(pending) >= EMBEDDING_BATCH_SIZE:
                yield pending
                pending = []
        if pending:
            yield pending

    stored = 0
    for group in _batched_chunks():
        ids = [_make_chunk_id(abs_path, position) for position, _ in group]
        docs = [piece for _, piece in group]
        metas = [
            {
                "file_path": abs_path,
                "file_name": filepath.name,
                "chunk_index": position,
                "source_collection": branch.collection_name,
                "source_model_choice": branch.model_choice,
                "source_model_id": branch.model_id,
                "content_domain": branch.content_domain,
            }
            for position, _ in group
        ]
        vectors = model.encode(
            docs,
            show_progress_bar=False,
            batch_size=EMBEDDING_BATCH_SIZE,
        ).tolist()
        collection.upsert(
            ids=ids,
            embeddings=vectors,
            documents=docs,
            metadatas=metas,
        )
        stored += len(ids)

    return stored


def _remove_file_from_all_collections(abs_path: str) -> tuple[dict[str, int], list[str]]:
    """Delete the chunks of ``abs_path`` from the collection of every branch in ``BRANCH_SPECS``.

    Returns:
        ``(removed, failures)``: deleted-chunk counts keyed by branch key, and one
        ``"<branch key>: <error>"`` string per branch that failed. A failing branch
        does not stop the remaining ones.
    """
    removed: dict[str, int] = {}
    failures: list[str] = []

    for spec in BRANCH_SPECS.values():
        try:
            removed[spec.key] = _delete_file_chunks(
                get_chroma_collection(spec.collection_name),
                abs_path,
            )
        except Exception as exc:
            failures.append(f"{spec.key}: {exc}")

    return removed, failures


# ---------------------------------------------------------------------------
# Busca semântica híbrida
# ---------------------------------------------------------------------------


def _query_branch(branch: BranchSpec, query: str, n_results: int) -> tuple[list[RetrievedHit], str | None]:
    """Embed ``query`` with the branch's model and search the branch's collection.

    Args:
        branch: Branch whose collection and embedding model are used.
        query: Text to search for.
        n_results: Maximum number of hits requested from the collection.

    Returns:
        ``(hits, error)``. On success ``error`` is None and ``hits`` is in the order
        returned by the collection, with ``rank`` starting at 1. When the collection
        or model is unavailable, or the query fails, ``hits`` is empty and ``error``
        is a message prefixed with the branch key. This function does not raise for
        those failures.
    """
    try:
        collection = get_chroma_collection(branch.collection_name)
        model = get_embedding_model(branch.model_choice)
    except Exception as e:
        return [], f"{branch.key}: recurso indisponível ({e})"

    try:
        query_embedding = model.encode([query], show_progress_bar=False).tolist()
        results = collection.query(
            query_embeddings=query_embedding,
            n_results=n_results,
            include=["documents", "metadatas", "distances"],
        )
    except Exception as e:
        return [], f"{branch.key}: falha na query ({e})"

    def _first_row(field: str) -> list:
        # One query was sent, so only row 0 matters. A missing, None or empty
        # column is treated as "no rows" instead of raising.
        rows = results.get(field) or [[]]
        return rows[0] or []

    documents = _first_row("documents")
    metadatas = _first_row("metadatas")
    distances = _first_row("distances")
    ids = _first_row("ids")

    hits: list[RetrievedHit] = []
    for position, (doc, meta, dist) in enumerate(zip(documents, metadatas, distances)):
        rank = position + 1
        metadata = meta or {}
        fallback_id = ids[position] if position < len(ids) else f"{branch.key}:{rank}"
        key = _make_result_key(metadata, fallback_id)

        # Convert the distance once, under a single guard. Previously the guarded
        # conversion only covered ``similarity`` and an unguarded ``float(dist)``
        # followed, so a non-numeric distance still aborted the whole branch.
        distance = None
        similarity = None
        if dist is not None:
            try:
                distance = float(dist)
                similarity = 1.0 - distance
            except Exception:
                distance = None
                similarity = None

        hits.append(
            RetrievedHit(
                key=key,
                document=(doc or ""),
                metadata=metadata,
                distance=distance,
                similarity=similarity,
                branch=branch,
                rank=rank,
            )
        )

    return hits, None


def _rrf_fuse(hits_by_branch: dict[str, list[RetrievedHit]], top_limit: int) -> list[FusedHit]:
    """Merge per-branch hit lists with Reciprocal Rank Fusion.

    Hits sharing a ``key`` collapse into one ``FusedHit`` whose ``rrf_score`` is the
    sum of ``1 / (RRF_K + rank)`` over the lists containing it, with rank counted
    from 1 inside each list. The document and metadata come from the contributing
    hit with the highest similarity; that similarity is kept in the metadata under
    ``"_best_similarity"``.

    Returns:
        At most ``top_limit`` entries, highest ``rrf_score`` first.
    """
    merged: dict[str, FusedHit] = {}

    for branch_hits in hits_by_branch.values():
        for position, hit in enumerate(branch_hits, start=1):
            share = 1.0 / (RRF_K + position)

            slot = merged.get(hit.key)
            if slot is None:
                slot = FusedHit(
                    key=hit.key,
                    document=hit.document,
                    metadata=dict(hit.metadata),
                    rrf_score=0.0,
                    source_details={},
                )
                merged[hit.key] = slot

            slot.rrf_score += share
            origin = hit.branch
            slot.source_details[origin.key] = {
                "rank": position,
                "distance": hit.distance,
                "similarity": hit.similarity,
                "collection": origin.collection_name,
                "model_choice": origin.model_choice,
                "model_id": origin.model_id,
                "content_domain": origin.content_domain,
            }

            # Keep document and metadata of the hit with the best local similarity.
            best_so_far = slot.metadata.get("_best_similarity", -10.0)
            challenger = -10.0 if hit.similarity is None else hit.similarity
            if challenger > best_so_far:
                slot.document = hit.document
                slot.metadata = dict(hit.metadata)
                slot.metadata["_best_similarity"] = challenger

    ranked = sorted(merged.values(), key=lambda item: item.rrf_score, reverse=True)

    # Cap the pool here so the reranker has less to score.
    return ranked[:top_limit]


def _apply_rerank(query: str, fused_hits: list[FusedHit], top_k: int) -> tuple[list[FusedHit], bool, str | None]:
    """Re-order fused hits with the cross-encoder and keep the best ``top_k``.

    Returns:
        ``(hits, applied, reason)``. ``applied`` is True only when scoring succeeded.
        Otherwise the first ``top_k`` hits are returned and ``reason`` explains why
        (reranker load error, ``"reranker_desabilitado"``, or the scoring error).
        An empty input yields ``([], False, None)``.

    On success ``fused_hits`` is sorted in place and each hit gets ``rerank_score``.
    """
    if not fused_hits:
        return [], False, None

    reranker = get_reranker()
    if reranker is None:
        return fused_hits[:top_k], False, (_reranker_error or "reranker_desabilitado")

    def _order(hit: FusedHit) -> tuple[float, float]:
        # Rerank score first; the RRF score breaks ties.
        primary = -1e9 if hit.rerank_score is None else hit.rerank_score
        return (primary, hit.rrf_score)

    try:
        scores = reranker.predict(
            [(query, hit.document) for hit in fused_hits],
            show_progress_bar=False,
            convert_to_numpy=True,
        )
        for hit, score in zip(fused_hits, scores):
            hit.rerank_score = float(score)

        fused_hits.sort(key=_order, reverse=True)
        return fused_hits[:top_k], True, None
    except Exception as e:
        return fused_hits[:top_k], False, str(e)


def _format_similarity(similarity: float | None) -> str:
    if similarity is None:
        return "n/a"
    return f"{round(similarity * 100, 1)}%"


def _format_fused_results(
    *,
    query: str,
    mode: str,
    hits: list[FusedHit],
    branch_errors: list[str],
    rerank_applied: bool,
    rerank_error: str | None,
) -> str:
    """Render search results as Markdown text.

    With no hits, a short "nothing found" message is returned, followed by the
    branch errors if any. Otherwise the output has a header (query, mode, branch
    warnings, and the rerank status in ensemble mode) and one section per hit with
    its scores, retrieval sources, unified metadata and a snippet of at most 800
    characters.
    """
    joined_errors = " | ".join(branch_errors)

    if not hits:
        empty_message = "Nenhum resultado encontrado. As coleções podem estar vazias."
        if branch_errors:
            empty_message += "\nFalhas detectadas: " + joined_errors
        return empty_message

    def _render_hit(position: int, hit: FusedHit) -> list[str]:
        # "_best_similarity" is internal bookkeeping from the fusion step.
        metadata = {k: v for k, v in hit.metadata.items() if k != "_best_similarity"}

        file_path = str(metadata.get("file_path", "desconhecido"))
        chunk_index = metadata.get("chunk_index", "?")
        default_name = "?" if file_path == "desconhecido" else Path(file_path).name
        file_name = str(metadata.get("file_name", default_name))

        per_source = hit.source_details
        source_models = sorted({str(d.get("model_choice", "?")) for d in per_source.values()})
        source_collections = sorted({str(d.get("collection", "?")) for d in per_source.values()})

        by_rank = sorted(per_source.items(), key=lambda item: int(item[1].get("rank", 999999)))
        source_parts = [
            f"{source_key}(rank={details.get('rank')}, sim={_format_similarity(details.get('similarity'))})"
            for source_key, details in by_rank
        ]

        snippet = hit.document.strip()
        if len(snippet) > 800:
            snippet = snippet[:800] + "\n... [truncado]"

        score_line = f"RRF={hit.rrf_score:.4f}"
        if hit.rerank_score is not None:
            score_line += f" | rerank={hit.rerank_score:.4f}"

        return [
            f"## [{position}] {file_path}",
            f"**Scores:** {score_line}",
            f"**Fontes de recuperação:** {', '.join(source_parts)}",
            "**Metadados unificados:** "
            f"file_name={file_name} | chunk_index={chunk_index} | "
            f"source_models={source_models} | source_collections={source_collections}",
            "",
            f"```\n{snippet}\n```",
            "",
        ]

    lines: list[str] = [f"# Resultados para: '{query}'", f"**Modo:** {mode}"]

    if branch_errors:
        lines.append("**Avisos de branch:** " + joined_errors)

    if mode == "ensemble":
        rerank_status = (
            f"**Reranking:** ativo ({RERANK_MODEL_ID})"
            if rerank_applied
            else f"**Reranking:** indisponível ({rerank_error or 'sem detalhes'})"
        )
        lines.append(rerank_status)

    lines.append("")

    for position, hit in enumerate(hits, start=1):
        lines.extend(_render_hit(position, hit))

    return "\n".join(lines)


def _run_single_mode(query: str, top_k: int) -> tuple[list[FusedHit], list[str], bool, str | None]:
    """Search the default branch, falling back to the other branch when it has no hits.

    Returns:
        ``(hits, errors, False, None)``. Reranking is never applied in this mode,
        so the last two items are constant.
    """
    primary = BRANCH_SPECS[DEFAULT_SINGLE_BRANCH_KEY]

    hits, primary_error = _query_branch(primary, query, top_k)
    errors: list[str] = [primary_error] if primary_error else []

    # Automatic fallback to the alternative branch keeps the tool usable.
    if not hits:
        other_key = "jina_code" if primary.key != "jina_code" else "bge_doc"
        backup_hits, backup_error = _query_branch(BRANCH_SPECS[other_key], query, top_k)
        if backup_error:
            errors.append(backup_error)
        if backup_hits:
            hits = backup_hits

    if hits:
        return _rrf_fuse({"single": hits}, top_k), errors, False, None
    return [], errors, False, None


def _run_ensemble_mode(query: str, top_k: int) -> tuple[list[FusedHit], list[str], bool, str | None]:
    """Query both branches in parallel, fuse the hits with RRF and rerank them.

    Returns:
        ``(hits, branch_errors, rerank_applied, rerank_error)``. When no branch
        produced hits the result is ``([], branch_errors, False, None)``.
    """
    per_branch_k = min(MAX_QUERY_RESULTS, max(top_k * 2, top_k))
    branches = [BRANCH_SPECS[name] for name in ("jina_code", "bge_doc")]

    hits_by_branch: dict[str, list[RetrievedHit]] = {}
    branch_errors: list[str] = []

    with ThreadPoolExecutor(max_workers=len(branches)) as pool:
        pending = {
            pool.submit(_query_branch, spec, query, per_branch_k): spec
            for spec in branches
        }
        # Results are handled in completion order.
        for finished in as_completed(pending):
            spec = pending[finished]
            try:
                found, problem = finished.result()
                if problem:
                    branch_errors.append(problem)
                if found:
                    hits_by_branch[spec.key] = found
            except Exception as exc:
                branch_errors.append(f"{spec.key}: falha inesperada ({exc})")

    if not hits_by_branch:
        return [], branch_errors, False, None

    pool_size = min(RERANK_MAX_CANDIDATES, max(top_k, top_k * RERANK_CANDIDATE_MULTIPLIER))
    candidates = _rrf_fuse(hits_by_branch, pool_size)
    final_hits, rerank_applied, rerank_error = _apply_rerank(query, candidates, top_k)
    return final_hits, branch_errors, rerank_applied, rerank_error


# ---------------------------------------------------------------------------
# Servidor MCP via FastMCP
# ---------------------------------------------------------------------------

# FastMCP server instance; the tool functions below register themselves on it
# through the @mcp.tool() decorator. The ``instructions`` text is passed to
# FastMCP as-is, so it is left in its original language.
mcp = FastMCP(
    name="rag-codebase",
    instructions=(
        "Servidor RAG para busca semântica em código-fonte local com suporte a ensemble híbrido. "
        "No modo hybrid, a branch de código usa Jina v2 e a de documentação usa BGE. "
        "Use semantic_search_code(query, top_k, mode='ensemble') para combinar Jina+BGE com RRF e reranking. "
        "Use update_file_index após editar um arquivo para manter as duas coleções sincronizadas. "
        "Use index_specific_folder para indexação recursiva sob demanda."
    ),
)


# ---------------------------------------------------------------------------
# Tool 1: semantic_search_code
# ---------------------------------------------------------------------------

@mcp.tool()
def semantic_search_code(query: str, top_k: int = TOP_K_RESULTS, mode: str = SEARCH_MODE_DEFAULT) -> str:
    """
    Busca semântica no índice vetorial local.

    Modos:
    - single: usa apenas uma branch (Jina/BGE conforme MCP_EMBEDDING_MODEL; no hybrid, Jina v2).
    - ensemble: consulta em paralelo code_vectors_jina + doc_vectors_bge,
      faz fusão via Reciprocal Rank Fusion (RRF) e reranking leve.

    Args:
        query: Descrição do que procurar.
        top_k: Quantidade final de resultados.
        mode: "single" (padrão) ou "ensemble".

    Returns:
        Resultado textual formatado para consumo pelo LLM.
    """
    # The docstring above is kept verbatim: it is exposed as the tool description.
    tool = "semantic_search_code"

    def _finish(details: dict) -> None:
        # Every exit path records exactly one "tool_call_end" event.
        _log_tool_usage(event="tool_call_end", tool_name=tool, details=details)

    raw_query = (query or "").strip()
    search_mode = (mode or SEARCH_MODE_DEFAULT).strip().lower()

    _log_tool_usage(
        event="tool_call_start",
        tool_name=tool,
        details={
            "query_preview": _safe_preview(raw_query),
            "query_len": len(raw_query),
            "top_k": top_k,
            "mode": search_mode,
        },
    )

    if not raw_query:
        _finish({"status": "error", "reason": "empty_query"})
        return "Erro: a query não pode ser vazia."

    # Clamp the requested result count to the range 1..20.
    top_k = max(1, min(top_k, 20))
    if search_mode not in {"single", "ensemble"}:
        _finish({"status": "error", "reason": "invalid_mode", "mode": search_mode})
        return "Erro: mode inválido. Use 'single' ou 'ensemble'."

    try:
        runner = _run_ensemble_mode if search_mode == "ensemble" else _run_single_mode
        hits, branch_errors, rerank_applied, rerank_error = runner(raw_query, top_k)

        result_text = _format_fused_results(
            query=raw_query,
            mode=search_mode,
            hits=hits,
            branch_errors=branch_errors,
            rerank_applied=rerank_applied,
            rerank_error=rerank_error,
        )

        _finish(
            {
                "status": "ok",
                "mode": search_mode,
                "result_count": len(hits),
                "branch_errors": len(branch_errors),
                "rerank_applied": rerank_applied,
            }
        )
        return result_text
    except Exception as e:
        _finish({"status": "error", "reason": "search_failed", "error": str(e), "mode": search_mode})
        return f"Erro ao executar busca semântica ({search_mode}): {e}"


# ---------------------------------------------------------------------------
# Tool 2: update_file_index
# ---------------------------------------------------------------------------

@mcp.tool()
def update_file_index(file_path: str) -> str:
    """
    Atualiza o índice RAG para um arquivo específico.

    O arquivo é classificado como código/doc e indexado na coleção apropriada.
    Para extensões ambíguas, indexa em ambas as coleções.
    """
    # The docstring above is kept verbatim: it is exposed as the tool description.
    tool = "update_file_index"
    filepath = Path(file_path).resolve()
    abs_path = str(filepath)

    def _finish(details: dict) -> None:
        _log_tool_usage(event="tool_call_end", tool_name=tool, details=details)

    def _reject(reason: str, message: str) -> str:
        # Shared shape of the validation failures below: log, then return the message.
        _finish({"status": "error", "reason": reason, "file_path": abs_path})
        return message

    _log_tool_usage(
        event="tool_call_start",
        tool_name=tool,
        details={"file_path": abs_path},
    )

    if not filepath.exists():
        return _reject("file_not_found", f"Erro: arquivo não encontrado: {filepath}")

    if not filepath.is_file():
        return _reject("not_a_file", f"Erro: o caminho não aponta para um arquivo: {filepath}")

    if filepath.stat().st_size > MAX_FILE_SIZE_BYTES:
        return _reject(
            "file_too_large",
            f"Erro: arquivo muito grande (>{MAX_FILE_SIZE_BYTES // 1024}KB): {filepath}",
        )

    splitter = get_splitter()
    targets = _classify_file_targets(filepath)

    # Clear the path from every collection first, so a file whose classification
    # changed does not leave chunks behind in a collection it no longer targets.
    deleted_per_branch, deletion_errors = _remove_file_from_all_collections(abs_path)

    inserted_per_branch: dict[str, int] = {}
    index_errors: list[str] = []
    for spec in targets:
        try:
            inserted_per_branch[spec.key] = _index_single_file_for_branch(
                filepath,
                spec,
                splitter,
                delete_existing=False,  # already removed from all collections above
            )
        except Exception as exc:
            index_errors.append(f"{spec.key}: {exc}")

    success_branches = [name for name, count in inserted_per_branch.items() if count > 0]

    _finish(
        {
            "status": "ok" if success_branches else "error",
            "file_path": abs_path,
            "targets": [spec.key for spec in targets],
            "deleted_per_branch": deleted_per_branch,
            "inserted_per_branch": inserted_per_branch,
            "deletion_errors": len(deletion_errors),
            "index_errors": len(index_errors),
        }
    )

    if index_errors and not success_branches:
        return "\n".join(
            [
                "Erro: não foi possível reindexar o arquivo em nenhuma coleção.",
                f"Arquivo: {filepath}",
                "Falhas: " + " | ".join(index_errors),
            ]
        )

    report = [
        "Arquivo reindexado.",
        f"  Arquivo : {filepath}",
        f"  Coleções alvo: {[spec.collection_name for spec in targets]}",
        f"  Remoções por coleção: {deleted_per_branch}",
        f"  Inserções por coleção: {inserted_per_branch}",
    ]
    if deletion_errors:
        report.append("  Avisos na remoção: " + " | ".join(deletion_errors))
    if index_errors:
        report.append("  Avisos na indexação: " + " | ".join(index_errors))
    return "\n".join(report)


# ---------------------------------------------------------------------------
# Tool 3: delete_file_index
# ---------------------------------------------------------------------------

@mcp.tool()
def delete_file_index(file_path: str) -> str:
    """
    Remove um arquivo do índice em todas as coleções gerenciadas.
    """
    # The docstring above is kept verbatim: it is exposed as the tool description.
    tool = "delete_file_index"
    abs_path = str(Path(file_path).resolve())

    _log_tool_usage(
        event="tool_call_start",
        tool_name=tool,
        details={"file_path": abs_path},
    )

    deleted_per_branch, errors = _remove_file_from_all_collections(abs_path)
    total_deleted = sum(deleted_per_branch.values())
    nothing_removed = total_deleted == 0

    _log_tool_usage(
        event="tool_call_end",
        tool_name=tool,
        details={
            "status": "ok" if total_deleted > 0 else "warning",
            "file_path": abs_path,
            "deleted_per_branch": deleted_per_branch,
            "errors": len(errors),
        },
    )

    joined_errors = " | ".join(errors)

    if nothing_removed:
        message = f"Nenhum chunk encontrado para o arquivo: {abs_path}"
        if errors:
            message += "\nFalhas parciais: " + joined_errors
        return message

    report = [
        "Removido do índice com sucesso.",
        f"  Arquivo : {abs_path}",
        f"  Deleções por coleção: {deleted_per_branch}",
    ]
    if errors:
        report.append("  Avisos: " + joined_errors)
    return "\n".join(report)


# ---------------------------------------------------------------------------
# Tool 4: index_specific_folder
# ---------------------------------------------------------------------------

@mcp.tool()
def index_specific_folder(folder_path: str) -> str:
    """
    Indexa recursivamente uma pasta em coleções separadas por domínio.
    """
    # NOTE(review): the docstring above is kept verbatim; the tool decorator
    # presumably exposes it as the tool description — confirm before editing.
    #
    # Flow: resolve folder_path, iterate the files yielded by _scan_folder and
    # index each one into every branch returned by _classify_file_targets.
    # A failure for one file/branch pair is counted (the first 10 messages are
    # kept as samples) and does not stop the run. Returns a text report.
    tool_name = "index_specific_folder"
    folder = Path(folder_path).resolve()
    folder_str = str(folder)

    def _log_end(details: dict) -> None:
        # Every exit path records exactly one tool_call_end event.
        _log_tool_usage(event="tool_call_end", tool_name=tool_name, details=details)

    _log_tool_usage(
        event="tool_call_start",
        tool_name=tool_name,
        details={"folder_path": folder_str},
    )

    if not folder.exists():
        _log_end({"status": "error", "reason": "folder_not_found", "folder_path": folder_str})
        return f"Erro: pasta não encontrada: {folder}"

    if not folder.is_dir():
        _log_end({"status": "error", "reason": "not_a_folder", "folder_path": folder_str})
        return f"Erro: o caminho não é um diretório: {folder}"

    splitter = get_splitter()

    processed_files = 0
    branch_file_counts = {key: 0 for key in BRANCH_SPECS}
    branch_chunk_counts = {key: 0 for key in BRANCH_SPECS}
    error_count = 0
    error_samples: list[str] = []

    for filepath in _scan_folder(folder):
        processed_files += 1

        for branch in _classify_file_targets(filepath):
            try:
                n_chunks = _index_single_file_for_branch(filepath, branch, splitter)
                branch_file_counts[branch.key] += 1
                branch_chunk_counts[branch.key] += n_chunks
            except Exception as e:
                error_count += 1
                # Keep the report bounded: only the first 10 failures are listed.
                if len(error_samples) < 10:
                    error_samples.append(f"{filepath.name} [{branch.key}]: {e}")

    if processed_files == 0:
        _log_end(
            {"status": "ok", "folder_path": folder_str, "files_processed": 0, "chunks": 0, "errors": 0}
        )
        return f"Nenhum arquivo indexável encontrado em: {folder}"

    total_chunks = sum(branch_chunk_counts.values())

    # The logged status reflects the outcome instead of always being "ok":
    # no failures -> "ok"; failures but at least one file indexed -> "warning";
    # failures and nothing indexed -> "error". These are the same status values
    # the other tools in this file already log.
    if error_count == 0:
        status = "ok"
    elif any(branch_file_counts.values()):
        status = "warning"
    else:
        status = "error"

    _log_end(
        {
            "status": status,
            "folder_path": folder_str,
            "files_processed": processed_files,
            "chunks": total_chunks,
            "errors": error_count,
            "branch_file_counts": branch_file_counts,
            "branch_chunk_counts": branch_chunk_counts,
        }
    )

    report = [
        "Indexação da pasta concluída.",
        f"  Pasta: {folder}",
        f"  Arquivos processados: {processed_files}",
        f"  Total de chunks: {total_chunks}",
        f"  Arquivos por branch: {branch_file_counts}",
        f"  Chunks por branch: {branch_chunk_counts}",
    ]

    if error_count:
        report.append(f"  Erros ({error_count}):")
        report.extend(f"    - {err}" for err in error_samples)
        omitted = error_count - len(error_samples)
        if omitted > 0:
            report.append(f"    ... e mais {omitted} erros.")

    return "\n".join(report)


# ---------------------------------------------------------------------------
# Ponto de entrada
# ---------------------------------------------------------------------------

# Script entry point: log the effective configuration, try to open the
# Chroma connection and both collections, then serve MCP over stdio.
if __name__ == "__main__":
    log.info("Iniciando servidor MCP RAG (stdio)...")
    log.info("ChromaDB: %s:%s", CHROMA_HOST, CHROMA_PORT)
    log.info(
        "Coleções: %s (%s), %s (%s)",
        COLLECTION_CODE_JINA,
        BRANCH_SPECS["jina_code"].model_id,
        COLLECTION_DOC_BGE,
        BRANCH_SPECS["bge_doc"].model_id,
    )
    log.info("Modo padrão de busca: %s", SEARCH_MODE_DEFAULT)
    log.info("Modelo single padrão: %s", BRANCH_SPECS[DEFAULT_SINGLE_BRANCH_KEY].model_id)
    log.info("Quantizacao Jina: %s", JINA_QUANTIZATION)
    log.info("Config de tuning carregada de: %s (found=%s)", INDEXER_CONFIG_PATH, bool(INDEXER_TUNING_CONFIG))
    log.info("Embedding batch size: %s", EMBEDDING_BATCH_SIZE)
    log.info("Chunk params: size=%s overlap=%s", CHUNK_SIZE, CHUNK_OVERLAP)
    log.info("Reranker: %s (enabled=%s, quant=%s)", RERANK_MODEL_ID, RERANK_ENABLED, RERANKER_QUANTIZATION)
    log.info("Pasta de modelos locais: %s", MODEL_DIR)
    log.info("Uso MCP será registrado em: %s", MCP_USAGE_LOG_PATH)

    # Warm up only the Chroma connection; models stay lazy to save RAM.
    try:
        _get_chroma_client()
        get_chroma_collection(COLLECTION_CODE_JINA)
        get_chroma_collection(COLLECTION_DOC_BGE)
        log.info("Conexão Chroma inicializada. Modelos serão carregados sob demanda.")
    except Exception as e:
        # A failed warm-up is logged but not fatal: the server still starts below.
        log.error("Falha ao inicializar ChromaDB: %s", e)
        log.error("O servidor continuará, mas as ferramentas retornarão erro até o ChromaDB estar disponível.")

    mcp.run(transport="stdio")
"
565
+ B64_MODEL_DL_HF="#!/usr/bin/env python3
from __future__ import annotations

"""
download_model_from_hugginface.py

Camada de download de modelos com prioridade de provedores e fallback de modelo.
Fluxo padrão:
1) tenta baixar o modelo preferido via Hugging Face;
2) se falhar, tenta provedores alternativos (quando disponíveis);
3) se o modelo preferido falhar em todos os provedores, tenta modelo fallback.
"""

from dataclasses import dataclass
import getpass
import os
from pathlib import Path
import shutil
import sys
from typing import Protocol


class ModelDownloadStrategy(Protocol):
    """Structural interface every model download provider must satisfy."""

    # Provider label; it is used in progress messages and in the
    # DownloadSelection returned by download_model_with_fallback.
    name: str

    def download(self, model_id: str, local_dir: Path) -> None:
        """Download ``model_id`` into ``local_dir`` or raise an exception."""
        ...


class HuggingFaceDownloadStrategy:
    """Download provider backed by ``huggingface_hub.snapshot_download``."""

    name = "huggingface"

    def download(self, model_id: str, local_dir: Path) -> None:
        """
        Download ``model_id`` into ``local_dir``.

        The token is taken from ``HF_TOKEN`` or, failing that, from
        ``HUGGING_FACE_HUB_TOKEN``. The transfer itself is delegated to
        ``_download_with_hf_token_recovery``.
        """
        # Imported lazily so the module can be loaded without huggingface_hub.
        from huggingface_hub import snapshot_download

        env = os.environ
        token_from_env = env.get("HF_TOKEN") or env.get("HUGGING_FACE_HUB_TOKEN")
        _download_with_hf_token_recovery(
            repo_id=model_id,
            local_dir=local_dir,
            hf_token=token_from_env,
            snapshot_download=snapshot_download,
        )


@dataclass(frozen=True)
class DownloadSelection:
    """Immutable record of which model was obtained, from where, and into which directory."""

    # Identifier of the model that ended up being used (preferred or fallback).
    model_id: str
    # Name of the provider that delivered it; "local-cache" when it was reused.
    provider: str
    # Directory that holds the model files.
    local_dir: Path


def _load_optional_strategies() -> list[ModelDownloadStrategy]:
    strategies: list[ModelDownloadStrategy] = []

    try:
        from download_model_from_modelscope import ModelScopeDownloadStrategy

        strategies.append(ModelScopeDownloadStrategy())
    except Exception:
        # Provider opcional: ignora se não estiver disponível no ambiente.
        pass

    return strategies


def build_default_strategies() -> list[ModelDownloadStrategy]:
    """Build the provider list in priority order: Hugging Face first, then optional providers."""
    ordered: list[ModelDownloadStrategy] = [HuggingFaceDownloadStrategy()]
    ordered.extend(_load_optional_strategies())
    return ordered


_MODEL_READY_MARKER = ".download_complete"


def _prepare_destination(local_dir: Path, *, clean: bool) -> None:
    if clean and local_dir.exists():
        shutil.rmtree(local_dir)
    local_dir.mkdir(parents=True, exist_ok=True)


def _model_cache_dir(base_dir: Path, model_id: str) -> Path:
    # Evita colisão de nomes e mantém diretório seguro em qualquer SO.
    safe_name = model_id.replace("/", "__").replace(":", "_")
    return base_dir / safe_name


def _cache_ready(local_dir: Path) -> bool:
    """
    Tell whether ``local_dir`` holds a completed download.

    True only when the ready marker file exists and the directory contains
    at least one entry other than that marker.
    """
    if not (local_dir / _MODEL_READY_MARKER).exists():
        return False
    if not local_dir.exists():
        return False
    for entry in local_dir.iterdir():
        if entry.name != _MODEL_READY_MARKER:
            return True
    return False


def _mark_cache_ready(local_dir: Path) -> None:
    """Write the ready marker file into ``local_dir`` to flag a finished download."""
    marker_path = local_dir.joinpath(_MODEL_READY_MARKER)
    marker_path.write_text("ok\n", encoding="utf-8")


def _status_code_from_error(exc: Exception) -> int | None:
    response = getattr(exc, "response", None)
    if response is None:
        return None
    status_code = getattr(response, "status_code", None)
    if isinstance(status_code, int):
        return status_code
    return None


def _is_invalid_hf_token_error(exc: Exception) -> bool:
    """
    Heuristically decide whether ``exc`` signals a rejected Hugging Face token.

    True when the attached response status is 401, or when the lower-cased
    message contains one of the known authentication keywords.
    """
    lowered = str(exc).lower()
    if _status_code_from_error(exc) == 401:
        return True
    for needle in ("invalid token", "token is invalid", "unauthorized", "401"):
        if needle in lowered:
            return True
    return False


def _prompt_recover_invalid_hf_token() -> tuple[str, str | None]:
    if not sys.stdin.isatty():
        return ("no-token", None)

    while True:
        print(
            "[!] O token do HuggingFace parece inválido. Escolha: "
            "[1] informar novo token, [2] continuar sem token.",
            file=sys.stderr,
        )
        answer = input("> Escolha [1/2]: ").strip().lower()
        if answer in {"1", "novo", "new"}:
            new_token = getpass.getpass("Cole o novo HF_TOKEN: ").strip()
            if new_token:
                return ("new-token", new_token)
            print("[!] Token vazio. Tente novamente.", file=sys.stderr)
            continue
        if answer in {"2", "", "sem", "no"}:
            return ("no-token", None)
        print("[!] Opção inválida. Digite 1 ou 2.", file=sys.stderr)


def _download_with_hf_token_recovery(
    *,
    repo_id: str,
    local_dir: Path,
    hf_token: str | None,
    snapshot_download,
) -> None:
    attempt_token = hf_token

    while True:
        try:
            snapshot_download(
                repo_id=repo_id,
                local_dir=str(local_dir),
                token=attempt_token,
            )
            if attempt_token:
                os.environ["HF_TOKEN"] = attempt_token
            else:
                os.environ.pop("HF_TOKEN", None)
            return
        except Exception as exc:
            if attempt_token and _is_invalid_hf_token_error(exc):
                print(
                    "[!] Falha de autenticação no HuggingFace com o token atual. "
                    "Você pode informar outro token ou seguir sem token.",
                    file=sys.stderr,
                )
                action, replacement = _prompt_recover_invalid_hf_token()
                if action == "new-token" and replacement:
                    attempt_token = replacement
                    continue
                attempt_token = None
                continue
            raise


def download_model_with_fallback(
    preferred_model_id: str,
    fallback_model_id: str,
    local_dir: Path,
    strategies: list[ModelDownloadStrategy] | None = None,
) -> DownloadSelection:
    """
    Obtain ``preferred_model_id``, falling back to ``fallback_model_id``.

    For each candidate model, in order: reuse the per-model directory when it
    is already marked complete; otherwise try every provider in priority
    order, wiping the per-model directory before each attempt.

    Args:
        preferred_model_id: Model tried first.
        fallback_model_id: Model tried when the preferred one fails with
            every provider. Blank values and a value equal to the preferred
            id are ignored.
        local_dir: Base directory; each model gets its own subdirectory.
        strategies: Providers to use; defaults to ``build_default_strategies()``
            when None or empty.

    Returns:
        A ``DownloadSelection`` describing the model, provider and directory.

    Raises:
        RuntimeError: When no candidate could be obtained from any provider.
    """
    base_dir = local_dir.expanduser()
    base_dir.mkdir(parents=True, exist_ok=True)
    providers = strategies or build_default_strategies()
    errors: list[str] = []

    # A blank id would map to base_dir itself, and the clean step below would
    # then delete every cached model. Duplicates are dropped so an identical
    # fallback does not repeat the same failing downloads.
    candidates = [
        model_id
        for model_id in dict.fromkeys((preferred_model_id, fallback_model_id))
        if model_id and model_id.strip()
    ]
    if not candidates:
        errors.append("nenhum model_id válido informado")

    for model_id in candidates:
        model_local_dir = _model_cache_dir(base_dir, model_id)
        if _cache_ready(model_local_dir):
            return DownloadSelection(
                model_id=model_id,
                provider="local-cache",
                local_dir=model_local_dir,
            )

        for strategy in providers:
            try:
                print(
                    f"[+] Iniciando download do modelo '{model_id}' via {strategy.name} em: {model_local_dir}",
                    file=sys.stderr,
                )
                # Start from an empty directory so a partial download left by
                # a previous provider is never mistaken for a complete one.
                _prepare_destination(model_local_dir, clean=True)
                strategy.download(model_id=model_id, local_dir=model_local_dir)
                _mark_cache_ready(model_local_dir)
                return DownloadSelection(
                    model_id=model_id,
                    provider=strategy.name,
                    local_dir=model_local_dir,
                )
            except Exception as exc:
                errors.append(f"{strategy.name}:{model_id}: {exc}")

    raise RuntimeError(
        "Falha no download dos modelos em todos os provedores configurados. "
        + " | ".join(errors)
    )
"
566
+ B64_MODEL_DL_MS="IyEvdXNyL2Jpbi9lbnYgcHl0aG9uMwpmcm9tIF9fZnV0dXJlX18gaW1wb3J0IGFubm90YXRpb25zCgoiIiIKUHJvdmlkZXIgb3BjaW9uYWwgZGUgZG93bmxvYWQgdmlhIE1vZGVsU2NvcGUuClVzYWRvIGFwZW5hcyBzZSBvIHBhY290ZSBgbW9kZWxzY29wZWAgZXN0aXZlciBpbnN0YWxhZG8uCiIiIgoKZnJvbSBwYXRobGliIGltcG9ydCBQYXRoCgoKY2xhc3MgTW9kZWxTY29wZURvd25sb2FkU3RyYXRlZ3k6CiAgICBuYW1lID0gIm1vZGVsc2NvcGUiCgogICAgZGVmIGRvd25sb2FkKHNlbGYsIG1vZGVsX2lkOiBzdHIsIGxvY2FsX2RpcjogUGF0aCkgLT4gTm9uZToKICAgICAgICB0cnk6CiAgICAgICAgICAgIGZyb20gbW9kZWxzY29wZS5odWIuc25hcHNob3RfZG93bmxvYWQgaW1wb3J0IHNuYXBzaG90X2Rvd25sb2FkCiAgICAgICAgZXhjZXB0IEV4Y2VwdGlvbiBhcyBleGM6CiAgICAgICAgICAgIHJhaXNlIFJ1bnRpbWVFcnJvcigKICAgICAgICAgICAgICAgICJQYWNvdGUgYG1vZGVsc2NvcGVgIGluZGlzcG9uw612ZWwgcGFyYSBwcm92aWRlciBhbHRlcm5hdGl2byIKICAgICAgICAgICAgKSBmcm9tIGV4YwoKICAgICAgICBzbmFwc2hvdF9kb3dubG9hZCgKICAgICAgICAgICAgbW9kZWxfaWQ9bW9kZWxfaWQsCiAgICAgICAgICAgIGxvY2FsX2Rpcj1zdHIobG9jYWxfZGlyKSwKICAgICAgICApCg=="
567
+
# Write each embedded payload to its file under EXTRACT_DIR.
# Entries are "<variable holding the payload>:<target file name>".
PAYLOAD_SPECS=(
  "B64_COMPOSE:docker-compose.yml"
  "B64_REQUIREMENTS:requirements.txt"
  "B64_INDEXER:indexer_full.py"
  "B64_MCP:mcp_server.py"
  "B64_MODEL_DL_HF:download_model_from_hugginface.py"
  "B64_MODEL_DL_MS:download_model_from_modelscope.py"
)
for PAYLOAD_SPEC in "${PAYLOAD_SPECS[@]}"; do
  PAYLOAD_VAR="${PAYLOAD_SPEC%%:*}"
  decode_to_file "${!PAYLOAD_VAR}" "${EXTRACT_DIR}/${PAYLOAD_SPEC#*:}"
done

log_info "$(t extracted_to): ${EXTRACT_DIR}"

# Make the extracted indexer read its Chroma port from MCP_CHROMA_PORT.
# Best effort: a sed failure is ignored on purpose.
INDEXER_PORT_PATCH='s/^CHROMA_PORT = 8000$/CHROMA_PORT = _env_int("MCP_CHROMA_PORT", 8000, min_value=1)/'
sed -i '' -E "${INDEXER_PORT_PATCH}" "${EXTRACT_DIR}/indexer_full.py" || true
577
+
# ---------------------------------------------------------------------------
# Prerequisites
# ---------------------------------------------------------------------------
log_info "$(t checking_prereq)"

# Log the translated message for key $1 and abort the installer.
_fail_prereq() {
  log_error "$(t "$1")"
  exit 1
}

command -v python3 >/dev/null 2>&1 || _fail_prereq python_missing

# Python 3.10 or newer is required.
PY_VER="$(python3 -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")"
PY_MAJOR="$(cut -d. -f1 <<<"$PY_VER")"
PY_MINOR="$(cut -d. -f2 <<<"$PY_VER")"
if [[ "$PY_MAJOR" -lt 3 || ( "$PY_MAJOR" -eq 3 && "$PY_MINOR" -lt 10 ) ]]; then
  log_error "$(t python_min): $PY_VER"
  exit 1
fi

python3 -m venv --help >/dev/null 2>&1 || _fail_prereq py_venv_missing

# Docker CLI, a reachable daemon, and the compose plugin are all mandatory.
command -v docker >/dev/null 2>&1 || _fail_prereq docker_missing
docker info >/dev/null 2>&1 || _fail_prereq docker_daemon
docker compose version >/dev/null 2>&1 || _fail_prereq compose_missing
DOCKER_COMPOSE_CMD="docker compose"

# curl is optional: its absence only produces a warning.
HAS_CURL=true
if ! command -v curl >/dev/null 2>&1; then
  log_warn "$(t no_curl)"
  HAS_CURL=false
fi

log_info "$(t prereq_ok)"

# Reuse the port of an already installed compose file when one can be read.
if EXISTING_CHROMA_PORT="$(extract_chroma_port_from_compose "${DOCKER_COMPOSE_FILE_PATH}")"; then
  CHROMA_PORT="${EXISTING_CHROMA_PORT}"
fi
626
+
#######################################
# Remove the installed RAG environment so it can be rebuilt from scratch.
# Globals:  VENV_DIR, RAG_DB_DIR, DOCKER_COMPOSE_DIR, MODEL_CACHE_DIR,
#           MCP_SERVER_DEST, MODEL_DL_HF_DEST, MODEL_DL_MS_DEST (read)
# Outputs:  start and end messages through log_info
#######################################
reset_rag_environment() {
  local -a doomed_dirs=(
    "${VENV_DIR}"
    "${RAG_DB_DIR}"
    "${DOCKER_COMPOSE_DIR}"
    "${MODEL_CACHE_DIR}"
  )
  local -a doomed_files=(
    "${MCP_SERVER_DEST}"
    "${MODEL_DL_HF_DEST}"
    "${MODEL_DL_MS_DEST}"
  )

  log_info "$(t reset_start)"
  rm -rf "${doomed_dirs[@]}"
  rm -f "${doomed_files[@]}"
  log_info "$(t reset_done)"
}
633
+
# --change-model: confirm when interactive, then wipe the environment and
# force a reinstall with model reconfiguration.
if [[ "$CHANGE_MODEL" == "true" ]]; then
  # ONLY_INDEX is forced off whenever the model is being changed.
  ONLY_INDEX=false
  log_info "$(t change_model_msg)"

  if [[ ! -t 0 ]]; then
    # No terminal to ask on: warn and proceed.
    log_warn "$(t change_model_noninteractive)"
  elif ! ask_yes_no_loop "$(t change_model_confirm) $YES_NO_HINT "; then
    log_info "$(t op_cancelled)"
    exit 0
  fi

  export MCP_FORCE_MODEL_RECONFIG=1
  REINSTALL=true
  reset_rag_environment
fi
651
+
#######################################
# Offer, at most once per run, to enter a Hugging Face token.
# Does nothing when it already ran, when a token is already present in
# HF_TOKEN / HUGGING_FACE_HUB_TOKEN, or when stdin is not a terminal.
# Globals:  HF_TOKEN_PROMPTED (read/written), HF_TOKEN (read, exported when
#           a token is entered), HUGGING_FACE_HUB_TOKEN (read),
#           BOLD, DIM, NC (read)
# Outputs:  prompts on the terminal; status through log_info / log_warn
# Returns:  0
#######################################
prompt_optional_hf_token() {
  # Local so the pasted secret does not linger in a global variable.
  local input_hf_token=""

  if [[ -n "${HF_TOKEN_PROMPTED:-}" ]]; then
    return 0
  fi
  HF_TOKEN_PROMPTED=1

  if [[ -n "${HF_TOKEN:-}" || -n "${HUGGING_FACE_HUB_TOKEN:-}" ]]; then
    log_info "$(t hf_detected)"
    return 0
  fi

  if [[ ! -t 0 ]]; then
    log_info "$(t hf_noninteractive)"
    return 0
  fi

  echo ""
  echo -e "${BOLD}$(t hf_title)${NC}"
  echo -e "${DIM}$(t hf_desc)${NC}"
  if ask_yes_no_loop "$(t hf_prompt_now)"; then
    # read returns non-zero on EOF (Ctrl-D); without the guard that would
    # abort the whole installer under `set -e`.
    read -r -s -p "$(t hf_prompt_paste)" input_hf_token || true
    echo ""
    if [[ -n "${input_hf_token}" ]]; then
      export HF_TOKEN="${input_hf_token}"
      log_info "$(t hf_set)"
    else
      log_warn "$(t hf_empty)"
    fi
  else
    log_info "$(t hf_skip)"
  fi
  return 0
}
684
+
#######################################
# Run the extracted indexer with the venv interpreter and report failures.
# Globals:   TOKENIZERS_PARALLELISM, MCP_FORCE_MODEL_RECONFIG (read, with
#            defaults "false" and "0"), CHROMA_PORT, VENV_PYTHON,
#            EXTRACT_DIR (read)
# Arguments: $1 - project directory to index
# Outputs:   an error message through log_error on a non-zero exit
# Returns:   the indexer's exit status
#######################################
run_indexer_with_diagnostics() {
  local target_project_dir="$1"
  local rc=0

  # `|| rc=$?` captures the status without tripping errexit.
  TOKENIZERS_PARALLELISM="${TOKENIZERS_PARALLELISM:-false}" \
  MCP_FORCE_MODEL_RECONFIG="${MCP_FORCE_MODEL_RECONFIG:-0}" \
  MCP_CHROMA_PORT="${CHROMA_PORT}" \
    "${VENV_PYTHON}" "${EXTRACT_DIR}/indexer_full.py" "${target_project_dir}" || rc=$?

  case "$rc" in
    0) ;;
    # 137 gets the dedicated out-of-memory message.
    137) log_error "$(t index_oom_title)" ;;
    *) log_error "$(t index_failed_code): ${rc}" ;;
  esac

  return "$rc"
}
708
+
# ---------------------------------------------------------------------------
# --only-index
# ---------------------------------------------------------------------------
# Index an existing installation and exit without touching anything else.
if [[ "$ONLY_INDEX" == "true" ]]; then
  log_info "$(t only_index_mode)"

  [[ -f "${VENV_PYTHON}" ]] || {
    log_error "$(t venv_not_found) ${VENV_DIR}"
    exit 1
  }
  [[ -d "${PROJECT_DIR}" ]] || {
    log_error "$(t path_not_found): ${PROJECT_DIR}"
    exit 1
  }

  prompt_optional_hf_token
  log_info "$(t indexing): ${PROJECT_DIR}"
  run_indexer_with_diagnostics "${PROJECT_DIR}"
  log_info "$(t indexing_done)"
  exit 0
fi
728
+
# ---------------------------------------------------------------------------
# Venv + dependencies
# ---------------------------------------------------------------------------
log_info "$(t section_venv)"

# Import probe: succeeds only when every required package imports and the
# installed transformers major version is below 5.
DEPS_PROBE="import chromadb, sentence_transformers, langchain_text_splitters, tqdm, mcp, transformers, sys; sys.exit(0 if int(transformers.__version__.split('.')[0]) < 5 else 1)"

DEPS_OK=false
if [[ "$REINSTALL" == "false" && -f "${VENV_PYTHON}" ]] \
  && "${VENV_PYTHON}" -c "${DEPS_PROBE}" 2>/dev/null; then
  log_info "$(t deps_ok)"
  DEPS_OK=true
fi

if [[ "$DEPS_OK" == "false" ]]; then
  # Create the venv only when its interpreter is missing.
  if [[ ! -f "${VENV_PYTHON}" ]]; then
    log_info "$(t creating_venv) ${VENV_DIR}"
    python3 -m venv "${VENV_DIR}"
    log_info "$(t venv_created)"
  fi

  log_info "$(t upgrading_pip)"
  "${VENV_PIP}" install --upgrade pip

  log_info "$(t deps_reinstall)"
  "${VENV_PIP}" install --progress-bar on -r "${EXTRACT_DIR}/requirements.txt"

  log_info "$(t deps_installed)"
fi
757
+
# ---------------------------------------------------------------------------
# ChromaDB
# ---------------------------------------------------------------------------
log_info "$(t section_chroma)"

mkdir -p "${RAG_DB_DIR}" "${DOCKER_COMPOSE_DIR}"

if [[ "$REINSTALL" == "true" ]] || [[ ! -f "${DOCKER_COMPOSE_FILE_PATH}" ]]; then
  # Fresh install: pick a port, then write it into the compose file
  # (host port mapping and healthcheck URL).
  if [[ "${CHROMA_PORT_FROM_ENV}" == "true" ]]; then
    CHROMA_PORT="$(choose_chroma_port_for_install "${CHROMA_PORT_DEFAULT}" "${CHROMA_PORT}")"
  else
    CHROMA_PORT="$(choose_chroma_port_for_install "${CHROMA_PORT_DEFAULT}")"
  fi
  cp "${EXTRACT_DIR}/docker-compose.yml" "${DOCKER_COMPOSE_FILE_PATH}"
  sed -i '' -E 's/"[0-9]{1,5}:8000"/"'"${CHROMA_PORT}"':8000"/g' "${DOCKER_COMPOSE_FILE_PATH}"
  sed -i '' -E 's|http://localhost:[0-9]{1,5}/api/v1/heartbeat|http://localhost:'"${CHROMA_PORT}"'/api/v1/heartbeat|g' "${DOCKER_COMPOSE_FILE_PATH}"
  log_info "$(t compose_installed) ${DOCKER_COMPOSE_DIR}"
  log_info "$(t chroma_port_selected) ${CHROMA_PORT}"
else
  # Existing compose file is kept; adopt the port it declares when readable.
  log_info "$(t compose_keep)"
  if EXISTING_CHROMA_PORT="$(extract_chroma_port_from_compose "${DOCKER_COMPOSE_FILE_PATH}")"; then
    CHROMA_PORT="${EXISTING_CHROMA_PORT}"
  fi
  log_info "$(t chroma_port_selected) ${CHROMA_PORT}"
fi

#######################################
# Probe the ChromaDB heartbeat endpoint once.
# Uses curl when available; otherwise falls back to python3 (already checked
# as a prerequisite) so the wait loop below can still detect readiness.
# Globals:  CHROMA_HOST, CHROMA_PORT, HAS_CURL (read)
# Returns:  0 when the endpoint answered successfully, non-zero otherwise
#######################################
chroma_heartbeat_ok() {
  local url="http://${CHROMA_HOST}:${CHROMA_PORT}/api/v1/heartbeat"
  if [[ "$HAS_CURL" == "true" ]]; then
    curl -sf "$url" >/dev/null 2>&1
  else
    python3 -c 'import sys, urllib.request; urllib.request.urlopen(sys.argv[1], timeout=2).close()' \
      "$url" >/dev/null 2>&1
  fi
}

if docker ps --format '{{.Names}}' | grep -q '^chromadb-rag$'; then
  log_info "$(t chroma_running)"
else
  log_info "$(t chroma_start)"
  (cd "${DOCKER_COMPOSE_DIR}" && $DOCKER_COMPOSE_CMD up -d)

  # Poll every 2 s, giving up with a warning (not an error) after 40 s.
  log_info "$(t chroma_wait)"
  WAITED=0
  while true; do
    if chroma_heartbeat_ok; then
      log_info "$(t chroma_ready) http://${CHROMA_HOST}:${CHROMA_PORT}"
      break
    fi
    if [[ $WAITED -ge 40 ]]; then
      log_warn "$(t chroma_timeout)"
      break
    fi
    sleep 2
    WAITED=$((WAITED+2))
  done
fi
805
+
# ---------------------------------------------------------------------------
# Install mcp-rag-server
# ---------------------------------------------------------------------------
log_info "$(t section_install_mcp)"

mkdir -p "${BIN_DIR}"

NEEDS_INSTALL=true
MCP_WAS_OUTDATED=false
if [[ -f "${MCP_SERVER_DEST}" ]]; then
  # Compare from line 2 onwards: the installed copy gets its first line
  # rewritten by replace_shebang, so line 1 is expected to differ.
  if ! cmp -s <(tail -n +2 "${MCP_SERVER_DEST}") <(tail -n +2 "${EXTRACT_DIR}/mcp_server.py"); then
    MCP_WAS_OUTDATED=true
    log_warn "$(t mcp_outdated)"
  else
    log_info "$(t mcp_keep)"
  fi

  # --reinstall always installs. Otherwise ask when a terminal is attached,
  # and leave the installed copy alone when it is not.
  if [[ "$REINSTALL" != "true" ]]; then
    if [[ ! -t 0 ]]; then
      NEEDS_INSTALL=false
    else
      MCP_PROMPT_KEY=mcp_prompt_reinstall
      if [[ "$MCP_WAS_OUTDATED" == "true" ]]; then
        MCP_PROMPT_KEY=mcp_prompt_update
      fi
      if ! ask_yes_no_loop "$(t "${MCP_PROMPT_KEY}")"; then
        NEEDS_INSTALL=false
        log_info "$(t mcp_skip)"
      fi
    fi
  fi
fi

if [[ "$NEEDS_INSTALL" == "true" ]]; then
  cp "${EXTRACT_DIR}/mcp_server.py" "${MCP_SERVER_DEST}"
  cp "${EXTRACT_DIR}/download_model_from_hugginface.py" "${MODEL_DL_HF_DEST}"
  cp "${EXTRACT_DIR}/download_model_from_modelscope.py" "${MODEL_DL_MS_DEST}"

  # Point the server at the venv interpreter and make it executable.
  replace_shebang "${MCP_SERVER_DEST}" "${VENV_PYTHON}"
  chmod +x "${MCP_SERVER_DEST}"

  log_info "$(t mcp_installed): ${MCP_SERVER_DEST}"
  log_info "$(t mod_hf_installed): ${MODEL_DL_HF_DEST}"
  log_info "$(t mod_ms_installed): ${MODEL_DL_MS_DEST}"
  log_info "$(t shebang_set): ${VENV_PYTHON}"
fi

# Add ~/.local/bin to PATH in each existing shell rc file that does not
# mention it yet.
for RC in "${USER_HOME}/.zshrc" "${USER_HOME}/.zprofile" "${USER_HOME}/.bash_profile"; do
  if [[ ! -f "$RC" ]] || grep -qF '.local/bin' "$RC"; then
    continue
  fi
  printf '\n%s\n%s\n' \
    '# RAG setup — add local bin to PATH' \
    'export PATH="$HOME/.local/bin:$PATH"' >> "$RC"
  log_info "$(t path_added) $RC"
done
868
+
# ---------------------------------------------------------------------------
# Optional MCP configuration
# ---------------------------------------------------------------------------
log_info "$(t section_mcp_cfg)"

# Client configuration files that may receive the MCP server entry.
CLAUDE_JSON="${USER_HOME}/.claude.json"
CURSOR_MCP_JSON_1="${USER_HOME}/.cursor/mcp.json"
CURSOR_MCP_JSON_2="${USER_HOME}/Library/Application Support/Cursor/User/mcp.json"
MCP_VERSION="$(detect_current_mcp_version)"

# Parallel arrays: TARGET_LABELS[i] is the display name of TARGET_CONFIGS[i].
TARGET_CONFIGS=()
TARGET_LABELS=()

#######################################
# Register a config file unless it is already registered.
# Globals:   TARGET_CONFIGS, TARGET_LABELS (read/written)
# Arguments: $1 - config file path, $2 - display label
# Returns:   0
#######################################
_append_target() {
  local cfg="$1" label="$2"
  # ":-" keeps the expansion safe under `set -u` while the array is empty.
  for existing in "${TARGET_CONFIGS[@]:-}"; do
    [[ "$existing" != "$cfg" ]] || return 0
  done
  TARGET_CONFIGS+=("$cfg")
  TARGET_LABELS+=("$label")
}

# Only files that exist are considered.
if [[ -f "${CLAUDE_JSON}" ]]; then _append_target "${CLAUDE_JSON}" "Claude Code"; fi
if [[ -f "${CURSOR_MCP_JSON_1}" ]]; then _append_target "${CURSOR_MCP_JSON_1}" "Cursor"; fi
if [[ -f "${CURSOR_MCP_JSON_2}" ]]; then _append_target "${CURSOR_MCP_JSON_2}" "Cursor"; fi
903
+
# Decide which detected config files need the "rag-codebase" MCP entry and,
# after confirmation, write it.
if [[ "${#TARGET_CONFIGS[@]}" -eq 0 ]]; then
  log_info "$(t no_cfg_files)"
else
  PENDING_CONFIGS=()
  PENDING_LABELS=()

  for i in "${!TARGET_CONFIGS[@]}"; do
    CFG_PATH="${TARGET_CONFIGS[$i]}"
    CFG_LABEL="${TARGET_LABELS[$i]}"
    # Read-only check. If the helper itself crashes, treat the file as
    # needing an update instead of aborting the installer under `set -e`.
    CHECK_RESULT=$(
      python3 - "${CFG_PATH}" "${MCP_SERVER_DEST}" "${MCP_VERSION}" "${CHROMA_PORT}" <<'PYEOF'
import json
import sys
from pathlib import Path

cfg_path = Path(sys.argv[1]).expanduser()
desired = {
    "command": sys.argv[2],
    "args": [],
    "env": {
        "CHROMA_HOST": "localhost",
        "CHROMA_PORT": sys.argv[4],
        "TOKENIZERS_PARALLELISM": "false",
    },
    "version": sys.argv[3],
}

try:
    data = json.loads(cfg_path.read_text(encoding="utf-8"))
except Exception:
    data = None

# Up to date only when mcpServers["rag-codebase"] equals the desired entry;
# an unreadable or unexpectedly shaped file counts as needing an update.
servers = data.get("mcpServers") if isinstance(data, dict) else None
current = servers.get("rag-codebase") if isinstance(servers, dict) else None
print("already_up_to_date" if current == desired else "needs_update")
PYEOF
    ) || CHECK_RESULT="needs_update"

    if [[ "${CHECK_RESULT}" != "already_up_to_date" ]]; then
      PENDING_CONFIGS+=("${CFG_PATH}")
      PENDING_LABELS+=("${CFG_LABEL}")
    fi
  done

  if [[ "${#PENDING_CONFIGS[@]}" -eq 0 ]]; then
    log_info "$(t cfg_all_current)"
  else
    log_info "$(t cfg_detected)"
    for i in "${!PENDING_CONFIGS[@]}"; do
      echo -e " - ${PENDING_LABELS[$i]}: ${PENDING_CONFIGS[$i]}"
    done

    # Config files are only modified after an interactive confirmation.
    APPLY_MCP_CONFIG=false
    if [[ -t 0 ]]; then
      if ask_yes_no_loop "$(t ask_apply_cfg)"; then
        APPLY_MCP_CONFIG=true
      fi
    else
      log_info "$(t noninteractive_cfg_skip)"
    fi

    if [[ "$APPLY_MCP_CONFIG" == "true" ]]; then
      for i in "${!PENDING_CONFIGS[@]}"; do
        CFG_PATH="${PENDING_CONFIGS[$i]}"
        CFG_LABEL="${PENDING_LABELS[$i]}"

        # The helper prints "ok:<outcome>" and exits 0, or prints
        # "error:<reason>" and exits 2 without touching the file.
        if ! RESULT=$(
          python3 - "${CFG_PATH}" "${MCP_SERVER_DEST}" "${MCP_VERSION}" "${CHROMA_PORT}" <<'PYEOF'
import json
import sys
from pathlib import Path

cfg_path = Path(sys.argv[1]).expanduser()
mcp_server_command = sys.argv[2]
mcp_version = sys.argv[3]
chroma_port = sys.argv[4]

try:
    data = json.loads(cfg_path.read_text(encoding="utf-8"))
except Exception as exc:
    print(f"error:json_invalido:{exc}")
    sys.exit(2)

if not isinstance(data, dict):
    print("error:estrutura_invalida")
    sys.exit(2)

mcp_servers = data.get("mcpServers")
if mcp_servers is None:
    mcp_servers = {}
if not isinstance(mcp_servers, dict):
    print("error:mcpServers_invalido")
    sys.exit(2)

desired = {
    "command": mcp_server_command,
    "args": [],
    "env": {
        "CHROMA_HOST": "localhost",
        "CHROMA_PORT": chroma_port,
        "TOKENIZERS_PARALLELISM": "false",
    },
    "version": mcp_version,
}


def is_rag_server_entry(value: object) -> bool:
    if not isinstance(value, dict):
        return False
    cmd = value.get("command")
    return isinstance(cmd, str) and "mcp-rag-server" in cmd


if "rag-codebase" in mcp_servers:
    if mcp_servers["rag-codebase"] == desired:
        print("ok:already_exists")
        sys.exit(0)
    outcome = "ok:updated_rag_codebase"
else:
    # No "rag-codebase" key: replace the first entry that launches
    # mcp-rag-server under another name, if there is one.
    old_rag_key = next(
        (key for key, value in mcp_servers.items() if is_rag_server_entry(value)),
        None,
    )
    if old_rag_key is not None:
        del mcp_servers[old_rag_key]
        outcome = f"ok:replaced_old:{old_rag_key}"
    else:
        outcome = "ok:added"

mcp_servers["rag-codebase"] = desired
data["mcpServers"] = mcp_servers
cfg_path.write_text(json.dumps(data, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
print(outcome)
PYEOF
        ); then
          log_warn "$(t cannot_update_cfg) ${CFG_LABEL} (${CFG_PATH}): ${RESULT}"
          continue
        fi

        case "${RESULT}" in
          ok:already_exists)
            log_info "${CFG_LABEL}: $(t already_updated)"
            ;;
          ok:updated_rag_codebase)
            log_info "${CFG_LABEL}: $(t updated_cfg) ${MCP_VERSION}."
            ;;
          ok:replaced_old:*)
            log_info "${CFG_LABEL}: $(t replaced_cfg) ${MCP_VERSION})."
            ;;
          ok:added)
            log_info "${CFG_LABEL}: $(t added_cfg) ${MCP_VERSION})."
            ;;
          *)
            log_warn "${CFG_LABEL}: $(t cannot_update_cfg) -> ${RESULT}"
            ;;
        esac
      done
    fi
  fi
fi
1099
+
# ---------------------------------------------------------------------------
# Indexing
# ---------------------------------------------------------------------------
# Skipped with --skip-index; a missing project directory only warns.
if [[ "$SKIP_INDEX" == "false" ]]; then
  if [[ -d "${PROJECT_DIR}" ]]; then
    prompt_optional_hf_token
    log_info "$(t indexing): ${PROJECT_DIR}"
    run_indexer_with_diagnostics "${PROJECT_DIR}"
    log_info "$(t indexing_done)"
  else
    log_warn "$(t path_not_found): ${PROJECT_DIR}"
  fi
fi

# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------
SUMMARY_RULE="${BOLD}${GREEN}================================================================${NC}"

echo ""
echo -e "${SUMMARY_RULE}"
echo -e "${BOLD}${GREEN} $(t setup_done)${NC}"
echo -e "${SUMMARY_RULE}"
echo ""
echo -e " ${GREEN}Venv Python${NC} : ${VENV_DIR}"
echo -e " ${GREEN}ChromaDB${NC} : http://${CHROMA_HOST}:${CHROMA_PORT} (Docker Desktop)"
echo -e " ${GREEN}Dados${NC} : ${RAG_DB_DIR}"
echo -e " ${GREEN}MCP Server${NC} : ${MCP_SERVER_DEST}"
echo -e " ${GREEN}Projeto${NC} : ${PROJECT_DIR}"
echo ""
echo -e " $(t restart_tools)"
echo ""