own-rag-cli 0.0.12-snapshot → 0.0.13-snapshot
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/bin/mcp_server.py +121 -0
- package/package.json +1 -1
- package/rag-setup-macos.run +2 -2
- package/rag-setup.run +2 -2
package/README.md
CHANGED
package/bin/mcp_server.py
CHANGED
|
@@ -16,6 +16,8 @@ import json
|
|
|
16
16
|
import logging
|
|
17
17
|
import getpass
|
|
18
18
|
import shutil
|
|
19
|
+
import threading
|
|
20
|
+
import time
|
|
19
21
|
from collections.abc import Iterator
|
|
20
22
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
21
23
|
from dataclasses import dataclass
|
|
@@ -392,6 +394,21 @@ MAX_FILE_SIZE_BYTES = 500 * 1024 # 500 KB
|
|
|
392
394
|
TOP_K_RESULTS = 7
|
|
393
395
|
MAX_QUERY_RESULTS = 30
|
|
394
396
|
|
|
397
|
+
# --- Startup warmup ----------------------------------------------------------
# Settings for the single search issued at startup when WARMUP_ENABLED is true.
# Flags accept "1", "true", "yes" or "on" (case-insensitive, whitespace-trimmed).
WARMUP_ENABLED = os.environ.get("MCP_WARMUP_ENABLED", "true").strip().lower() in ("1", "true", "yes", "on")
WARMUP_MODE = os.environ.get("MCP_WARMUP_MODE", "single").strip().lower()
# Any unrecognized mode falls back to "single".
WARMUP_MODE = WARMUP_MODE if WARMUP_MODE in ("single", "ensemble") else "single"
WARMUP_QUERY = os.environ.get("MCP_WARMUP_QUERY", "warmup semantic search")
# Clamped to the range 1..20.
# NOTE(review): assumes _parse_int(raw, default, min_value=...) returns an int — confirm.
WARMUP_TOP_K = min(20, max(1, _parse_int(os.environ.get("MCP_WARMUP_TOP_K", "1"), 1, min_value=1)))

# --- Periodic keepalive ------------------------------------------------------
# Settings for the background search repeated every KEEPALIVE_INTERVAL_SEC seconds.
KEEPALIVE_ENABLED = os.environ.get("MCP_KEEPALIVE_ENABLED", "true").strip().lower() in ("1", "true", "yes", "on")
KEEPALIVE_MODE = os.environ.get("MCP_KEEPALIVE_MODE", "single").strip().lower()
# Any unrecognized mode falls back to "single".
KEEPALIVE_MODE = KEEPALIVE_MODE if KEEPALIVE_MODE in ("single", "ensemble") else "single"
# The keepalive query defaults to the warmup query.
KEEPALIVE_QUERY = os.environ.get("MCP_KEEPALIVE_QUERY", WARMUP_QUERY)
# Clamped to the range 1..20.
KEEPALIVE_TOP_K = min(20, max(1, _parse_int(os.environ.get("MCP_KEEPALIVE_TOP_K", "1"), 1, min_value=1)))
# Default is 600 seconds; presumably min_value=60 floors the interval at one minute — verify in _parse_int.
KEEPALIVE_INTERVAL_SEC = _parse_int(os.environ.get("MCP_KEEPALIVE_INTERVAL_SEC", "600"), 600, min_value=60)
|
|
411
|
+
|
|
395
412
|
# Filtros de varredura
|
|
396
413
|
IGNORED_DIRS = {
|
|
397
414
|
".git", "node_modules", "__pycache__", ".venv", "venv", "env",
|
|
@@ -1339,6 +1356,88 @@ def _run_ensemble_mode(query: str, top_k: int) -> tuple[list[FusedHit], list[str
|
|
|
1339
1356
|
return reranked_hits, branch_errors, rerank_applied, rerank_error
|
|
1340
1357
|
|
|
1341
1358
|
|
|
1359
|
+
def _execute_background_search(mode: str, query: str, top_k: int, source: str) -> None:
    """Run one maintenance search (warmup or keepalive) and record its outcome.

    The search result itself is discarded; only timing and counters are
    logged and forwarded to ``_log_tool_usage`` as a ``maintenance_call``
    event.

    Args:
        mode: ``"ensemble"`` runs ``_run_ensemble_mode``; any other value
            runs ``_run_single_mode``.
        query: Text passed to the search runner.
        top_k: Number of results requested from the search runner.
        source: Label used as the log prefix and as ``tool_name`` in the
            usage record.

    Any ``Exception`` raised by the search or by the success-path logging is
    caught and reported as a ``status="error"`` usage record instead of being
    propagated.
    """
    t0 = time.perf_counter()

    def _elapsed_ms() -> int:
        # Whole milliseconds elapsed since this call started.
        return int((time.perf_counter() - t0) * 1000)

    try:
        # Pick the runner inside the try so a failure here is reported too.
        search = _run_ensemble_mode if mode == "ensemble" else _run_single_mode
        hits, branch_errors, rerank_applied, rerank_error = search(query, top_k)

        took_ms = _elapsed_ms()
        hit_count = len(hits)
        failed_branches = len(branch_errors)
        log.info(
            "%s concluido: mode=%s top_k=%s elapsed_ms=%s hits=%s branch_errors=%s rerank=%s",
            source, mode, top_k, took_ms, hit_count, failed_branches, rerank_applied,
        )
        ok_details = {
            "status": "ok",
            "mode": mode,
            "query_len": len(query),
            "top_k": top_k,
            "elapsed_ms": took_ms,
            "result_count": hit_count,
            "branch_errors": failed_branches,
            "rerank_applied": rerank_applied,
            "rerank_error": rerank_error,
        }
        _log_tool_usage(event="maintenance_call", tool_name=source, details=ok_details)
    except Exception as exc:
        took_ms = _elapsed_ms()
        log.warning("%s falhou após %sms: %s", source, took_ms, exc)
        failure_details = {
            "status": "error",
            "mode": mode,
            "query_len": len(query),
            "top_k": top_k,
            "elapsed_ms": took_ms,
            "error": str(exc),
        }
        _log_tool_usage(event="maintenance_call", tool_name=source, details=failure_details)
|
|
1408
|
+
|
|
1409
|
+
|
|
1410
|
+
# Handle to the background keepalive thread; None until the first successful start.
_keepalive_thread: threading.Thread | None = None
# Setting this event makes the keepalive loop exit at its next wake-up.
_keepalive_stop_event = threading.Event()


def _start_keepalive_thread() -> None:
    """Start the daemon thread that periodically runs a keepalive search.

    Does nothing when ``KEEPALIVE_ENABLED`` is false or when a keepalive
    thread is already alive. Otherwise it spawns a daemon thread named
    ``rag-keepalive`` that waits ``KEEPALIVE_INTERVAL_SEC`` seconds between
    calls to ``_execute_background_search`` until ``_keepalive_stop_event``
    is set.
    """
    global _keepalive_thread
    if not KEEPALIVE_ENABLED:
        log.info("Keepalive desabilitado (MCP_KEEPALIVE_ENABLED=false).")
        return
    if _keepalive_thread is not None and _keepalive_thread.is_alive():
        return

    # A stop request from an earlier run leaves the event set; without this
    # reset a restarted thread would leave its loop on the first wait().
    _keepalive_stop_event.clear()

    def _runner() -> None:
        log.info(
            "Keepalive iniciado: intervalo=%ss mode=%s top_k=%s",
            KEEPALIVE_INTERVAL_SEC,
            KEEPALIVE_MODE,
            KEEPALIVE_TOP_K,
        )
        # wait() returns True once the stop event is set, which ends the loop;
        # on timeout it returns False and one keepalive search is executed.
        while not _keepalive_stop_event.wait(KEEPALIVE_INTERVAL_SEC):
            try:
                _execute_background_search(
                    mode=KEEPALIVE_MODE,
                    query=KEEPALIVE_QUERY,
                    top_k=KEEPALIVE_TOP_K,
                    source="semantic_search_keepalive",
                )
            except Exception:
                # The search helper reports its own failures, but its error
                # handler can still raise; keep the thread alive regardless.
                log.exception("Keepalive: erro inesperado; a thread continua ativa.")

    _keepalive_thread = threading.Thread(target=_runner, name="rag-keepalive", daemon=True)
    _keepalive_thread.start()
|
|
1439
|
+
|
|
1440
|
+
|
|
1342
1441
|
# ---------------------------------------------------------------------------
|
|
1343
1442
|
# Servidor MCP via FastMCP
|
|
1344
1443
|
# ---------------------------------------------------------------------------
|
|
@@ -1799,6 +1898,16 @@ if __name__ == "__main__":
|
|
|
1799
1898
|
log.info("Reranker: %s (enabled=%s, quant=%s)", RERANK_MODEL_ID, RERANK_ENABLED, RERANKER_QUANTIZATION)
|
|
1800
1899
|
log.info("Pasta de modelos locais: %s", MODEL_DIR)
|
|
1801
1900
|
log.info("Uso MCP será registrado em: %s", MCP_USAGE_LOG_PATH)
|
|
1901
|
+
log.info(
|
|
1902
|
+
"Warmup: enabled=%s mode=%s top_k=%s | Keepalive: enabled=%s interval=%ss mode=%s top_k=%s",
|
|
1903
|
+
WARMUP_ENABLED,
|
|
1904
|
+
WARMUP_MODE,
|
|
1905
|
+
WARMUP_TOP_K,
|
|
1906
|
+
KEEPALIVE_ENABLED,
|
|
1907
|
+
KEEPALIVE_INTERVAL_SEC,
|
|
1908
|
+
KEEPALIVE_MODE,
|
|
1909
|
+
KEEPALIVE_TOP_K,
|
|
1910
|
+
)
|
|
1802
1911
|
|
|
1803
1912
|
# Pré-aquece somente conexão Chroma; modelos ficam lazy para poupar RAM.
|
|
1804
1913
|
try:
|
|
@@ -1810,4 +1919,16 @@ if __name__ == "__main__":
|
|
|
1810
1919
|
log.error("Falha ao inicializar ChromaDB: %s", e)
|
|
1811
1920
|
log.error("O servidor continuará, mas as ferramentas retornarão erro até o ChromaDB estar disponível.")
|
|
1812
1921
|
|
|
1922
|
+
if WARMUP_ENABLED:
|
|
1923
|
+
_execute_background_search(
|
|
1924
|
+
mode=WARMUP_MODE,
|
|
1925
|
+
query=WARMUP_QUERY,
|
|
1926
|
+
top_k=WARMUP_TOP_K,
|
|
1927
|
+
source="semantic_search_warmup",
|
|
1928
|
+
)
|
|
1929
|
+
else:
|
|
1930
|
+
log.info("Warmup desabilitado (MCP_WARMUP_ENABLED=false).")
|
|
1931
|
+
|
|
1932
|
+
_start_keepalive_thread()
|
|
1933
|
+
|
|
1813
1934
|
mcp.run(transport="stdio")
|