own-rag-cli 0.0.12-snapshot → 0.0.13-snapshot

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # MCP binary checksum (SHA-256, payload without shebang): `286e52abcbc295893b501fc8aae092420c452a4d3a4242e25c88e646398190b6`
1
+ # MCP binary checksum (SHA-256, payload without shebang): `c74a07c0d8349fad54f3c5aa876296f9ed20239ffa0f2e6483e57796c7831e2a`
2
2
 
3
3
  # own-rag
4
4
 
package/bin/mcp_server.py CHANGED
@@ -16,6 +16,8 @@ import json
16
16
  import logging
17
17
  import getpass
18
18
  import shutil
19
+ import threading
20
+ import time
19
21
  from collections.abc import Iterator
20
22
  from concurrent.futures import ThreadPoolExecutor, as_completed
21
23
  from dataclasses import dataclass
@@ -392,6 +394,21 @@ MAX_FILE_SIZE_BYTES = 500 * 1024 # 500 KB
392
394
  TOP_K_RESULTS = 7
393
395
  MAX_QUERY_RESULTS = 30
394
396
 
397
# Warmup settings (MCP_WARMUP_* environment variables): parameters of the search
# that the __main__ block issues once before the server starts serving.
WARMUP_ENABLED = os.environ.get("MCP_WARMUP_ENABLED", "true").strip().lower() in ("1", "true", "yes", "on")
WARMUP_MODE = os.environ.get("MCP_WARMUP_MODE", "single").strip().lower()
# Any value other than the two known modes falls back to "single".
WARMUP_MODE = WARMUP_MODE if WARMUP_MODE in ("single", "ensemble") else "single"
WARMUP_QUERY = os.environ.get("MCP_WARMUP_QUERY", "warmup semantic search")
WARMUP_TOP_K = _parse_int(os.environ.get("MCP_WARMUP_TOP_K", "1"), 1, min_value=1)
WARMUP_TOP_K = max(1, min(WARMUP_TOP_K, 20))  # clamp to the 1..20 range

# Keepalive settings (MCP_KEEPALIVE_*): parameters of the periodic background search.
KEEPALIVE_ENABLED = os.environ.get("MCP_KEEPALIVE_ENABLED", "true").strip().lower() in ("1", "true", "yes", "on")
KEEPALIVE_MODE = os.environ.get("MCP_KEEPALIVE_MODE", "single").strip().lower()
# Same fallback rule as the warmup mode.
KEEPALIVE_MODE = KEEPALIVE_MODE if KEEPALIVE_MODE in ("single", "ensemble") else "single"
# The keepalive query defaults to whatever the warmup query resolved to.
KEEPALIVE_QUERY = os.environ.get("MCP_KEEPALIVE_QUERY", WARMUP_QUERY)
KEEPALIVE_TOP_K = _parse_int(os.environ.get("MCP_KEEPALIVE_TOP_K", "1"), 1, min_value=1)
KEEPALIVE_TOP_K = max(1, min(KEEPALIVE_TOP_K, 20))  # clamp to the 1..20 range
# NOTE(review): min_value=60 presumably makes _parse_int enforce a 60-second floor — confirm.
KEEPALIVE_INTERVAL_SEC = _parse_int(os.environ.get("MCP_KEEPALIVE_INTERVAL_SEC", "600"), 600, min_value=60)
411
+
395
412
  # Filtros de varredura
396
413
  IGNORED_DIRS = {
397
414
  ".git", "node_modules", "__pycache__", ".venv", "venv", "env",
@@ -1339,6 +1356,88 @@ def _run_ensemble_mode(query: str, top_k: int) -> tuple[list[FusedHit], list[str
1339
1356
  return reranked_hits, branch_errors, rerank_applied, rerank_error
1340
1357
 
1341
1358
 
1359
def _execute_background_search(mode: str, query: str, top_k: int, source: str) -> None:
    """Run one maintenance search and record how it went.

    The search goes through ``_run_ensemble_mode`` when ``mode`` is
    ``"ensemble"`` and through ``_run_single_mode`` for any other value.
    The outcome is written to the module logger and reported through
    ``_log_tool_usage`` as a ``maintenance_call`` event.

    Args:
        mode: Search pipeline to use; only ``"ensemble"`` is special-cased.
        query: Text submitted to the search.
        top_k: Number of results requested from the search.
        source: Label used as the log prefix and as ``tool_name`` in the
            usage record (e.g. the warmup or keepalive identifier).

    Any ``Exception`` raised while searching or while reporting success is
    caught and reported as an ``"error"`` record instead of propagating.
    """
    t0 = time.perf_counter()

    def _elapsed_ms() -> int:
        # Whole milliseconds since this call started.
        return int((time.perf_counter() - t0) * 1000)

    def _report(status: str, took_ms: int, **extra) -> None:
        # Fields shared by both outcomes come first, then the outcome-specific ones.
        payload = {
            "status": status,
            "mode": mode,
            "query_len": len(query),
            "top_k": top_k,
            "elapsed_ms": took_ms,
        }
        payload.update(extra)
        _log_tool_usage(event="maintenance_call", tool_name=source, details=payload)

    try:
        search = _run_ensemble_mode if mode == "ensemble" else _run_single_mode
        hits, branch_errors, rerank_applied, rerank_error = search(query, top_k)

        took_ms = _elapsed_ms()
        hit_count = len(hits)
        branch_error_count = len(branch_errors)
        log.info(
            "%s concluido: mode=%s top_k=%s elapsed_ms=%s hits=%s branch_errors=%s rerank=%s",
            source,
            mode,
            top_k,
            took_ms,
            hit_count,
            branch_error_count,
            rerank_applied,
        )
        _report(
            "ok",
            took_ms,
            result_count=hit_count,
            branch_errors=branch_error_count,
            rerank_applied=rerank_applied,
            rerank_error=rerank_error,
        )
    except Exception as e:
        # Maintenance searches are best-effort: log the failure and carry on.
        took_ms = _elapsed_ms()
        log.warning("%s falhou após %sms: %s", source, took_ms, e)
        _report("error", took_ms, error=str(e))
1408
+
1409
+
1410
# Handle of the keepalive thread; stays None until _start_keepalive_thread() spawns one.
_keepalive_thread: threading.Thread | None = None
# Once set, the keepalive loop exits the next time it waits on this event.
_keepalive_stop_event = threading.Event()


def _start_keepalive_thread() -> None:
    """Start the daemon thread that issues periodic keepalive searches.

    Does nothing (beyond an info log) when ``KEEPALIVE_ENABLED`` is false,
    and returns silently when a previously started keepalive thread is
    still alive. Otherwise a daemon thread named ``rag-keepalive`` is
    created, stored in ``_keepalive_thread`` and started.

    The thread waits ``KEEPALIVE_INTERVAL_SEC`` seconds on
    ``_keepalive_stop_event`` before each search, so the first keepalive
    search happens one full interval after start-up.
    """
    global _keepalive_thread

    if not KEEPALIVE_ENABLED:
        log.info("Keepalive desabilitado (MCP_KEEPALIVE_ENABLED=false).")
        return

    existing = _keepalive_thread
    if existing is not None and existing.is_alive():
        return

    def _keepalive_loop() -> None:
        log.info(
            "Keepalive iniciado: intervalo=%ss mode=%s top_k=%s",
            KEEPALIVE_INTERVAL_SEC,
            KEEPALIVE_MODE,
            KEEPALIVE_TOP_K,
        )
        while True:
            # wait() returns True only when the stop event was set; a timeout yields False.
            if _keepalive_stop_event.wait(KEEPALIVE_INTERVAL_SEC):
                break
            _execute_background_search(
                mode=KEEPALIVE_MODE,
                query=KEEPALIVE_QUERY,
                top_k=KEEPALIVE_TOP_K,
                source="semantic_search_keepalive",
            )

    worker = threading.Thread(target=_keepalive_loop, name="rag-keepalive", daemon=True)
    _keepalive_thread = worker
    worker.start()
1439
+
1440
+
1342
1441
  # ---------------------------------------------------------------------------
1343
1442
  # Servidor MCP via FastMCP
1344
1443
  # ---------------------------------------------------------------------------
@@ -1799,6 +1898,16 @@ if __name__ == "__main__":
1799
1898
  log.info("Reranker: %s (enabled=%s, quant=%s)", RERANK_MODEL_ID, RERANK_ENABLED, RERANKER_QUANTIZATION)
1800
1899
  log.info("Pasta de modelos locais: %s", MODEL_DIR)
1801
1900
  log.info("Uso MCP será registrado em: %s", MCP_USAGE_LOG_PATH)
1901
+ log.info(
1902
+ "Warmup: enabled=%s mode=%s top_k=%s | Keepalive: enabled=%s interval=%ss mode=%s top_k=%s",
1903
+ WARMUP_ENABLED,
1904
+ WARMUP_MODE,
1905
+ WARMUP_TOP_K,
1906
+ KEEPALIVE_ENABLED,
1907
+ KEEPALIVE_INTERVAL_SEC,
1908
+ KEEPALIVE_MODE,
1909
+ KEEPALIVE_TOP_K,
1910
+ )
1802
1911
 
1803
1912
  # Pré-aquece somente conexão Chroma; modelos ficam lazy para poupar RAM.
1804
1913
  try:
@@ -1810,4 +1919,16 @@ if __name__ == "__main__":
1810
1919
  log.error("Falha ao inicializar ChromaDB: %s", e)
1811
1920
  log.error("O servidor continuará, mas as ferramentas retornarão erro até o ChromaDB estar disponível.")
1812
1921
 
1922
+ if WARMUP_ENABLED:
1923
+ _execute_background_search(
1924
+ mode=WARMUP_MODE,
1925
+ query=WARMUP_QUERY,
1926
+ top_k=WARMUP_TOP_K,
1927
+ source="semantic_search_warmup",
1928
+ )
1929
+ else:
1930
+ log.info("Warmup desabilitado (MCP_WARMUP_ENABLED=false).")
1931
+
1932
+ _start_keepalive_thread()
1933
+
1813
1934
  mcp.run(transport="stdio")
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "own-rag-cli",
3
- "version": "0.0.12-snapshot",
3
+ "version": "0.0.13-snapshot",
4
4
  "description": "Local RAG setup with ChromaDB + MCP server (Jina/BGE hybrid support).",
5
5
  "license": "MIT",
6
6
  "private": false,