@innvisor/conny-ai 9.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/.env.example +68 -0
  2. package/CHANGELOG.md +54 -0
  3. package/LICENSE +21 -0
  4. package/README.md +369 -0
  5. package/brand-assets/A_dark_luxury_web_background_202605210700.jpeg +0 -0
  6. package/brand-assets/Conny.web.logo.png +0 -0
  7. package/brand-assets/Logo_Conny_Petalo_Claro.png +0 -0
  8. package/brand-assets/cl-nica-de-las-am-ricas/manifest.json +22 -0
  9. package/brand-assets/cl-nica-de-las-am-ricas/processed/business-identity.txt +11 -0
  10. package/brand-assets/cl-nica-de-las-am-ricas/raw/business-identity.txt +11 -0
  11. package/brand-assets/cl-nica-las-am-ricas/manifest.json +22 -0
  12. package/brand-assets/cl-nica-las-am-ricas/processed/business-identity.txt +11 -0
  13. package/brand-assets/cl-nica-las-am-ricas/raw/business-identity.txt +11 -0
  14. package/brand-assets/conny-demo/manifest.json +22 -0
  15. package/brand-assets/conny-demo/processed/business-identity.txt +7 -0
  16. package/brand-assets/conny-demo/raw/business-identity.txt +7 -0
  17. package/brand-assets/conny-logo.png +0 -0
  18. package/brand-assets/web.background.png +0 -0
  19. package/brand_assets.py +323 -0
  20. package/conny +28 -0
  21. package/conny-chat.py +579 -0
  22. package/conny-omni.py +3843 -0
  23. package/conny.py +113 -0
  24. package/conny_agents/__init__.py +1 -0
  25. package/conny_agents/agenda.py +1 -0
  26. package/conny_agents/captacion.py +1 -0
  27. package/conny_agents/conocimiento.py +1 -0
  28. package/conny_agents/escalacion.py +1 -0
  29. package/conny_agents/objeciones.py +1 -0
  30. package/conny_agents/seguimiento.py +1 -0
  31. package/conny_app.py +287 -0
  32. package/conny_audio.py +350 -0
  33. package/conny_audio_learn.py +84 -0
  34. package/conny_brain_v10.py +804 -0
  35. package/conny_bridge.py +656 -0
  36. package/conny_calendar.py +169 -0
  37. package/conny_cli.py +11784 -0
  38. package/conny_cli_bb.py +437 -0
  39. package/conny_commands.py +243 -0
  40. package/conny_config.py +215 -0
  41. package/conny_core/__init__.py +3 -0
  42. package/conny_core/conversation_engine.py +446 -0
  43. package/conny_core/first_turn_ops.py +287 -0
  44. package/conny_core/persona_registry.py +157 -0
  45. package/conny_core/prompt_ops.py +561 -0
  46. package/conny_cron.py +72 -0
  47. package/conny_demo_v2.py +209 -0
  48. package/conny_demo_voice.py +134 -0
  49. package/conny_design.py +43 -0
  50. package/conny_doctor.py +319 -0
  51. package/conny_domino.py +696 -0
  52. package/conny_generator.py +447 -0
  53. package/conny_google_auth.py +159 -0
  54. package/conny_i18n.py +619 -0
  55. package/conny_init.py +509 -0
  56. package/conny_integrations/__init__.py +4 -0
  57. package/conny_integrations/llm.py +1 -0
  58. package/conny_integrations/vault.py +77 -0
  59. package/conny_integrations/whatsapp.py +1 -0
  60. package/conny_intelligence.py +65 -0
  61. package/conny_learning.py +154 -0
  62. package/conny_memory.py +243 -0
  63. package/conny_memory_engine.py +292 -0
  64. package/conny_nova_proxy.py +170 -0
  65. package/conny_nuke_robot_phrases.py +493 -0
  66. package/conny_pairing.py +253 -0
  67. package/conny_patch.py +291 -0
  68. package/conny_persona_cli.py +150 -0
  69. package/conny_router.py +308 -0
  70. package/conny_runtime_ops.py +271 -0
  71. package/conny_session.py +516 -0
  72. package/conny_skills/__init__.py +1 -0
  73. package/conny_skills/demo_mode.py +35 -0
  74. package/conny_skills/text_processing.py +1 -0
  75. package/conny_skills/tone_detection.py +1 -0
  76. package/conny_smart_features.py +333 -0
  77. package/conny_studio.py +161 -0
  78. package/conny_sync_fix.py +306 -0
  79. package/conny_tui.py +512 -0
  80. package/conny_tui_select.py +202 -0
  81. package/conny_ultra_config.py +411 -0
  82. package/conny_uncertainty.py +174 -0
  83. package/conny_utils.py +87 -0
  84. package/conny_voice.py +156 -0
  85. package/conny_voice_engine.py +124 -0
  86. package/conny_web_search.py +66 -0
  87. package/conny_weekly_report.py +85 -0
  88. package/conny_worm.py +88 -0
  89. package/core/__init__.py +25 -0
  90. package/ecosystem.config.js +24 -0
  91. package/fix_init.py +27 -0
  92. package/install.sh +78 -0
  93. package/knowledge_base.py +330 -0
  94. package/nova/rules/default.yaml +37 -0
  95. package/nova_bridge.py +509 -0
  96. package/npm/conny.js +471 -0
  97. package/package.json +102 -0
  98. package/personas/conny/base/default.yaml +35 -0
  99. package/personas/conny/base/estetica_whatsapp.yaml +36 -0
  100. package/requirements.txt +14 -0
  101. package/run.sh +47 -0
  102. package/search.py +465 -0
  103. package/smart_handoff.py +1150 -0
  104. package/src/__init__.py +0 -0
  105. package/src/conny/__init__.py +0 -0
  106. package/src/conny/admin/__init__.py +0 -0
  107. package/src/conny/admin/api.py +234 -0
  108. package/src/conny/admin/dashboard.py +772 -0
  109. package/src/conny/api/__init__.py +0 -0
  110. package/src/conny/api/routes.py +8851 -0
  111. package/src/conny/brain/__init__.py +15 -0
  112. package/src/conny/brain/engine.py +804 -0
  113. package/src/conny/brain/learning.py +154 -0
  114. package/src/conny/brain/memory.py +324 -0
  115. package/src/conny/brain/smart_features.py +333 -0
  116. package/src/conny/brain/uncertainty.py +167 -0
  117. package/src/conny/channels/__init__.py +0 -0
  118. package/src/conny/channels/audio.py +316 -0
  119. package/src/conny/channels/cli.py +11795 -0
  120. package/src/conny/channels/logo_art.py +11 -0
  121. package/src/conny/channels/voice.py +156 -0
  122. package/src/conny/core/__init__.py +0 -0
  123. package/src/conny/core/config.py +215 -0
  124. package/src/conny/core/cron.py +72 -0
  125. package/src/conny/core/messenger.py +563 -0
  126. package/src/conny/core/router.py +297 -0
  127. package/src/conny/core/session.py +312 -0
  128. package/src/conny/demo/__init__.py +0 -0
  129. package/src/conny/demo/handler.py +3110 -0
  130. package/src/conny/integrations/__init__.py +19 -0
  131. package/src/conny/integrations/calendar.py +169 -0
  132. package/src/conny/integrations/knowledge.py +312 -0
  133. package/src/conny/integrations/search.py +66 -0
  134. package/src/conny/personas/__init__.py +0 -0
  135. package/src/conny/personas/generator.py +447 -0
  136. package/src/conny/production/__init__.py +0 -0
  137. package/src/conny/production/domino.py +696 -0
  138. package/src/conny/production/guard.py +550 -0
  139. package/src/conny/production/handoff.py +1150 -0
  140. package/src/conny/production/monitor.py +353 -0
  141. package/src/conny/utils/__init__.py +2 -0
  142. package/src/conny/utils/helpers.py +75 -0
  143. package/src/conny/utils/i18n.py +619 -0
  144. package/src/core/admin_engines.py +772 -0
  145. package/src/core/globals.py +11845 -0
  146. package/src/core/orchestrator.py +273 -0
  147. package/src/core/production_monitor.py +353 -0
  148. package/src/core/runtime.py +5487 -0
  149. package/src/domain/onboarding_flow.py +230 -0
  150. package/src/domain/prompts/__init__.py +1 -0
  151. package/src/domain/prompts/prospect_pitch.py +282 -0
  152. package/src/domain/send_guard.py +636 -0
  153. package/src/domain/swarm/queen.py +96 -0
  154. package/src/infrastructure/llm_providers/engine.py +487 -0
  155. package/src/interfaces/mcp_server.py +73 -0
  156. package/src/interfaces/nova_bridge.py +58 -0
  157. package/src/interfaces/web/admin_api.py +1379 -0
  158. package/src/interfaces/web/app.py +9408 -0
  159. package/src/interfaces/web/demo_handler.py +3450 -0
  160. package/src/interfaces/web/static/generate_avatars.py +46 -0
  161. package/v7/__init__.py +46 -0
  162. package/v7/agents/__init__.py +46 -0
  163. package/v7/agents/agenda.py +77 -0
  164. package/v7/agents/base.py +216 -0
  165. package/v7/agents/captacion.py +60 -0
  166. package/v7/agents/conocimiento.py +69 -0
  167. package/v7/agents/escalacion.py +83 -0
  168. package/v7/agents/objeciones.py +109 -0
  169. package/v7/agents/seguimiento.py +71 -0
  170. package/v7/memory/__init__.py +46 -0
  171. package/v7/memory/patient_profile.py +200 -0
  172. package/v7/orchestrator.py +275 -0
  173. package/v7/postprocess.py +127 -0
  174. package/v7/router.py +239 -0
  175. package/verify_conversation_impl.py +48 -0
@@ -0,0 +1,85 @@
1
+ """conny_weekly_report.py — Weekly brain report to admin."""
2
+ from __future__ import annotations
3
+ import json, logging, sqlite3
4
+ from datetime import datetime, timedelta
5
+ from pathlib import Path
6
+ from typing import Dict, Optional
7
+
8
+ log = logging.getLogger("conny.weekly_report")
9
+
10
+
11
async def generate_weekly_report(instance_id: str, db_path: str = "conny.db") -> str:
    """Build the human-readable weekly activity report for the admin.

    Three sources are read: the ``conversations`` table of the SQLite
    database at *db_path*, every ``*.jsonl`` file under ``knowledge_gaps/``
    and ``teachings/<instance_id>.jsonl`` (both relative to the current
    working directory).

    Args:
        instance_id: Selects which teachings file is read.
        db_path: Path of the SQLite database holding ``conversations``.

    Returns:
        The report text (Spanish). On any failure the error is logged and a
        short apology message is returned instead; this function never raises.
    """
    try:
        week_ago = (datetime.now() - timedelta(days=7)).isoformat()

        # The connection is closed in ``finally`` so a failing query (for
        # example a missing table) does not leak the handle.
        conn = sqlite3.connect(db_path)
        try:
            conn.row_factory = sqlite3.Row
            cur = conn.cursor()

            # Distinct chats that sent at least one user message this week.
            cur.execute(
                "SELECT COUNT(DISTINCT chat_id) as patients FROM conversations WHERE role='user' AND created_at > ?",
                (week_ago,),
            )
            row = cur.fetchone()
            total_patients = row["patients"] if row else 0

            # Every message (any role) stored this week.
            cur.execute(
                "SELECT COUNT(*) as msgs FROM conversations WHERE created_at > ?",
                (week_ago,),
            )
            row = cur.fetchone()
            total_msgs = row["msgs"] if row else 0
        finally:
            conn.close()

        # Knowledge gaps: one JSON object per line; unparsable or non-object
        # lines are skipped. No date filter is applied to these files.
        gaps_count = 0
        gap_questions = []
        gaps_dir = Path("knowledge_gaps")
        if gaps_dir.exists():
            for gap_file in gaps_dir.glob("*.jsonl"):
                with open(gap_file, encoding="utf-8", errors="replace") as fh:
                    for line in fh:
                        try:
                            record = json.loads(line)
                        except ValueError:
                            continue
                        if not isinstance(record, dict):
                            continue
                        gaps_count += 1
                        gap_questions.append(str(record.get("user_msg") or "")[:80])

        # Teachings: counted only when their ISO "ts" string sorts after the
        # one-week cutoff (plain string comparison of ISO timestamps).
        teachings_this_week = 0
        teachings_file = Path(f"teachings/{instance_id}.jsonl")
        if teachings_file.exists():
            with open(teachings_file, encoding="utf-8", errors="replace") as fh:
                for line in fh:
                    try:
                        record = json.loads(line)
                    except ValueError:
                        continue
                    ts = record.get("ts", "") if isinstance(record, dict) else ""
                    if isinstance(ts, str) and ts > week_ago:
                        teachings_this_week += 1

        report = (
            f"hola! te cuento cómo me fue esta semana:\n\n"
            f"hablé con {total_patients} pacientes ({total_msgs} mensajes)\n"
            f"aprendí {teachings_this_week} cosas nuevas\n"
        )

        if gaps_count > 0:
            report += f"\nhubo {gaps_count} preguntas que no supe responder:\n"
            # Only the first five questions are listed to keep the message short.
            for q in gap_questions[:5]:
                report += f" • {q}\n"
            report += "\nsi me enseñas con /aprender las respondo sola la próxima vez"
        else:
            report += "\nrespondí todo sin problemas esta semana 💪"

        return report

    except Exception as e:
        # Top-level boundary: the admin always receives some message.
        log.error(f"[weekly_report] error: {e}")
        return "no pude generar el reporte esta semana, perdona"
78
+
79
+
80
async def send_weekly_report(instance_id: str, admin_jid: str, send_fn, db_path: str = "conny.db"):
    """Generate the weekly report and deliver it to the admin.

    Nothing is sent (and nothing is logged) when *send_fn* is falsy or the
    generated report is empty. *send_fn* is awaited with ``(admin_jid, report)``.
    """
    text = await generate_weekly_report(instance_id, db_path)
    if not (send_fn and text):
        return
    await send_fn(admin_jid, text)
    log.info(f"[weekly_report] sent to {admin_jid}")
package/conny_worm.py ADDED
@@ -0,0 +1,88 @@
1
+ """conny_worm.py — Animated ASCII worm for terminal."""
2
+ from __future__ import annotations
3
+
4
+ import math
5
+ import sys
6
+ import time
7
+
8
+
9
+ SEGMENTS = ["◉", "█", "▓", "▒", "░", "·"]
10
+ PURPLE = "\033[38;5;141m"
11
+ DIM = "\033[2m"
12
+ R = "\033[0m"
13
+
14
+
15
def boot_sequence(duration: float = 2.0):
    """Play the boot animation: a worm crawls in while "CONNY" is typed out.

    Does nothing when stdout is not a TTY. One frame is drawn every 0.04 s
    for roughly *duration* seconds; the cursor is hidden while animating and
    restored (followed by a screen clear) on the way out.
    """
    out = sys.stdout
    if not out.isatty():
        return

    frame_delay = 0.04
    cols, rows = 60, 7
    total = int(duration / frame_delay)
    title = "CONNY"

    try:
        out.write("\033[?25l")  # hide cursor
        for step in range(total):
            t = step / total
            out.write("\033[H\033[J")  # home + clear screen

            # Head position: moves right-to-left with a sinusoidal bob.
            head_x = int((1.0 - t) * cols * 0.7) + 2
            head_y = int(2 + math.sin(t * math.pi * 3) * 1.2)

            grid = [[" "] * cols for _ in range(rows)]

            # Body: five segments trailing the head, two columns apart,
            # each clamped vertically to the canvas.
            for idx, glyph in enumerate(SEGMENTS[:5]):
                col = head_x + idx * 2
                wobble = int(math.sin((t * 6) - idx * 0.5) * 0.8)
                line = min(rows - 1, max(0, head_y + wobble))
                if 0 <= col < cols:
                    grid[line][col] = glyph

            # Title letters appear progressively on row 5, four columns apart.
            visible = int(t * 2.5 * len(title))
            for idx, letter in enumerate(title[:visible]):
                col = 6 + idx * 4
                if col < cols:
                    grid[5][col] = letter

            out.write("\n")
            for cells in grid:
                out.write(f" {PURPLE}{''.join(cells)}{R}\n")
            out.flush()
            time.sleep(frame_delay)

    except (KeyboardInterrupt, BrokenPipeError):
        pass
    finally:
        out.write("\033[?25h")  # show cursor
        out.write("\033[H\033[J")  # clear for real content
66
+
67
+
68
def crawl_spinner(text: str, duration: float = 1.5):
    """Show a one-line worm spinner next to *text* for about *duration* seconds.

    When stdout is not a TTY the text is written once on its own line and
    the function returns immediately. Otherwise a frame is drawn every
    0.06 s, the line is cleared at the end and the cursor is restored.
    """
    out = sys.stdout
    if not out.isatty():
        out.write(f" {text}\n")
        return

    frame_delay = 0.06
    total = int(duration / frame_delay)
    try:
        out.write("\033[?25l")
        for step in range(total):
            phase = step / total
            # The worm slides back and forth across 12 columns.
            offset = int(abs(math.sin(phase * math.pi * 2)) * 12)
            body = "".join(SEGMENTS[:4])
            out.write(f"\033[2K\r {' ' * offset}{PURPLE}{body}{R} {DIM}{text}{R}")
            out.flush()
            time.sleep(frame_delay)
        out.write(f"\033[2K\r")
    except (KeyboardInterrupt, BrokenPipeError):
        pass
    finally:
        out.write("\033[?25h")
@@ -0,0 +1,25 @@
1
+ """
2
+ conny/core/ — Clean public API for Conny v9.0.
3
+
4
+ Exports the main engine components without requiring direct conny.py imports.
5
+ This module bridges the legacy monolith (conny.py) with the new modular architecture.
6
+ """
7
+ from conny_memory_engine import memory_engine, ConnyMemoryEngine
8
+ from conny_uncertainty import uncertainty_detector, UncertaintyDetector
9
+ from conny_voice import voice, ConnyVoice
10
+ from conny_nova_proxy import NovaLLMProxy
11
+ from src.interfaces.web.admin_api import router as admin_router
12
+ from conny_cron import init_scheduler, shutdown_scheduler
13
+
14
+ __all__ = [
15
+ "memory_engine",
16
+ "ConnyMemoryEngine",
17
+ "uncertainty_detector",
18
+ "UncertaintyDetector",
19
+ "voice",
20
+ "ConnyVoice",
21
+ "NovaLLMProxy",
22
+ "admin_router",
23
+ "init_scheduler",
24
+ "shutdown_scheduler",
25
+ ]
@@ -0,0 +1,24 @@
1
// Process-manager app definitions (presumably a PM2 ecosystem file — confirm).
// Two apps are declared, each launched through its own run.sh with absolute
// paths under /home/ubuntu; both restart after a 3000 ms delay, stop after
// 10 restarts, and do not watch the filesystem.
module.exports = {
  apps: [
    {
      // Main instance.
      name: "conny",
      script: "/home/ubuntu/conny/run.sh",
      cwd: "/home/ubuntu/conny",
      restart_delay: 3000,
      max_restarts: 10,
      out_file: "/home/ubuntu/conny/logs/conny.log",
      error_file: "/home/ubuntu/conny/logs/conny-error.log",
      watch: false,
    },
    {
      // Per-clinic instance; note its error log is named error.log, not conny-error.log.
      name: "conny-clinica-de-las-americas",
      script: "/home/ubuntu/conny-instances/clinica-de-las-americas/run.sh",
      cwd: "/home/ubuntu/conny-instances/clinica-de-las-americas",
      restart_delay: 3000,
      max_restarts: 10,
      out_file: "/home/ubuntu/conny-instances/clinica-de-las-americas/logs/conny.log",
      error_file: "/home/ubuntu/conny-instances/clinica-de-las-americas/logs/error.log",
      watch: false,
    }
  ]
}
package/fix_init.py ADDED
@@ -0,0 +1,27 @@
1
# One-off patch script: extends ``init_conny`` in conny.py so that, after
# initialization, it copies db / llm_engine / auth_engine / mcp_manager from
# ``src.core.globals`` into conny.py's module globals.
#
# The patch is applied by plain text replacement. Because ``old_init`` is a
# prefix of ``new_init``, a blind replace on an already-patched file would
# append the block a second time, so the script checks for that first.
import sys

file_path = "conny.py"
with open(file_path, "r", encoding="utf-8") as f:
    content = f.read()

# Exact text expected in the unpatched file.
old_init = """async def init_conny():
    \"\"\"Inicializa Conny Ultra.\"\"\"
    global conny
    conny = ConnyUltra()
    await conny.initialize()"""

# Replacement: same function plus the re-export of the shared engine objects.
new_init = """async def init_conny():
    \"\"\"Inicializa Conny Ultra.\"\"\"
    global conny
    conny = ConnyUltra()
    await conny.initialize()

    import src.core.globals as g
    global db, llm_engine, auth_engine, mcp_manager
    db = g.db
    llm_engine = g.llm_engine
    auth_engine = getattr(g, "auth_engine", None)
    mcp_manager = getattr(g, "mcp_manager", None)
"""

if new_init in content:
    # Already patched: leave the file untouched so the script is idempotent.
    print(f"fix_init: {file_path} already patched, nothing to do", file=sys.stderr)
elif old_init not in content:
    # Anchor text missing: report it instead of silently rewriting the file.
    print(f"fix_init: init_conny() anchor not found in {file_path}, nothing changed", file=sys.stderr)
else:
    content = content.replace(old_init, new_init)
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(content)
package/install.sh ADDED
@@ -0,0 +1,78 @@
1
#!/bin/bash
# Conny AI - The AI Receptionist Engine
# Ultimate GitHub Installer Script
#
# Steps: (1) best-effort install of chafa, (2) detect a Python >= 3.9
# interpreter (informational only), (3) require npm, remove older global
# packages, (4) install the CLI globally, (5) print the installed version.
# `set -e` aborts on the first failing command; optional steps append
# `|| true` so their failure does not stop the installer.
set -e

# ANSI colour / style escape sequences used by the echo -e calls below.
C_PRIMARY="\033[38;5;135m"
C_SUCCESS="\033[38;5;46m"
C_MUTED="\033[38;5;240m"
BOLD="\033[1m"
RESET="\033[0m"

echo -e "\n ${C_PRIMARY}${BOLD}✦ Conny AI - Ultimate Installer${RESET}"
echo -e " ${C_MUTED}─────────────────────────────────────────${RESET}"

# Handle sudo gracefully (Termux / Root environments)
# SUDO stays empty when sudo is not installed, so "$SUDO apt-get" still works.
SUDO=""
if command -v sudo &> /dev/null; then
  SUDO="sudo"
fi

# 1. Install chafa if possible
# Tries pkg, then apt-get, then brew; every install is best-effort.
if ! command -v chafa &> /dev/null; then
  echo -e "\n ${BOLD}1. Instalando motor True-Color (chafa)...${RESET}"
  if command -v pkg &> /dev/null; then
    pkg install -y chafa || true
  elif command -v apt-get &> /dev/null; then
    $SUDO apt-get update -yqq && $SUDO apt-get install -yqq chafa || true
  elif command -v brew &> /dev/null; then
    brew install chafa || true
  else
    echo -e " ${C_MUTED}No se pudo instalar chafa automáticamente. Se usará el logo clásico.${RESET}"
  fi
else
  echo -e "\n ${BOLD}1. Motor True-Color detectado (chafa).${RESET}"
fi

# 2. Verify Python sanely (3.9+), without hardcoding minor versions
# Each candidate runs a tiny script fed on stdin that exits 0 only when the
# interpreter is at least 3.9; the first match wins.
PYTHON_BIN=""
for candidate in python3 python; do
  if command -v "$candidate" >/dev/null 2>&1; then
    if "$candidate" - <<'PY' >/dev/null 2>&1
import sys
raise SystemExit(0 if sys.version_info >= (3, 9) else 1)
PY
    then
      PYTHON_BIN="$candidate"
      break
    fi
  fi
done

# A missing Python is reported but is not fatal for this installer.
if [ -n "$PYTHON_BIN" ]; then
  PY_VERSION="$($PYTHON_BIN -c 'import sys; print(".".join(map(str, sys.version_info[:3])))')"
  echo -e "\n ${BOLD}2. Python detectado:${RESET} ${C_SUCCESS}${PYTHON_BIN} (${PY_VERSION})${RESET}"
else
  echo -e "\n ${BOLD}2. Python 3.9+ no detectado localmente.${RESET}"
  echo -e " ${C_MUTED}Conny intentará crear su runtime cuando se ejecute por primera vez.${RESET}"
fi

# 3. Install NPM Package
# npm is the only hard requirement: exit with status 1 when it is absent.
if ! command -v npm &> /dev/null; then
  echo -e "\n \033[31mError: Node.js y npm son requeridos. Instálalos primero.\033[0m"
  exit 1
fi

echo -e "\n ${BOLD}3. Limpiando versiones anteriores...${RESET}"
# Best-effort removal of older global installs under these two names.
npm uninstall -g conny-ai @blackboss/conny || true

echo -e "\n ${BOLD}4. Instalando Conny CLI y Motor AI...${RESET}"
# CONNY_INSTALL_PACKAGE overrides the package spec; defaults to conny-ai@latest.
npm install -g "${CONNY_INSTALL_PACKAGE:-conny-ai@latest}"

echo -e "\n ${BOLD}5. Verificando bootstrap del CLI...${RESET}"
# The version check is skipped when `conny` is not on PATH; the success
# message below is printed either way.
if command -v conny >/dev/null 2>&1; then
  conny --version || true
fi

echo -e "\n ${C_SUCCESS}${BOLD}✔ ¡Conny instalado con éxito!${RESET}"
echo -e " Ejecuta ${C_PRIMARY}conny init${RESET} en tu terminal para empezar la magia.\n"
@@ -0,0 +1,330 @@
1
+ """
2
+ knowledge_base.py — Base de conocimiento de la clinica para Conny
3
+
4
+ Sistema que permite al admin cargar un documento maestro con toda la info
5
+ de la clinica (servicios, precios, contraindicaciones, protocolos, FAQs, etc.)
6
+ y que Conny consulte siempre antes de responder.
7
+
8
+ Flujo:
9
+ 1. Admin termina setup y Conny le pide el documento maestro
10
+ 2. Admin envia texto libre (puede ser muy largo)
11
+ 3. Se chunquea y se guarda en DB
12
+ 4. Cuando un paciente pregunta algo, se recuperan los chunks relevantes
13
+ 5. El contexto del KB se inyecta en el system prompt con maxima prioridad
14
+
15
+ Schema DB adicional:
16
+ - knowledge_base: texto completo del documento
17
+ - kb_chunks: chunks indexados por palabras clave
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ import logging
24
+ import re
25
+ import sqlite3
26
+ from datetime import datetime
27
+ from typing import Dict, List, Optional, Tuple
28
+
29
+ log = logging.getLogger("conny.kb")
30
+
31
+ # Tamano de chunks (en caracteres)
32
+ CHUNK_SIZE = 400
33
+ CHUNK_OVERLAP = 80
34
+
35
+ # Cuantos chunks incluir en el contexto del LLM
36
+ MAX_CHUNKS_IN_CONTEXT = 4
37
+
38
+ # Minima relevancia para incluir un chunk (0-1)
39
+ MIN_RELEVANCE = 0.15
40
+
41
+
42
+ # ─── Chunking ────────────────────────────────────────────────────────────────
43
+
44
def _split_into_chunks(text: str) -> List[str]:
    """
    Split *text* into chunks of roughly CHUNK_SIZE characters.

    Paragraphs (separated by blank lines) are packed together while the
    running chunk stays under CHUNK_SIZE. A paragraph longer than CHUNK_SIZE
    is packed sentence by sentence instead. Chunks of 30 characters or fewer
    are dropped from the result. No overlap between chunks is applied here.
    """
    # Normalize line endings and collapse runs of blank lines.
    normalized = re.sub(r'\n{3,}', '\n\n', re.sub(r'\r\n', '\n', text)).strip()
    blocks = [b.strip() for b in re.split(r'\n\n+', normalized) if b.strip()]

    pieces = []
    pending = ""

    for block in blocks:
        # Still room: glue the paragraph onto the running chunk.
        if len(pending) + len(block) < CHUNK_SIZE:
            pending = f"{pending}\n\n{block}".strip()
            continue

        if pending:
            pieces.append(pending)

        # Paragraph fits on its own: it starts the next running chunk.
        if len(block) <= CHUNK_SIZE:
            pending = block
            continue

        # Oversized paragraph: pack by sentence; the tail stays pending so
        # it can still merge with the following paragraph.
        pending = ""
        for sentence in re.split(r'(?<=[.!?])\s+', block):
            if len(pending) + len(sentence) < CHUNK_SIZE:
                pending = f"{pending} {sentence}".strip()
            else:
                if pending:
                    pieces.append(pending)
                pending = sentence

    if pending:
        pieces.append(pending)

    return [piece for piece in pieces if len(piece) > 30]
88
+
89
+
90
+ def _extract_keywords(text: str) -> List[str]:
91
+ """
92
+ Extrae palabras clave de un texto para indexacion.
93
+ Ignora stopwords comunes en espanol.
94
+ """
95
+ STOPWORDS = {
96
+ "el", "la", "los", "las", "un", "una", "unos", "unas",
97
+ "de", "del", "al", "a", "en", "con", "por", "para", "que",
98
+ "como", "cuando", "donde", "cual", "quien", "este", "esta",
99
+ "ese", "esa", "mi", "tu", "su", "y", "o", "pero", "si", "no",
100
+ "mas", "muy", "ya", "es", "son", "era", "fue", "ser", "estar",
101
+ "hay", "han", "hemos", "tienen", "tiene", "hace", "hacer",
102
+ "puede", "pueden", "se", "lo", "le", "les", "me", "te", "nos",
103
+ "sobre", "entre", "hasta", "desde", "durante", "antes", "despues",
104
+ "tambien", "ademas", "aunque", "porque", "sino", "sino", "ni",
105
+ "todo", "toda", "todos", "todas", "cada", "otro", "otra",
106
+ }
107
+ words = re.findall(r'\b[a-záéíóúüñA-ZÁÉÍÓÚÜÑ]{3,}\b', text.lower())
108
+ keywords = [w for w in words if w not in STOPWORDS]
109
+ # Frecuencia
110
+ freq: Dict[str, int] = {}
111
+ for w in keywords:
112
+ freq[w] = freq.get(w, 0) + 1
113
+ # Top palabras por frecuencia
114
+ return sorted(freq, key=freq.get, reverse=True)[:20]
115
+
116
+
117
+ def _score_chunk(chunk_keywords: List[str], query_keywords: List[str]) -> float:
118
+ """
119
+ Calcula relevancia de un chunk para una query.
120
+ Score de 0 a 1.
121
+ """
122
+ if not query_keywords or not chunk_keywords:
123
+ return 0.0
124
+ chunk_set = set(chunk_keywords)
125
+ matches = sum(1 for kw in query_keywords if kw in chunk_set)
126
+ # Bonus por matches exactos de frases
127
+ return min(matches / max(len(query_keywords), 1), 1.0)
128
+
129
+
130
+ # ─── KnowledgeBase Manager ────────────────────────────────────────────────────
131
+
132
class KnowledgeBase:
    """
    Manager for the clinic knowledge base stored in a SQLite file.

    The full document lives in the single row (id=1) of ``knowledge_base``;
    its chunks and their keyword lists live in ``kb_chunks``. Every operation
    opens its own connection and closes it before returning (sqlite3's
    ``with connection:`` only commits or rolls back, it does not close).
    """

    def __init__(self, db_path: str):
        """Remember *db_path* and create the KB tables if they are missing."""
        self.db_path = db_path
        self._ensure_tables()

    def _conn(self) -> sqlite3.Connection:
        """Open a new connection with name-based row access; the caller closes it."""
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def _fetchall(self, sql: str, params: tuple = ()) -> list:
        """Run a read query and return all rows, closing the connection afterwards."""
        conn = self._conn()
        try:
            return conn.execute(sql, params).fetchall()
        finally:
            conn.close()

    def _fetchone(self, sql: str, params: tuple = ()):
        """Run a read query and return the first row (or None), closing the connection."""
        conn = self._conn()
        try:
            return conn.execute(sql, params).fetchone()
        finally:
            conn.close()

    def _ensure_tables(self):
        """Create the KB tables and the id=1 master row if they do not exist."""
        conn = self._conn()
        try:
            with conn:
                conn.executescript("""
                    -- Documento maestro de la clinica
                    CREATE TABLE IF NOT EXISTS knowledge_base (
                        id INTEGER PRIMARY KEY,
                        raw_text TEXT DEFAULT '',
                        chunk_count INTEGER DEFAULT 0,
                        word_count INTEGER DEFAULT 0,
                        created_at TEXT DEFAULT (datetime('now')),
                        updated_at TEXT DEFAULT (datetime('now'))
                    );
                    INSERT OR IGNORE INTO knowledge_base (id) VALUES (1);

                    -- Chunks indexados para recuperacion rapida
                    CREATE TABLE IF NOT EXISTS kb_chunks (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        content TEXT NOT NULL,
                        keywords TEXT DEFAULT '[]',
                        section_hint TEXT DEFAULT '',
                        created_at TEXT DEFAULT (datetime('now'))
                    );
                    CREATE INDEX IF NOT EXISTS idx_kb_chunks_id ON kb_chunks(id);
                """)
        finally:
            conn.close()
        log.info("[kb] tablas listas")

    # ─── Ingestion ────────────────────────────────────────────────────────────

    def save_learned_fact(self, question: str, answer: str, source: str = "admin") -> Dict:
        """Append a question/answer pair learned from the admin to the KB.

        Returns the stats dict produced by :meth:`append`.
        """
        fact_text = f"\n\n--- DATO APRENDIDO ({source}) ---\nPREGUNTA: {question}\nRESPUESTA: {answer}"
        return self.append(fact_text)

    def ingest(self, raw_text: str) -> Dict:
        """
        Process and store the clinic master document, replacing any previous KB.

        Returns ``{"ok": False, "error": ...}`` for empty text, otherwise
        ``{"ok": True, "chunks", "words", "chars"}`` with the ingestion stats.
        """
        if not raw_text or not raw_text.strip():
            return {"ok": False, "error": "texto vacio"}

        raw_text = raw_text.strip()
        chunks = _split_into_chunks(raw_text)
        word_count = len(raw_text.split())

        chunk_rows = []
        for chunk in chunks:
            keywords = _extract_keywords(chunk)
            # Section hint: the first line, when it is short and does not end
            # with a period (i.e. it looks like a heading).
            first_line = chunk.split('\n')[0].strip()
            section_hint = first_line if len(first_line) < 60 and not first_line.endswith('.') else ""
            chunk_rows.append((chunk, json.dumps(keywords, ensure_ascii=False), section_hint))

        conn = self._conn()
        try:
            # One transaction: old chunks are only lost if the new ones commit.
            with conn:
                conn.execute("DELETE FROM kb_chunks")
                conn.execute("""
                    UPDATE knowledge_base
                    SET raw_text=?, chunk_count=?, word_count=?, updated_at=datetime('now')
                    WHERE id=1
                """, (raw_text, len(chunks), word_count))
                conn.executemany("""
                    INSERT INTO kb_chunks (content, keywords, section_hint)
                    VALUES (?, ?, ?)
                """, chunk_rows)
        finally:
            conn.close()

        log.info(f"[kb] ingested {len(chunks)} chunks, {word_count} palabras")
        return {
            "ok": True,
            "chunks": len(chunks),
            "words": word_count,
            "chars": len(raw_text),
        }

    def append(self, additional_text: str) -> Dict:
        """Add text to the existing KB by re-ingesting old text plus the new text."""
        existing = self.get_raw()
        combined = (existing + "\n\n" + additional_text).strip() if existing else additional_text
        return self.ingest(combined)

    # ─── Retrieval ────────────────────────────────────────────────────────────

    def retrieve(self, query: str, max_chunks: int = MAX_CHUNKS_IN_CONTEXT) -> str:
        """
        Return the most relevant chunks for *query*, joined with ``---`` separators.

        Returns "" when the KB is empty. When the query yields no keywords,
        or no chunk reaches MIN_RELEVANCE, the first *max_chunks* chunks are
        returned as general context.
        """
        if not self.has_content():
            return ""

        query_keywords = _extract_keywords(query)
        if not query_keywords:
            return self._get_first_chunks(max_chunks)

        rows = self._fetchall("SELECT id, content, keywords FROM kb_chunks")

        scored: List[Tuple[float, str]] = []
        for row in rows:
            try:
                chunk_kws = json.loads(row["keywords"] or "[]")
            except Exception:
                # A corrupt keyword list makes the chunk score 0, not fail.
                chunk_kws = []
            score = _score_chunk(chunk_kws, query_keywords)
            if score >= MIN_RELEVANCE:
                scored.append((score, row["content"]))

        if not scored:
            return self._get_first_chunks(max_chunks)

        # Highest score first; the sort is stable, so ties keep row order.
        scored.sort(key=lambda x: x[0], reverse=True)
        selected = [content for _, content in scored[:max_chunks]]

        return "\n\n---\n\n".join(selected)

    def _get_first_chunks(self, n: int) -> str:
        """Return the first *n* chunks by id, joined with ``---`` separators."""
        rows = self._fetchall(
            "SELECT content FROM kb_chunks ORDER BY id LIMIT ?", (n,)
        )
        return "\n\n---\n\n".join(r["content"] for r in rows)

    def get_raw(self) -> str:
        """Return the full stored document ("" when the master row is missing)."""
        row = self._fetchone("SELECT raw_text FROM knowledge_base WHERE id=1")
        return row["raw_text"] if row else ""

    def get_stats(self) -> Dict:
        """Return chunk count, word count and last-update timestamp of the KB."""
        row = self._fetchone(
            "SELECT chunk_count, word_count, updated_at FROM knowledge_base WHERE id=1"
        )
        if row:
            return {
                "chunks": row["chunk_count"],
                "words": row["word_count"],
                "updated_at": row["updated_at"],
            }
        return {"chunks": 0, "words": 0, "updated_at": None}

    def has_content(self) -> bool:
        """Return True when the stored chunk count is greater than zero."""
        row = self._fetchone(
            "SELECT chunk_count FROM knowledge_base WHERE id=1"
        )
        return bool(row and row["chunk_count"] > 0)

    def clear(self):
        """Delete every chunk and reset the master row to an empty document."""
        conn = self._conn()
        try:
            with conn:
                conn.execute("DELETE FROM kb_chunks")
                conn.execute("""
                    UPDATE knowledge_base
                    SET raw_text='', chunk_count=0, word_count=0, updated_at=datetime('now')
                    WHERE id=1
                """)
        finally:
            conn.close()
        log.info("[kb] KB limpiado")
314
+
315
+
316
+ # ─── Formateo para el LLM ─────────────────────────────────────────────────────
317
+
318
def format_kb_context(kb_text: str) -> str:
    """
    Wrap KB context in a clearly delimited block for the LLM.

    Returns "" for empty input; otherwise the text framed by a header, a
    priority notice and a closing marker.
    """
    if kb_text:
        header = "=== BASE DE CONOCIMIENTO DE LA CLINICA ===\n"
        notice = (
            "(Esta es informacion oficial de la clinica. Usala con maxima prioridad "
            "antes de buscar en internet o inventar datos.)\n\n"
        )
        footer = "=== FIN BASE DE CONOCIMIENTO ==="
        return header + notice + f"{kb_text}\n" + footer
    return ""