@innvisor/conny-ai 9.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. package/.env.example +68 -0
  2. package/CHANGELOG.md +54 -0
  3. package/LICENSE +21 -0
  4. package/README.md +369 -0
  5. package/brand-assets/A_dark_luxury_web_background_202605210700.jpeg +0 -0
  6. package/brand-assets/Conny.web.logo.png +0 -0
  7. package/brand-assets/Logo_Conny_Petalo_Claro.png +0 -0
  8. package/brand-assets/cl-nica-de-las-am-ricas/manifest.json +22 -0
  9. package/brand-assets/cl-nica-de-las-am-ricas/processed/business-identity.txt +11 -0
  10. package/brand-assets/cl-nica-de-las-am-ricas/raw/business-identity.txt +11 -0
  11. package/brand-assets/cl-nica-las-am-ricas/manifest.json +22 -0
  12. package/brand-assets/cl-nica-las-am-ricas/processed/business-identity.txt +11 -0
  13. package/brand-assets/cl-nica-las-am-ricas/raw/business-identity.txt +11 -0
  14. package/brand-assets/conny-demo/manifest.json +22 -0
  15. package/brand-assets/conny-demo/processed/business-identity.txt +7 -0
  16. package/brand-assets/conny-demo/raw/business-identity.txt +7 -0
  17. package/brand-assets/conny-logo.png +0 -0
  18. package/brand-assets/web.background.png +0 -0
  19. package/brand_assets.py +323 -0
  20. package/conny +28 -0
  21. package/conny-chat.py +579 -0
  22. package/conny-omni.py +3843 -0
  23. package/conny.py +113 -0
  24. package/conny_agents/__init__.py +1 -0
  25. package/conny_agents/agenda.py +1 -0
  26. package/conny_agents/captacion.py +1 -0
  27. package/conny_agents/conocimiento.py +1 -0
  28. package/conny_agents/escalacion.py +1 -0
  29. package/conny_agents/objeciones.py +1 -0
  30. package/conny_agents/seguimiento.py +1 -0
  31. package/conny_app.py +287 -0
  32. package/conny_audio.py +350 -0
  33. package/conny_audio_learn.py +84 -0
  34. package/conny_brain_v10.py +804 -0
  35. package/conny_bridge.py +656 -0
  36. package/conny_calendar.py +169 -0
  37. package/conny_cli.py +11784 -0
  38. package/conny_cli_bb.py +437 -0
  39. package/conny_commands.py +243 -0
  40. package/conny_config.py +215 -0
  41. package/conny_core/__init__.py +3 -0
  42. package/conny_core/conversation_engine.py +446 -0
  43. package/conny_core/first_turn_ops.py +287 -0
  44. package/conny_core/persona_registry.py +157 -0
  45. package/conny_core/prompt_ops.py +561 -0
  46. package/conny_cron.py +72 -0
  47. package/conny_demo_v2.py +209 -0
  48. package/conny_demo_voice.py +134 -0
  49. package/conny_design.py +43 -0
  50. package/conny_doctor.py +319 -0
  51. package/conny_domino.py +696 -0
  52. package/conny_generator.py +447 -0
  53. package/conny_google_auth.py +159 -0
  54. package/conny_i18n.py +619 -0
  55. package/conny_init.py +509 -0
  56. package/conny_integrations/__init__.py +4 -0
  57. package/conny_integrations/llm.py +1 -0
  58. package/conny_integrations/vault.py +77 -0
  59. package/conny_integrations/whatsapp.py +1 -0
  60. package/conny_intelligence.py +65 -0
  61. package/conny_learning.py +154 -0
  62. package/conny_memory.py +243 -0
  63. package/conny_memory_engine.py +292 -0
  64. package/conny_nova_proxy.py +170 -0
  65. package/conny_nuke_robot_phrases.py +493 -0
  66. package/conny_pairing.py +253 -0
  67. package/conny_patch.py +291 -0
  68. package/conny_persona_cli.py +150 -0
  69. package/conny_router.py +308 -0
  70. package/conny_runtime_ops.py +271 -0
  71. package/conny_session.py +516 -0
  72. package/conny_skills/__init__.py +1 -0
  73. package/conny_skills/demo_mode.py +35 -0
  74. package/conny_skills/text_processing.py +1 -0
  75. package/conny_skills/tone_detection.py +1 -0
  76. package/conny_smart_features.py +333 -0
  77. package/conny_studio.py +161 -0
  78. package/conny_sync_fix.py +306 -0
  79. package/conny_tui.py +512 -0
  80. package/conny_tui_select.py +202 -0
  81. package/conny_ultra_config.py +411 -0
  82. package/conny_uncertainty.py +174 -0
  83. package/conny_utils.py +87 -0
  84. package/conny_voice.py +156 -0
  85. package/conny_voice_engine.py +124 -0
  86. package/conny_web_search.py +66 -0
  87. package/conny_weekly_report.py +85 -0
  88. package/conny_worm.py +88 -0
  89. package/core/__init__.py +25 -0
  90. package/ecosystem.config.js +24 -0
  91. package/fix_init.py +27 -0
  92. package/install.sh +78 -0
  93. package/knowledge_base.py +330 -0
  94. package/nova/rules/default.yaml +37 -0
  95. package/nova_bridge.py +509 -0
  96. package/npm/conny.js +471 -0
  97. package/package.json +102 -0
  98. package/personas/conny/base/default.yaml +35 -0
  99. package/personas/conny/base/estetica_whatsapp.yaml +36 -0
  100. package/requirements.txt +14 -0
  101. package/run.sh +47 -0
  102. package/search.py +465 -0
  103. package/smart_handoff.py +1150 -0
  104. package/src/__init__.py +0 -0
  105. package/src/conny/__init__.py +0 -0
  106. package/src/conny/admin/__init__.py +0 -0
  107. package/src/conny/admin/api.py +234 -0
  108. package/src/conny/admin/dashboard.py +772 -0
  109. package/src/conny/api/__init__.py +0 -0
  110. package/src/conny/api/routes.py +8851 -0
  111. package/src/conny/brain/__init__.py +15 -0
  112. package/src/conny/brain/engine.py +804 -0
  113. package/src/conny/brain/learning.py +154 -0
  114. package/src/conny/brain/memory.py +324 -0
  115. package/src/conny/brain/smart_features.py +333 -0
  116. package/src/conny/brain/uncertainty.py +167 -0
  117. package/src/conny/channels/__init__.py +0 -0
  118. package/src/conny/channels/audio.py +316 -0
  119. package/src/conny/channels/cli.py +11795 -0
  120. package/src/conny/channels/logo_art.py +11 -0
  121. package/src/conny/channels/voice.py +156 -0
  122. package/src/conny/core/__init__.py +0 -0
  123. package/src/conny/core/config.py +215 -0
  124. package/src/conny/core/cron.py +72 -0
  125. package/src/conny/core/messenger.py +563 -0
  126. package/src/conny/core/router.py +297 -0
  127. package/src/conny/core/session.py +312 -0
  128. package/src/conny/demo/__init__.py +0 -0
  129. package/src/conny/demo/handler.py +3110 -0
  130. package/src/conny/integrations/__init__.py +19 -0
  131. package/src/conny/integrations/calendar.py +169 -0
  132. package/src/conny/integrations/knowledge.py +312 -0
  133. package/src/conny/integrations/search.py +66 -0
  134. package/src/conny/personas/__init__.py +0 -0
  135. package/src/conny/personas/generator.py +447 -0
  136. package/src/conny/production/__init__.py +0 -0
  137. package/src/conny/production/domino.py +696 -0
  138. package/src/conny/production/guard.py +550 -0
  139. package/src/conny/production/handoff.py +1150 -0
  140. package/src/conny/production/monitor.py +353 -0
  141. package/src/conny/utils/__init__.py +2 -0
  142. package/src/conny/utils/helpers.py +75 -0
  143. package/src/conny/utils/i18n.py +619 -0
  144. package/src/core/admin_engines.py +772 -0
  145. package/src/core/globals.py +11845 -0
  146. package/src/core/orchestrator.py +273 -0
  147. package/src/core/production_monitor.py +353 -0
  148. package/src/core/runtime.py +5487 -0
  149. package/src/domain/onboarding_flow.py +230 -0
  150. package/src/domain/prompts/__init__.py +1 -0
  151. package/src/domain/prompts/prospect_pitch.py +282 -0
  152. package/src/domain/send_guard.py +636 -0
  153. package/src/domain/swarm/queen.py +96 -0
  154. package/src/infrastructure/llm_providers/engine.py +487 -0
  155. package/src/interfaces/mcp_server.py +73 -0
  156. package/src/interfaces/nova_bridge.py +58 -0
  157. package/src/interfaces/web/admin_api.py +1379 -0
  158. package/src/interfaces/web/app.py +9408 -0
  159. package/src/interfaces/web/demo_handler.py +3450 -0
  160. package/src/interfaces/web/static/generate_avatars.py +46 -0
  161. package/v7/__init__.py +46 -0
  162. package/v7/agents/__init__.py +46 -0
  163. package/v7/agents/agenda.py +77 -0
  164. package/v7/agents/base.py +216 -0
  165. package/v7/agents/captacion.py +60 -0
  166. package/v7/agents/conocimiento.py +69 -0
  167. package/v7/agents/escalacion.py +83 -0
  168. package/v7/agents/objeciones.py +109 -0
  169. package/v7/agents/seguimiento.py +71 -0
  170. package/v7/memory/__init__.py +46 -0
  171. package/v7/memory/patient_profile.py +200 -0
  172. package/v7/orchestrator.py +275 -0
  173. package/v7/postprocess.py +127 -0
  174. package/v7/router.py +239 -0
  175. package/verify_conversation_impl.py +48 -0
@@ -0,0 +1,19 @@
1
+ """Integrations subsystem: external services, calendar, search, knowledge."""
2
+
3
+ from conny.integrations.knowledge import (
4
+ KnowledgeBase,
5
+ format_kb_context,
6
+ CHUNK_SIZE,
7
+ CHUNK_OVERLAP,
8
+ MAX_CHUNKS_IN_CONTEXT,
9
+ MIN_RELEVANCE,
10
+ )
11
+
12
+ __all__ = [
13
+ "KnowledgeBase",
14
+ "format_kb_context",
15
+ "CHUNK_SIZE",
16
+ "CHUNK_OVERLAP",
17
+ "MAX_CHUNKS_IN_CONTEXT",
18
+ "MIN_RELEVANCE",
19
+ ]
@@ -0,0 +1,169 @@
1
+ """conny_calendar.py — Google Calendar: check availability + book appointments."""
2
+ from __future__ import annotations
3
+ import json, logging, os
4
+ from datetime import datetime, timedelta
5
+ from pathlib import Path
6
+ from typing import Dict, List, Optional
7
+ import httpx
8
+
9
+ log = logging.getLogger("conny.calendar")
10
+
11
+ VAULT_DIR = Path("integrations/vault")
12
+
13
+
14
class ConnyCalendar:
    """Google Calendar integration for listing events and booking appointments.

    OAuth material is looked up under ``VAULT_DIR / instance_id``:
    ``google_tokens.json`` (access / refresh tokens) and
    ``google_credentials.json`` (OAuth client id / secret).

    Network and HTTP failures are logged and reported to the caller as an
    empty result (``[]`` or ``None``) rather than raised.
    """

    _TOKEN_URL = "https://oauth2.googleapis.com/token"
    _EVENTS_URL = "https://www.googleapis.com/calendar/v3/calendars/primary/events"

    def __init__(self, instance_id: str = "default"):
        self.instance_id = instance_id
        self._tokens_file = VAULT_DIR / instance_id / "google_tokens.json"
        self._creds_file = VAULT_DIR / instance_id / "google_credentials.json"

    @property
    def is_connected(self) -> bool:
        """Return True when a token file exists (its content is not validated)."""
        return self._tokens_file.exists()

    @staticmethod
    def _read_json(path: Path) -> Optional[dict]:
        """Load a JSON object from *path*; return None if unreadable or not an object."""
        try:
            data = json.loads(path.read_text())
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            log.warning("[calendar] could not read %s: %s", path.name, exc)
            return None
        return data if isinstance(data, dict) else None

    @staticmethod
    def _window_start(date: Optional[str]) -> datetime:
        """Return the naive-UTC start of the availability window.

        ``date`` is parsed as ``YYYY-MM-DD``; when it is missing or malformed
        the current UTC time is used. The value is naive on purpose: callers
        append ``"Z"`` to its ISO form, so it must already be expressed in UTC.
        """
        if date:
            try:
                # NOTE(review): a parsed date is sent as midnight *UTC*; the
                # clinic's local midnight may be what is intended - confirm.
                return datetime.strptime(date, "%Y-%m-%d")
            except (TypeError, ValueError):
                pass  # fall through to "now"
        # Local import: the file's top-level import only brings in
        # datetime and timedelta.
        from datetime import timezone
        return datetime.now(timezone.utc).replace(tzinfo=None)

    async def _get_access_token(self) -> Optional[str]:
        """Return a usable access token, refreshing it when possible.

        Returns None when no token file exists or it cannot be parsed. When a
        refresh is not possible (no refresh token, no client credentials) or
        fails, the stored access token is returned as-is.
        """
        if not self._tokens_file.exists():
            return None
        tokens = self._read_json(self._tokens_file)
        if tokens is None:
            return None

        refresh_token = tokens.get("refresh_token")
        if not refresh_token or not self._creds_file.exists():
            return tokens.get("access_token")

        creds_data = self._read_json(self._creds_file)
        if creds_data is None:
            return tokens.get("access_token")
        # Google client files nest the values under "installed" or "web".
        inner = creds_data.get("installed") or creds_data.get("web") or creds_data
        client_id = inner.get("client_id", "")
        client_secret = inner.get("client_secret", "")
        if not client_id:
            return tokens.get("access_token")

        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                r = await client.post(self._TOKEN_URL, data={
                    "client_id": client_id,
                    "client_secret": client_secret,
                    "refresh_token": refresh_token,
                    "grant_type": "refresh_token",
                })
            if r.status_code == 200:
                new = r.json()
                tokens["access_token"] = new["access_token"]
                self._tokens_file.write_text(json.dumps(tokens, indent=2))
                return new["access_token"]
            log.warning("[calendar] refresh rejected: %s", r.status_code)
        except Exception as e:  # best-effort: fall back to the stored token
            log.warning("[calendar] refresh failed: %s", e)

        return tokens.get("access_token")

    async def get_availability(self, date: Optional[str] = None,
                               days_ahead: int = 3) -> List[Dict]:
        """Return the events found in the next ``days_ahead`` days.

        Despite the name, the result lists *existing* events (busy slots),
        each as ``{"summary", "start", "end"}``. Returns ``[]`` when there is
        no token or the request fails.
        """
        token = await self._get_access_token()
        if not token:
            return []

        start = self._window_start(date)
        end = start + timedelta(days=days_ahead)
        time_min = start.isoformat() + "Z"
        time_max = end.isoformat() + "Z"

        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                r = await client.get(
                    self._EVENTS_URL,
                    headers={"Authorization": f"Bearer {token}"},
                    params={"timeMin": time_min, "timeMax": time_max,
                            "singleEvents": "true", "orderBy": "startTime"},
                )
            if r.status_code != 200:
                log.warning("[calendar] events fetch failed: %s", r.status_code)
                return []
            events = r.json().get("items", [])
            # All-day events carry "date" instead of "dateTime" and therefore
            # come back with empty start/end strings.
            return [{"summary": e.get("summary", "Ocupado"),
                     "start": (e.get("start") or {}).get("dateTime", ""),
                     "end": (e.get("end") or {}).get("dateTime", "")}
                    for e in events]
        except Exception as e:
            log.error("[calendar] error: %s", e)
            return []

    async def create_appointment(self, patient_name: str, phone: str,
                                 service: str, date_time: str,
                                 duration_minutes: int = 30) -> Optional[str]:
        """Create a calendar event and return its id, or None on any failure.

        ``date_time`` must be an ISO-8601 string accepted by
        ``datetime.fromisoformat``; an invalid value yields None.
        """
        token = await self._get_access_token()
        if not token:
            return None

        try:
            start_dt = datetime.fromisoformat(date_time)
        except (TypeError, ValueError):
            return None

        end_dt = start_dt + timedelta(minutes=duration_minutes)

        event = {
            "summary": f"Cita: {patient_name} — {service}",
            "description": f"Paciente: {patient_name}\nTeléfono: {phone}\nServicio: {service}\nAgendado por: Conny AI",
            "start": {"dateTime": start_dt.isoformat(), "timeZone": "America/Bogota"},
            "end": {"dateTime": end_dt.isoformat(), "timeZone": "America/Bogota"},
            "reminders": {
                "useDefault": False,
                "overrides": [
                    {"method": "popup", "minutes": 60},
                    {"method": "popup", "minutes": 1440},
                ],
            },
        }

        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                r = await client.post(
                    self._EVENTS_URL,
                    headers={"Authorization": f"Bearer {token}",
                             "Content-Type": "application/json"},
                    json=event,
                )
            if r.status_code in (200, 201):
                created = r.json()
                log.info("[calendar] appointment created: %s", created.get("id"))
                return created.get("id")
            log.error("[calendar] create failed: %s %s", r.status_code, r.text[:200])
            return None
        except Exception as e:
            log.error("[calendar] create error: %s", e)
            return None

    async def get_availability_summary(self, days_ahead: int = 3) -> str:
        """Return a human-readable summary of upcoming events.

        At most the first ten events are listed; events without a parseable
        start time are skipped.
        """
        events = await self.get_availability(days_ahead=days_ahead)
        if not events:
            return "sin citas programadas los próximos días — disponibilidad abierta"

        busy_times = []
        for e in events[:10]:
            start = e.get("start", "")
            if not start:
                continue
            try:
                dt = datetime.fromisoformat(start.replace("Z", ""))
            except (TypeError, ValueError):
                continue
            summary = e.get("summary", "Ocupado")
            busy_times.append(
                f"{dt.strftime('%A %d')} a las {dt.strftime('%I:%M%p')}: {summary}"
            )

        if busy_times:
            return "Citas existentes:\n" + "\n".join(busy_times)
        return "calendario disponible"
@@ -0,0 +1,312 @@
1
+ """Knowledge base: clinic document ingestion, chunking, and retrieval."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import re
8
+ import sqlite3
9
+ from typing import Dict, List, Tuple
10
+
11
+ log = logging.getLogger("conny.kb")
12
+
13
+ # Target chunk size and chunk overlap, both in characters
14
+ CHUNK_SIZE = 400
15
+ CHUNK_OVERLAP = 80
16
+
17
+ # How many chunks to include in the LLM context
18
+ MAX_CHUNKS_IN_CONTEXT = 4
19
+
20
+ # Minimum relevance score (0-1) for a chunk to be included
21
+ MIN_RELEVANCE = 0.15
22
+
23
+
24
+ # --- Chunking ----------------------------------------------------------------
25
+
26
def _split_into_chunks(text: str, chunk_size: int | None = None,
                       min_chunk_len: int = 30) -> List[str]:
    """Split *text* into chunks of roughly ``chunk_size`` characters.

    Paragraphs (separated by blank lines) are packed together while they fit;
    a paragraph longer than the limit is split on sentence boundaries
    (``.``, ``!``, ``?`` followed by whitespace).

    Notes:
        * No overlap is applied between consecutive chunks.
        * The limit is soft: a single sentence longer than ``chunk_size`` is
          kept whole, and the joining separator is not counted.
        * Chunks of ``min_chunk_len`` characters or fewer are discarded.

    Args:
        text: Raw document text. ``None`` or empty yields ``[]``.
        chunk_size: Soft upper bound per chunk; defaults to ``CHUNK_SIZE``.
        min_chunk_len: Chunks must be strictly longer than this to be kept.

    Returns:
        The chunks, in document order.
    """
    if not text:
        return []
    limit = CHUNK_SIZE if chunk_size is None else chunk_size

    # Normalize line endings and collapse runs of blank lines.
    text = re.sub(r'\r\n', '\n', text)
    text = re.sub(r'\n{3,}', '\n\n', text).strip()

    paragraphs = [p.strip() for p in re.split(r'\n\n+', text) if p.strip()]

    chunks: List[str] = []
    current = ""
    for para in paragraphs:
        if len(current) + len(para) < limit:
            current = (current + "\n\n" + para).strip()
            continue

        # The paragraph does not fit: flush what has accumulated so far.
        if current:
            chunks.append(current)

        if len(para) <= limit:
            current = para
            continue

        # Oversized paragraph: pack its sentences instead.
        buf = ""
        for sentence in re.split(r'(?<=[.!?])\s+', para):
            if len(buf) + len(sentence) < limit:
                buf = (buf + " " + sentence).strip()
            else:
                if buf:
                    chunks.append(buf)
                buf = sentence
        # The trailing sentences stay open so the next paragraph can join them.
        current = buf

    if current:
        chunks.append(current)

    return [c for c in chunks if len(c) > min_chunk_len]
70
+
71
+
72
# Common Spanish function words that carry no retrieval signal. Both the
# unaccented and the accented spellings are listed because the tokenizer
# keeps diacritics ("mas" and "más" are distinct tokens).
_KB_STOPWORDS = frozenset({
    "el", "la", "los", "las", "un", "una", "unos", "unas",
    "de", "del", "al", "a", "en", "con", "por", "para", "que",
    "como", "cuando", "donde", "cual", "quien", "este", "esta",
    "ese", "esa", "mi", "tu", "su", "y", "o", "pero", "si", "no",
    "mas", "muy", "ya", "es", "son", "era", "fue", "ser", "estar",
    "hay", "han", "hemos", "tienen", "tiene", "hace", "hacer",
    "puede", "pueden", "se", "lo", "le", "les", "me", "te", "nos",
    "sobre", "entre", "hasta", "desde", "durante", "antes", "despues",
    "tambien", "ademas", "aunque", "porque", "sino", "ni",
    "todo", "toda", "todos", "todas", "cada", "otro", "otra",
    # Accented spellings of the entries above.
    "qué", "cómo", "cuándo", "dónde", "cuál", "quién",
    "éste", "ésta", "está", "ése", "ésa",
    "más", "después", "también", "además",
})

# Words of three or more letters; the input is lower-cased before matching.
_KB_WORD_RE = re.compile(r'\b[a-záéíóúüñ]{3,}\b')


def _extract_keywords(text: str, max_keywords: int = 20) -> List[str]:
    """Return the most frequent non-stopword terms of *text*.

    The text is lower-cased, tokenized into alphabetic words of at least
    three letters, and filtered against a Spanish stopword list.

    Args:
        text: Text to index. ``None`` or empty yields ``[]``.
        max_keywords: Maximum number of keywords returned.

    Returns:
        Unique keywords ordered by descending frequency; ties keep the order
        of first appearance.
    """
    if not text:
        return []
    freq: Dict[str, int] = {}
    for word in _KB_WORD_RE.findall(text.lower()):
        if word not in _KB_STOPWORDS:
            freq[word] = freq.get(word, 0) + 1
    # sorted() is stable, so equally frequent words keep insertion order.
    return sorted(freq, key=freq.get, reverse=True)[:max_keywords]
97
+
98
+
99
def _score_chunk(chunk_keywords: List[str], query_keywords: List[str]) -> float:
    """Return the fraction of query keywords that also appear in the chunk.

    The score is in ``[0, 1]``: ``0.0`` when either list is empty or nothing
    matches, ``1.0`` when every query keyword is present in the chunk.
    Matching is exact string equality; no phrase or proximity bonus is
    applied.
    """
    if not query_keywords or not chunk_keywords:
        return 0.0
    chunk_set = set(chunk_keywords)
    matches = sum(1 for kw in query_keywords if kw in chunk_set)
    # matches <= len(query_keywords) and the list is non-empty here, so the
    # ratio needs no clamping or zero-division guard.
    return matches / len(query_keywords)
110
+
111
+
112
+ # --- KnowledgeBase Manager ---------------------------------------------------
113
+
114
class KnowledgeBase:
    """Manager for the clinic knowledge base stored in SQLite.

    The master document lives in the single row ``knowledge_base.id = 1``;
    its chunks and their keywords live in ``kb_chunks``. Every operation
    opens its own short-lived connection to ``db_path`` and closes it when
    done.
    """

    def __init__(self, db_path: str):
        self.db_path = db_path
        self._ensure_tables()

    def _conn(self) -> sqlite3.Connection:
        """Open a new connection whose rows are addressable by column name."""
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        return conn

    def _run(self, work):
        """Run ``work(conn)`` inside one transaction and always close the connection.

        ``with conn`` only commits (or rolls back on error); it does not close
        the connection, hence the explicit ``finally``.
        """
        conn = self._conn()
        try:
            with conn:
                return work(conn)
        finally:
            conn.close()

    def _ensure_tables(self):
        """Create the KB tables if they do not exist yet (safe to re-run)."""
        # NOTE(review): idx_kb_chunks_id indexes the INTEGER PRIMARY KEY and
        # looks redundant; it is kept so the schema stays unchanged.
        self._run(lambda c: c.executescript("""
            -- Documento maestro de la clinica
            CREATE TABLE IF NOT EXISTS knowledge_base (
                id INTEGER PRIMARY KEY,
                raw_text TEXT DEFAULT '',
                chunk_count INTEGER DEFAULT 0,
                word_count INTEGER DEFAULT 0,
                created_at TEXT DEFAULT (datetime('now')),
                updated_at TEXT DEFAULT (datetime('now'))
            );
            INSERT OR IGNORE INTO knowledge_base (id) VALUES (1);

            -- Chunks indexados para recuperacion rapida
            CREATE TABLE IF NOT EXISTS kb_chunks (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                content TEXT NOT NULL,
                keywords TEXT DEFAULT '[]',
                section_hint TEXT DEFAULT '',
                created_at TEXT DEFAULT (datetime('now'))
            );
            CREATE INDEX IF NOT EXISTS idx_kb_chunks_id ON kb_chunks(id);
        """))
        log.info("[kb] tablas listas")

    # --- Ingestion ------------------------------------------------------------

    def save_learned_fact(self, question: str, answer: str, source: str = "admin") -> Dict:
        """Append a question/answer pair learned from an interaction to the KB.

        Returns the stats dict produced by ``ingest``.
        """
        fact_text = f"\n\n--- DATO APRENDIDO ({source}) ---\nPREGUNTA: {question}\nRESPUESTA: {answer}"
        return self.append(fact_text)

    def ingest(self, raw_text: str) -> Dict:
        """Replace the whole KB with *raw_text* and re-index its chunks.

        Returns:
            ``{"ok": True, "chunks", "words", "chars"}`` on success, or
            ``{"ok": False, "error": ...}`` when the text is empty.
        """
        if not raw_text or not raw_text.strip():
            return {"ok": False, "error": "texto vacio"}

        raw_text = raw_text.strip()
        chunks = _split_into_chunks(raw_text)
        word_count = len(raw_text.split())

        # Build every row up front so the write transaction stays short.
        rows = []
        for chunk in chunks:
            # A short first line not ending in "." is treated as a section title.
            first_line = chunk.split('\n', 1)[0].strip()
            is_title = len(first_line) < 60 and not first_line.endswith('.')
            section_hint = first_line if is_title else ""
            keywords = json.dumps(_extract_keywords(chunk), ensure_ascii=False)
            rows.append((chunk, keywords, section_hint))

        def _replace_all(c):
            # Delete + update + inserts share one transaction, so readers
            # never observe a half-replaced KB.
            c.execute("DELETE FROM kb_chunks")
            c.execute("""
                UPDATE knowledge_base
                SET raw_text=?, chunk_count=?, word_count=?, updated_at=datetime('now')
                WHERE id=1
            """, (raw_text, len(chunks), word_count))
            c.executemany("""
                INSERT INTO kb_chunks (content, keywords, section_hint)
                VALUES (?, ?, ?)
            """, rows)

        self._run(_replace_all)

        log.info("[kb] ingested %s chunks, %s palabras", len(chunks), word_count)
        return {
            "ok": True,
            "chunks": len(chunks),
            "words": word_count,
            "chars": len(raw_text),
        }

    def append(self, additional_text: str) -> Dict:
        """Add text to the existing KB; the combined document is re-ingested."""
        existing = self.get_raw()
        combined = (existing + "\n\n" + additional_text).strip() if existing else additional_text
        return self.ingest(combined)

    # --- Retrieval ------------------------------------------------------------

    def retrieve(self, query: str, max_chunks: int = MAX_CHUNKS_IN_CONTEXT) -> str:
        """Return the chunks most relevant to *query*, joined for LLM context.

        Chunks scoring below ``MIN_RELEVANCE`` are ignored. When the query
        yields no keywords, or no chunk is relevant enough, the first
        ``max_chunks`` chunks of the document are returned instead. Returns
        ``""`` when the KB is empty.
        """
        if not self.has_content():
            return ""

        query_keywords = _extract_keywords(query)
        if not query_keywords:
            return self._get_first_chunks(max_chunks)

        rows = self._run(
            lambda c: c.execute("SELECT id, content, keywords FROM kb_chunks").fetchall()
        )

        scored: List[Tuple[float, str]] = []
        for row in rows:
            try:
                chunk_kws = json.loads(row["keywords"] or "[]")
            except Exception:
                chunk_kws = []  # unreadable keyword payload: score it as 0
            score = _score_chunk(chunk_kws, query_keywords)
            if score >= MIN_RELEVANCE:
                scored.append((score, row["content"]))

        if not scored:
            return self._get_first_chunks(max_chunks)

        # Stable sort: equally scored chunks keep document order.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        selected = [content for _, content in scored[:max_chunks]]
        return "\n\n---\n\n".join(selected)

    def _get_first_chunks(self, n: int) -> str:
        """Return the first *n* chunks in insertion order, joined."""
        rows = self._run(
            lambda c: c.execute(
                "SELECT content FROM kb_chunks ORDER BY id LIMIT ?", (n,)
            ).fetchall()
        )
        return "\n\n---\n\n".join(r["content"] for r in rows)

    def get_raw(self) -> str:
        """Return the full master document (``""`` when none is stored)."""
        row = self._run(
            lambda c: c.execute("SELECT raw_text FROM knowledge_base WHERE id=1").fetchone()
        )
        return (row["raw_text"] or "") if row else ""

    def get_stats(self) -> Dict:
        """Return ``{"chunks", "words", "updated_at"}`` for the stored document."""
        row = self._run(
            lambda c: c.execute(
                "SELECT chunk_count, word_count, updated_at FROM knowledge_base WHERE id=1"
            ).fetchone()
        )
        if row:
            return {
                "chunks": row["chunk_count"],
                "words": row["word_count"],
                "updated_at": row["updated_at"],
            }
        return {"chunks": 0, "words": 0, "updated_at": None}

    def has_content(self) -> bool:
        """Return True when the stored document has at least one chunk."""
        row = self._run(
            lambda c: c.execute(
                "SELECT chunk_count FROM knowledge_base WHERE id=1"
            ).fetchone()
        )
        return bool(row and row["chunk_count"] > 0)

    def clear(self):
        """Delete every chunk and reset the master document to empty."""
        def _wipe(c):
            c.execute("DELETE FROM kb_chunks")
            c.execute("""
                UPDATE knowledge_base
                SET raw_text='', chunk_count=0, word_count=0, updated_at=datetime('now')
                WHERE id=1
            """)

        self._run(_wipe)
        log.info("[kb] KB limpiado")
296
+
297
+
298
+ # --- Formateo para el LLM ----------------------------------------------------
299
+
300
def format_kb_context(kb_text: str) -> str:
    """Wrap retrieved KB text in a delimited block for the LLM prompt.

    Returns an empty string when *kb_text* is empty; otherwise the text is
    framed by a header, a usage notice and a footer.
    """
    if kb_text:
        pieces = (
            "=== BASE DE CONOCIMIENTO DE LA CLINICA ===\n",
            "(Esta es informacion oficial de la clinica. Usala con maxima prioridad ",
            "antes de buscar en internet o inventar datos.)\n\n",
            f"{kb_text}\n",
            "=== FIN BASE DE CONOCIMIENTO ===",
        )
        return "".join(pieces)
    return ""
@@ -0,0 +1,66 @@
1
+ """conny_web_search.py — Web search via Brave Search API."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ import os
6
+ from typing import List, Dict, Optional
7
+
8
+ import httpx
9
+
10
+ log = logging.getLogger("conny.web_search")
11
+
12
+ BRAVE_API_KEY = os.getenv("BRAVE_API_KEY", "")
13
+ BRAVE_URL = "https://api.search.brave.com/res/v1/web/search"
14
+
15
+
16
async def search_web(query: str, num_results: int = 5) -> List[Dict[str, str]]:
    """Search the web through the Brave Search API.

    Args:
        query: Search terms. A blank query returns ``[]`` without a request.
        num_results: Maximum number of results; values above 20 are capped
            (the Brave ``count`` limit) and values <= 0 return ``[]``.

    Returns:
        A list of ``{"title", "snippet", "url"}`` dicts. Returns ``[]`` when
        no API key is configured, on a non-200 response, or on any error.
    """
    if not query or not query.strip() or num_results <= 0:
        return []

    # Fall back to the live environment so a key loaded after this module
    # was imported is still picked up.
    api_key = BRAVE_API_KEY or os.getenv("BRAVE_API_KEY", "")
    if not api_key:
        log.debug("[web_search] no BRAVE_API_KEY configured")
        return []

    count = min(num_results, 20)
    try:
        async with httpx.AsyncClient(timeout=10.0) as client:
            r = await client.get(
                BRAVE_URL,
                params={"q": query, "count": count},
                headers={"X-Subscription-Token": api_key, "Accept": "application/json"},
            )
        if r.status_code != 200:
            log.warning("[web_search] Brave returned %s", r.status_code)
            return []
        items = (r.json().get("web") or {}).get("results") or []
        # "or ''" also covers JSON null, keeping every value a str.
        return [
            {
                "title": item.get("title") or "",
                "snippet": item.get("description") or "",
                "url": item.get("url") or "",
            }
            for item in items[:count]
        ]
    except Exception as e:  # best-effort: search must never break the caller
        log.error("[web_search] error: %s", e)
        return []
44
+
45
+
46
def _results_to_bullets(results: List[Dict[str, str]], snippet_limit: int) -> str:
    """Render search results as ``- title: snippet`` lines, one per result.

    Snippets are truncated to ``snippet_limit`` characters; a missing or
    ``None`` title/snippet is rendered as empty text. An empty result list
    yields ``""``.
    """
    lines = []
    for item in results:
        title = item.get("title") or ""
        snippet = (item.get("snippet") or "")[:snippet_limit]
        lines.append(f"- {title}: {snippet}")
    return "\n".join(lines)


async def search_business(business_name: str, city: str = "Medellín") -> str:
    """Search for a business and return a bullet summary for LLM context.

    The query asks for the business's services, prices and opening hours in
    *city*. Returns ``""`` when the search yields nothing.
    """
    query = f"{business_name} {city} servicios precios horario"
    results = await search_web(query, num_results=5)
    return _results_to_bullets(results, 150)


async def search_topic(topic: str) -> str:
    """Search a general topic and return up to three bulleted results.

    Returns ``""`` when the search yields nothing.
    """
    results = await search_web(topic, num_results=3)
    return _results_to_bullets(results, 200)
File without changes