@innvisor/conny-ai 9.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +68 -0
- package/CHANGELOG.md +54 -0
- package/LICENSE +21 -0
- package/README.md +369 -0
- package/brand-assets/A_dark_luxury_web_background_202605210700.jpeg +0 -0
- package/brand-assets/Conny.web.logo.png +0 -0
- package/brand-assets/Logo_Conny_Petalo_Claro.png +0 -0
- package/brand-assets/cl-nica-de-las-am-ricas/manifest.json +22 -0
- package/brand-assets/cl-nica-de-las-am-ricas/processed/business-identity.txt +11 -0
- package/brand-assets/cl-nica-de-las-am-ricas/raw/business-identity.txt +11 -0
- package/brand-assets/cl-nica-las-am-ricas/manifest.json +22 -0
- package/brand-assets/cl-nica-las-am-ricas/processed/business-identity.txt +11 -0
- package/brand-assets/cl-nica-las-am-ricas/raw/business-identity.txt +11 -0
- package/brand-assets/conny-demo/manifest.json +22 -0
- package/brand-assets/conny-demo/processed/business-identity.txt +7 -0
- package/brand-assets/conny-demo/raw/business-identity.txt +7 -0
- package/brand-assets/conny-logo.png +0 -0
- package/brand-assets/web.background.png +0 -0
- package/brand_assets.py +323 -0
- package/conny +28 -0
- package/conny-chat.py +579 -0
- package/conny-omni.py +3843 -0
- package/conny.py +113 -0
- package/conny_agents/__init__.py +1 -0
- package/conny_agents/agenda.py +1 -0
- package/conny_agents/captacion.py +1 -0
- package/conny_agents/conocimiento.py +1 -0
- package/conny_agents/escalacion.py +1 -0
- package/conny_agents/objeciones.py +1 -0
- package/conny_agents/seguimiento.py +1 -0
- package/conny_app.py +287 -0
- package/conny_audio.py +350 -0
- package/conny_audio_learn.py +84 -0
- package/conny_brain_v10.py +804 -0
- package/conny_bridge.py +656 -0
- package/conny_calendar.py +169 -0
- package/conny_cli.py +11784 -0
- package/conny_cli_bb.py +437 -0
- package/conny_commands.py +243 -0
- package/conny_config.py +215 -0
- package/conny_core/__init__.py +3 -0
- package/conny_core/conversation_engine.py +446 -0
- package/conny_core/first_turn_ops.py +287 -0
- package/conny_core/persona_registry.py +157 -0
- package/conny_core/prompt_ops.py +561 -0
- package/conny_cron.py +72 -0
- package/conny_demo_v2.py +209 -0
- package/conny_demo_voice.py +134 -0
- package/conny_design.py +43 -0
- package/conny_doctor.py +319 -0
- package/conny_domino.py +696 -0
- package/conny_generator.py +447 -0
- package/conny_google_auth.py +159 -0
- package/conny_i18n.py +619 -0
- package/conny_init.py +509 -0
- package/conny_integrations/__init__.py +4 -0
- package/conny_integrations/llm.py +1 -0
- package/conny_integrations/vault.py +77 -0
- package/conny_integrations/whatsapp.py +1 -0
- package/conny_intelligence.py +65 -0
- package/conny_learning.py +154 -0
- package/conny_memory.py +243 -0
- package/conny_memory_engine.py +292 -0
- package/conny_nova_proxy.py +170 -0
- package/conny_nuke_robot_phrases.py +493 -0
- package/conny_pairing.py +253 -0
- package/conny_patch.py +291 -0
- package/conny_persona_cli.py +150 -0
- package/conny_router.py +308 -0
- package/conny_runtime_ops.py +271 -0
- package/conny_session.py +516 -0
- package/conny_skills/__init__.py +1 -0
- package/conny_skills/demo_mode.py +35 -0
- package/conny_skills/text_processing.py +1 -0
- package/conny_skills/tone_detection.py +1 -0
- package/conny_smart_features.py +333 -0
- package/conny_studio.py +161 -0
- package/conny_sync_fix.py +306 -0
- package/conny_tui.py +512 -0
- package/conny_tui_select.py +202 -0
- package/conny_ultra_config.py +411 -0
- package/conny_uncertainty.py +174 -0
- package/conny_utils.py +87 -0
- package/conny_voice.py +156 -0
- package/conny_voice_engine.py +124 -0
- package/conny_web_search.py +66 -0
- package/conny_weekly_report.py +85 -0
- package/conny_worm.py +88 -0
- package/core/__init__.py +25 -0
- package/ecosystem.config.js +24 -0
- package/fix_init.py +27 -0
- package/install.sh +78 -0
- package/knowledge_base.py +330 -0
- package/nova/rules/default.yaml +37 -0
- package/nova_bridge.py +509 -0
- package/npm/conny.js +471 -0
- package/package.json +102 -0
- package/personas/conny/base/default.yaml +35 -0
- package/personas/conny/base/estetica_whatsapp.yaml +36 -0
- package/requirements.txt +14 -0
- package/run.sh +47 -0
- package/search.py +465 -0
- package/smart_handoff.py +1150 -0
- package/src/__init__.py +0 -0
- package/src/conny/__init__.py +0 -0
- package/src/conny/admin/__init__.py +0 -0
- package/src/conny/admin/api.py +234 -0
- package/src/conny/admin/dashboard.py +772 -0
- package/src/conny/api/__init__.py +0 -0
- package/src/conny/api/routes.py +8851 -0
- package/src/conny/brain/__init__.py +15 -0
- package/src/conny/brain/engine.py +804 -0
- package/src/conny/brain/learning.py +154 -0
- package/src/conny/brain/memory.py +324 -0
- package/src/conny/brain/smart_features.py +333 -0
- package/src/conny/brain/uncertainty.py +167 -0
- package/src/conny/channels/__init__.py +0 -0
- package/src/conny/channels/audio.py +316 -0
- package/src/conny/channels/cli.py +11795 -0
- package/src/conny/channels/logo_art.py +11 -0
- package/src/conny/channels/voice.py +156 -0
- package/src/conny/core/__init__.py +0 -0
- package/src/conny/core/config.py +215 -0
- package/src/conny/core/cron.py +72 -0
- package/src/conny/core/messenger.py +563 -0
- package/src/conny/core/router.py +297 -0
- package/src/conny/core/session.py +312 -0
- package/src/conny/demo/__init__.py +0 -0
- package/src/conny/demo/handler.py +3110 -0
- package/src/conny/integrations/__init__.py +19 -0
- package/src/conny/integrations/calendar.py +169 -0
- package/src/conny/integrations/knowledge.py +312 -0
- package/src/conny/integrations/search.py +66 -0
- package/src/conny/personas/__init__.py +0 -0
- package/src/conny/personas/generator.py +447 -0
- package/src/conny/production/__init__.py +0 -0
- package/src/conny/production/domino.py +696 -0
- package/src/conny/production/guard.py +550 -0
- package/src/conny/production/handoff.py +1150 -0
- package/src/conny/production/monitor.py +353 -0
- package/src/conny/utils/__init__.py +2 -0
- package/src/conny/utils/helpers.py +75 -0
- package/src/conny/utils/i18n.py +619 -0
- package/src/core/admin_engines.py +772 -0
- package/src/core/globals.py +11845 -0
- package/src/core/orchestrator.py +273 -0
- package/src/core/production_monitor.py +353 -0
- package/src/core/runtime.py +5487 -0
- package/src/domain/onboarding_flow.py +230 -0
- package/src/domain/prompts/__init__.py +1 -0
- package/src/domain/prompts/prospect_pitch.py +282 -0
- package/src/domain/send_guard.py +636 -0
- package/src/domain/swarm/queen.py +96 -0
- package/src/infrastructure/llm_providers/engine.py +487 -0
- package/src/interfaces/mcp_server.py +73 -0
- package/src/interfaces/nova_bridge.py +58 -0
- package/src/interfaces/web/admin_api.py +1379 -0
- package/src/interfaces/web/app.py +9408 -0
- package/src/interfaces/web/demo_handler.py +3450 -0
- package/src/interfaces/web/static/generate_avatars.py +46 -0
- package/v7/__init__.py +46 -0
- package/v7/agents/__init__.py +46 -0
- package/v7/agents/agenda.py +77 -0
- package/v7/agents/base.py +216 -0
- package/v7/agents/captacion.py +60 -0
- package/v7/agents/conocimiento.py +69 -0
- package/v7/agents/escalacion.py +83 -0
- package/v7/agents/objeciones.py +109 -0
- package/v7/agents/seguimiento.py +71 -0
- package/v7/memory/__init__.py +46 -0
- package/v7/memory/patient_profile.py +200 -0
- package/v7/orchestrator.py +275 -0
- package/v7/postprocess.py +127 -0
- package/v7/router.py +239 -0
- package/verify_conversation_impl.py +48 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Dict, Any, List, Optional
|
|
4
|
+
from src.core.globals import llm_engine
|
|
5
|
+
|
|
6
|
+
log = logging.getLogger("conny.swarm")
|
|
7
|
+
|
|
8
|
+
class Agent:
    """One role-specialised worker of the swarm, backed by the shared LLM engine."""

    def __init__(self, name: str, role: str, instruction: str):
        # ``name`` only tags log lines; ``role`` and ``instruction`` are
        # injected into every prompt built by execute().
        self.name, self.role, self.instruction = name, role, instruction

    async def execute(self, task: str, context: Dict[str, Any]) -> str:
        """Send ``task`` to the LLM and return its raw text reply.

        The prompt is a four-line block (role, instruction, context, task);
        ``context`` is embedded through its ``str()`` representation.
        """
        sections = (
            f"Role: {self.role}",
            f"Instruction: {self.instruction}",
            f"Context: {context}",
            f"Task: {task}",
        )
        prompt = "\n".join(sections)
        log.info(f"[{self.name}] Executing task...")
        # Call the real LLM engine through its ``complete`` entry point; the
        # second element of the returned pair (metadata) is not used here.
        reply, _metadata = await llm_engine.complete(
            [{"role": "user", "content": prompt}],
            model_tier="fast",
        )
        log.info(f"[{self.name}] Raw response: {reply}")
        return reply
|
|
22
|
+
|
|
23
|
+
class QueenCoordinator:
    """
    Coordinates the hierarchical swarm (Swarm V3).

    Avoids instruction drift by splitting the work across three agents that
    run in sequence: research -> response drafting -> review.
    """

    def __init__(self):
        self.research_agent = Agent(
            name="ResearchAgent",
            role="Analista de Conocimiento",
            instruction="Busca en la base de datos de FAQs o en el contexto del paciente para extraer hechos exactos."
        )
        self.response_agent = Agent(
            name="ResponseAgent",
            role="Generador de Respuestas",
            instruction="Sintetiza la respuesta final basándose únicamente en los hechos encontrados, adoptando el tono de la clínica. Evita cualquier frase preprogramada o robótica."
        )
        self.reviewer_agent = Agent(
            name="ReviewerAgent",
            role="Auditor de Calidad",
            instruction="Verifica la respuesta generada. Quita cualquier fragmento que suene a robot o call center. Devuelve ÚNICAMENTE un array de strings en formato JSON con los mensajes finales que recibirá el cliente (ejemplo: [\"¡Hola! Claro que sí.\", \"¿Te viene bien por la tarde?\"]). No devuelvas bloques de auditoría, objetos con llaves o explicaciones extra."
        )

    def _clean_json_text(self, text: str) -> str:
        """Strip a surrounding markdown code fence, if any, and return the payload.

        Handles the usual multi-line fence as well as a fence that is opened
        and closed on a single line, and a closing fence glued to the last
        content line. Text without a leading fence is returned stripped.
        """
        text = (text or "").strip()
        if not text.startswith("```"):
            return text

        lines = text.splitlines()
        if len(lines) == 1:
            # Fence opened and closed on the same line: keep what lies between.
            inner = lines[0][3:]
            if inner.endswith("```"):
                inner = inner[:-3]
            inner = inner.strip()
            # Drop an optional language tag such as ```json
            if inner[:4].lower() == "json":
                inner = inner[4:]
            return inner.strip()

        # Multi-line: the first line is the opening fence (plus language tag).
        lines = lines[1:]
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        elif lines and lines[-1].rstrip().endswith("```"):
            # Closing fence glued to the last content line.
            lines[-1] = lines[-1].rstrip()[:-3]
        return "\n".join(lines).strip()

    def _collect_messages(self, node: Any, out: List[str]) -> None:
        """Append to ``out`` every message string found in ``node``.

        Strings are taken as-is, dicts contribute their 'response' or 'text'
        value (or their ``str()`` form when neither is present) and nested
        lists are flattened. Any other value is ignored.
        """
        if isinstance(node, str):
            out.append(node)
        elif isinstance(node, dict):
            # In case the LLM insists on returning objects keyed by 'response'.
            value = node.get("response") or node.get("text") or str(node)
            if isinstance(value, (str, list, dict)):
                self._collect_messages(value, out)
            else:
                out.append(str(value))
        elif isinstance(node, list):
            for item in node:
                self._collect_messages(item, out)

    def _messages_from_parsed(self, parsed: Any) -> Optional[List[str]]:
        """Turn decoded reviewer JSON into a list of message strings.

        Returns None when the JSON shape carries no usable message, so the
        caller can fall back to the raw text.
        """
        if isinstance(parsed, str):
            # A bare JSON string: use its decoded value, not the quoted form.
            return [parsed]
        if isinstance(parsed, list):
            collected: List[str] = []
            self._collect_messages(parsed, collected)
            return collected
        if isinstance(parsed, dict):
            value = parsed.get("response") or parsed.get("text")
            if value:
                collected = []
                self._collect_messages(value, collected)
                return collected
        return None

    async def process(self, user_message: str, clinic_context: Dict[str, Any]) -> List[str]:
        """Run the three-agent pipeline and return the final customer messages.

        Returns a list of strings. When the reviewer output is not valid JSON
        (or has an unexpected shape) the cleaned raw text is returned as a
        single message; an empty reviewer output yields an empty list.
        """
        import json  # local import: this module does not import json at top level

        log.info("[swarm] Queen ha recibido un mensaje, coordinando enjambre...")

        # 1. Research
        facts = await self.research_agent.execute(f"Extrae datos relevantes para responder a: '{user_message}'", clinic_context)

        # 2. Drafting
        draft_context = {"user_message": user_message, "facts": facts, "clinic": clinic_context}
        draft_response = await self.response_agent.execute("Escribe la respuesta final en JSON", draft_context)

        # 3. Review
        final_response = await self.reviewer_agent.execute("Verifica esta respuesta y devuelve el JSON final de mensajes", {"draft": draft_response})

        cleaned = self._clean_json_text(final_response)
        if not cleaned:
            # Nothing usable came back; do not hand an empty message to the caller.
            log.warning("[swarm] ReviewerAgent devolvió una respuesta vacía.")
            return []

        try:
            parsed = json.loads(cleaned)
        except (ValueError, TypeError) as e:
            log.warning(f"[swarm] Error al parsear JSON del ReviewerAgent ({e}). Usando fallback de texto crudo.")
            return [cleaned]

        messages = self._messages_from_parsed(parsed)
        return messages if messages is not None else [cleaned]
|
|
95
|
+
|
|
96
|
+
# Module-level QueenCoordinator instance, created when this module is imported.
swarm_queen = QueenCoordinator()
|
|
@@ -0,0 +1,487 @@
|
|
|
1
|
+
import asyncio
import hashlib
import json
import logging
import math
import re
import time
from typing import List, Dict, Tuple, Any

import httpx

from conny_config import Config
|
|
9
|
+
|
|
10
|
+
log = logging.getLogger("conny.llm")
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
from conny import model_manager
|
|
14
|
+
except ImportError:
|
|
15
|
+
model_manager = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class LLMProvider:
    """Base interface that every LLM provider implements."""

    name: str = "base"

    async def complete(
        self,
        messages: List[Dict],
        model: str,
        temperature: float = 0.7,
        max_tokens: int = 1000,
        **kwargs,
    ) -> Tuple[str, Dict]:
        """Return ``(text, metadata)`` for a chat completion; subclasses override."""
        raise NotImplementedError

    async def embed(self, text: str) -> List[float]:
        """Return an embedding vector for ``text``; subclasses override."""
        raise NotImplementedError
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _parse_http_json_response(response: httpx.Response, provider_name: str) -> Dict[str, Any]:
|
|
32
|
+
body = response.text or ""
|
|
33
|
+
stripped = body.strip()
|
|
34
|
+
content_type = (response.headers.get("content-type") or "").strip() or "unknown"
|
|
35
|
+
if not stripped:
|
|
36
|
+
raise ValueError(f"{provider_name} devolvió body vacío [{content_type}]")
|
|
37
|
+
try:
|
|
38
|
+
parsed = response.json()
|
|
39
|
+
except Exception as exc:
|
|
40
|
+
snippet = re.sub(r"\s+", " ", stripped)[:220]
|
|
41
|
+
raise ValueError(
|
|
42
|
+
f"{provider_name} devolvió body no-JSON [{content_type}]: {snippet}"
|
|
43
|
+
) from exc
|
|
44
|
+
if not isinstance(parsed, dict):
|
|
45
|
+
raise ValueError(f"{provider_name} devolvió JSON no-objeto [{content_type}]")
|
|
46
|
+
return parsed
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class GroqProvider(LLMProvider):
    """Groq provider (OpenAI-compatible chat endpoint); tiers map to Llama models."""

    name = "groq"
    BASE = "https://api.groq.com/openai/v1"
    MDLS = {
        "reasoning": "llama-3.3-70b-versatile",
        "fast": "llama-3.3-70b-versatile",
        "lite": "llama-3.1-8b-instant",
    }

    def __init__(self, key: str):
        self.key = key

    def _pick_model(self, model):
        """Map a tier name or a (possibly 'groq/'-prefixed) model id to a Groq model id."""
        if not isinstance(model, str):
            return self.MDLS["fast"]
        if model in self.MDLS:
            return self.MDLS[model]
        if model.startswith("groq/"):
            return model.split("/", 1)[1]
        if model not in ("fast", "reasoning", "lite"):
            return model
        return self.MDLS["fast"]

    async def complete(self, messages, model="fast", temperature=0.7, max_tokens=1000, **kw):
        """POST a chat completion and return ``(text, metadata)``.

        Metadata holds the resolved model id, the latency in milliseconds and
        the provider name.
        """
        started = time.time()
        target = self._pick_model(model)
        request_headers = {
            "Authorization": f"Bearer {self.key}",
            "Content-Type": "application/json",
        }
        request_body = {
            "model": target,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        async with httpx.AsyncClient(timeout=15.0) as client:
            reply = await client.post(
                f"{self.BASE}/chat/completions",
                headers=request_headers,
                json=request_body,
            )
            reply.raise_for_status()
            data = _parse_http_json_response(reply, self.name)
            content = data["choices"][0]["message"]["content"].strip()
            elapsed_ms = int((time.time() - started) * 1000)
            return content, {"model": target, "latency_ms": elapsed_ms, "provider": "groq"}

    async def embed(self, text):
        """Embeddings are not implemented for this provider."""
        raise NotImplementedError
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class GeminiProvider(LLMProvider):
    """Direct Google Gemini provider.

    ``label`` becomes the provider name, so several instances with different
    API keys can coexist (key rotation).

    The API key is sent in the ``x-goog-api-key`` header rather than in the
    URL query string, so it cannot leak through the request URL that httpx
    embeds in its error messages.
    """

    name = "gemini"
    BASE = "https://generativelanguage.googleapis.com/v1beta"
    MDLS = {
        "reasoning": "gemini-2.5-pro",   # Pro for complex reasoning
        "fast": "gemini-2.5-flash",      # Flash for speed
        "lite": "gemini-2.5-flash-lite",
    }

    def __init__(self, key: str, label: str = "gemini"):
        self.key = key
        self.name = label

    def _auth_headers(self):
        """Return the headers that authenticate a request with this key."""
        return {"x-goog-api-key": self.key}

    def _pick_model(self, model):
        """Map a tier name or a 'google/…' / 'gemini-…' id to a Gemini model id."""
        if isinstance(model, str):
            if model in self.MDLS:
                return self.MDLS[model]
            if model.startswith("google/"):
                return model.split("/", 1)[1]
            if model.startswith("gemini-"):
                return model
        return self.MDLS["fast"]

    async def complete(self, messages, model="fast", temperature=0.7, max_tokens=1000, **kw):
        """Call ``generateContent`` and return ``(text, metadata)``.

        System messages are sent as ``systemInstruction``; user and assistant
        messages become ``contents`` (assistant is sent as role "model").
        Messages with any other role are not forwarded.

        Raises:
            ValueError: when the reply is not a JSON object or carries no text
                in its first candidate.
            httpx.HTTPStatusError: on a non-2xx response.
        """
        start = time.time()
        gm = self._pick_model(model)

        system_parts, contents = [], []
        for m in messages:
            role = m["role"]
            if role == "system":
                system_parts.append({"text": m["content"]})
            elif role == "user":
                contents.append({"role": "user", "parts": [{"text": m["content"]}]})
            elif role == "assistant":
                contents.append({"role": "model", "parts": [{"text": m["content"]}]})

        gen_config = {"temperature": temperature, "maxOutputTokens": max_tokens}
        if "gemini-2.5-flash" in gm:
            gen_config["thinkingConfig"] = {"thinkingBudget": 0}
        elif "gemini-2.5-pro" in gm:
            # Pro always reasons and that reasoning counts towards
            # maxOutputTokens, so the limit is raised.
            gen_config["maxOutputTokens"] = max(max_tokens, 4000)

        request_body = {"contents": contents, "generationConfig": gen_config}
        if system_parts:
            request_body["systemInstruction"] = {"parts": system_parts}

        url = f"{self.BASE}/models/{gm}:generateContent"
        async with httpx.AsyncClient(timeout=20.0) as c:
            r = await c.post(url, headers=self._auth_headers(), json=request_body)
            r.raise_for_status()
            data = _parse_http_json_response(r, self.name)

        try:
            text = data["candidates"][0]["content"]["parts"][0]["text"].strip()
        except (KeyError, IndexError, TypeError, AttributeError) as exc:
            # The reply has no text where it is expected; name the problem
            # instead of surfacing a bare KeyError/IndexError.
            raise ValueError(f"{self.name} devolvió una respuesta sin texto") from exc
        return text, {"model": gm, "latency_ms": int((time.time() - start) * 1000), "provider": self.name}

    async def embed(self, text):
        """Return the embedding values for ``text`` from ``text-embedding-004``."""
        url = f"{self.BASE}/models/text-embedding-004:embedContent"
        async with httpx.AsyncClient(timeout=30.0) as c:
            r = await c.post(
                url,
                headers=self._auth_headers(),
                json={"content": {"parts": [{"text": text}]}},
            )
            r.raise_for_status()
            return r.json()["embedding"]["values"]
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class OpenRouterProvider(LLMProvider):
    """OpenRouter provider; non-tier string ids are forwarded to the API unchanged."""

    name = "openrouter"
    BASE = "https://openrouter.ai/api/v1"
    MDLS = {
        "reasoning": "anthropic/claude-sonnet-4",
        "fast": "google/gemini-2.5-flash",
        "lite": "google/gemini-2.5-flash-lite",
    }

    def __init__(self, key: str):
        self.key = key

    async def complete(self, messages, model="fast", temperature=0.7, max_tokens=1000, **kw):
        """POST a chat completion and return ``(text, metadata)``."""
        started = time.time()
        # A tier name maps through MDLS; any other string is used verbatim.
        fallback = model if isinstance(model, str) else self.MDLS["fast"]
        target = self.MDLS.get(model, fallback)
        request_headers = {
            "Authorization": f"Bearer {self.key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://conny.ai",
            "X-Title": "Conny Ultra",
        }
        request_body = {
            "model": target,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        async with httpx.AsyncClient(timeout=25.0) as client:
            reply = await client.post(
                f"{self.BASE}/chat/completions",
                headers=request_headers,
                json=request_body,
            )
            reply.raise_for_status()
            data = _parse_http_json_response(reply, self.name)
            content = data["choices"][0]["message"]["content"].strip()
            elapsed_ms = int((time.time() - started) * 1000)
            return content, {"model": target, "latency_ms": elapsed_ms, "provider": "openrouter"}

    async def embed(self, text):
        """Return an embedding for ``text`` using ``openai/text-embedding-3-small``."""
        request_headers = {
            "Authorization": f"Bearer {self.key}",
            "Content-Type": "application/json",
        }
        request_body = {"model": "openai/text-embedding-3-small", "input": text}
        async with httpx.AsyncClient(timeout=30.0) as client:
            reply = await client.post(
                f"{self.BASE}/embeddings",
                headers=request_headers,
                json=request_body,
            )
            reply.raise_for_status()
            return reply.json()["data"][0]["embedding"]
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
class OpenAIProvider(LLMProvider):
    """OpenAI provider — described by the original author as the last resort."""

    name = "openai"
    BASE = "https://api.openai.com/v1"
    MDLS = {"reasoning": "gpt-4o", "fast": "gpt-4o-mini", "lite": "gpt-4o-mini"}

    def __init__(self, key: str):
        self.key = key

    def _pick_model(self, model):
        """Map a tier name or an 'openai/…' / 'gpt-…' id to an OpenAI model id."""
        if isinstance(model, str):
            if model in self.MDLS:
                return self.MDLS[model]
            if model.startswith("openai/"):
                return model.split("/", 1)[1]
            if model.startswith("gpt-"):
                return model
        return self.MDLS["fast"]

    def _headers(self):
        """Bearer-token JSON headers shared by both endpoints."""
        return {"Authorization": f"Bearer {self.key}", "Content-Type": "application/json"}

    async def complete(self, messages, model="fast", temperature=0.7, max_tokens=1000, **kw):
        """POST a chat completion and return ``(text, metadata)``."""
        started = time.time()
        target = self._pick_model(model)
        request_body = {
            "model": target,
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
        }
        async with httpx.AsyncClient(timeout=25.0) as client:
            reply = await client.post(
                f"{self.BASE}/chat/completions",
                headers=self._headers(),
                json=request_body,
            )
            reply.raise_for_status()
            data = _parse_http_json_response(reply, self.name)
            content = data["choices"][0]["message"]["content"].strip()
            elapsed_ms = int((time.time() - started) * 1000)
            return content, {"model": target, "latency_ms": elapsed_ms, "provider": "openai"}

    async def embed(self, text):
        """Return an embedding for ``text`` using ``text-embedding-3-small``."""
        request_body = {"model": "text-embedding-3-small", "input": text}
        async with httpx.AsyncClient(timeout=30.0) as client:
            reply = await client.post(
                f"{self.BASE}/embeddings",
                headers=self._headers(),
                json=request_body,
            )
            reply.raise_for_status()
            return reply.json()["data"][0]["embedding"]
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
class LLMEngine:
|
|
207
|
+
"""
|
|
208
|
+
Motor LLM con cascada de 6 proveedores.
|
|
209
|
+
Groq -> Gemini(key1) -> Gemini(key2) -> Gemini(key3) -> OpenRouter -> OpenAI
|
|
210
|
+
|
|
211
|
+
V8.1 — Fixes de fallos silenciosos:
|
|
212
|
+
- Blacklist temporal (60s) en vez de permanente
|
|
213
|
+
- Detección de respuesta vacía o inválida
|
|
214
|
+
- Timeout de provider < timeout de caller (nunca zombie)
|
|
215
|
+
- _push_to_engine solo en OpenRouter (único multi-modelo real)
|
|
216
|
+
- Métricas de fallo por provider en DB para diagnóstico
|
|
217
|
+
"""
|
|
218
|
+
|
|
219
|
+
# Providers que soportan modelos externos (OpenRouter puede usar cualquier modelo)
|
|
220
|
+
_MULTI_MODEL_PROVIDERS = {"openrouter"}
|
|
221
|
+
|
|
222
|
+
def __init__(self):
|
|
223
|
+
self.providers: List[LLMProvider] = []
|
|
224
|
+
self._failures: Dict[str, int] = {} # conteo de fallos
|
|
225
|
+
self._blocked_until: Dict[str, float] = {} # timestamp hasta cuando está bloqueado
|
|
226
|
+
self._last_success: Dict[str, float] = {}
|
|
227
|
+
self._blacklist_ttl = 60.0 # segundos de bloqueo tras 3 fallos consecutivos
|
|
228
|
+
self._cache: Dict[str, Tuple[str, float]] = {}
|
|
229
|
+
self._cache_ttl = 300
|
|
230
|
+
|
|
231
|
+
if Config.GROQ_API_KEY:
|
|
232
|
+
self.providers.append(GroqProvider(Config.GROQ_API_KEY))
|
|
233
|
+
log.info("[llm] Groq OK")
|
|
234
|
+
_all_gemini_keys = Config.GEMINI_API_KEYS or [
|
|
235
|
+
Config.GEMINI_API_KEY, Config.GEMINI_API_KEY_2,
|
|
236
|
+
Config.GEMINI_API_KEY_3, Config.GEMINI_API_KEY_4,
|
|
237
|
+
Config.GEMINI_API_KEY_5, Config.GEMINI_API_KEY_6,
|
|
238
|
+
]
|
|
239
|
+
for i, key in enumerate(_all_gemini_keys):
|
|
240
|
+
if key:
|
|
241
|
+
self.providers.append(GeminiProvider(key, f"gemini_k{i+1}"))
|
|
242
|
+
log.info(f"[llm] Gemini key{i+1} OK")
|
|
243
|
+
if Config.OPENROUTER_API_KEY:
|
|
244
|
+
self.providers.append(OpenRouterProvider(Config.OPENROUTER_API_KEY))
|
|
245
|
+
log.info("[llm] OpenRouter OK")
|
|
246
|
+
if Config.OPENAI_API_KEY:
|
|
247
|
+
self.providers.append(OpenAIProvider(Config.OPENAI_API_KEY))
|
|
248
|
+
log.info("[llm] OpenAI OK")
|
|
249
|
+
|
|
250
|
+
n = len(self.providers)
|
|
251
|
+
if n == 0:
|
|
252
|
+
log.critical("[llm] SIN PROVEEDORES — el bot no podra generar respuestas inteligentes")
|
|
253
|
+
else:
|
|
254
|
+
log.info(f"[llm] cascada lista: {n} proveedores")
|
|
255
|
+
|
|
256
|
+
def _hash(self, messages, **kw):
|
|
257
|
+
return hashlib.md5((json.dumps(messages, sort_keys=True) + json.dumps(kw, sort_keys=True)).encode()).hexdigest()
|
|
258
|
+
|
|
259
|
+
def _get_requested_model(self, model_tier: str) -> str:
|
|
260
|
+
try:
|
|
261
|
+
if model_manager:
|
|
262
|
+
effective = model_manager.get_effective_models()
|
|
263
|
+
chosen = effective.get(model_tier)
|
|
264
|
+
if chosen:
|
|
265
|
+
return chosen
|
|
266
|
+
except Exception:
|
|
267
|
+
pass
|
|
268
|
+
return Config.LLM_MODELS.get(model_tier, model_tier)
|
|
269
|
+
|
|
270
|
+
def _ordered_providers(self, requested_model: str) -> List[LLMProvider]:
|
|
271
|
+
providers = list(self.providers)
|
|
272
|
+
|
|
273
|
+
def _priority(provider: LLMProvider) -> int:
|
|
274
|
+
name = provider.name
|
|
275
|
+
if requested_model.startswith("google/") or requested_model.startswith("gemini-"):
|
|
276
|
+
if name.startswith("gemini"):
|
|
277
|
+
return 0
|
|
278
|
+
if name == "openrouter":
|
|
279
|
+
return 1
|
|
280
|
+
return 2
|
|
281
|
+
if requested_model.startswith("anthropic/") or requested_model.startswith("meta-llama/") or requested_model.startswith("mistralai/"):
|
|
282
|
+
if name == "openrouter":
|
|
283
|
+
return 0
|
|
284
|
+
return 2
|
|
285
|
+
if requested_model.startswith("openai/") or requested_model.startswith("gpt-"):
|
|
286
|
+
if name == "openai":
|
|
287
|
+
return 0
|
|
288
|
+
if name == "openrouter":
|
|
289
|
+
return 1
|
|
290
|
+
return 2
|
|
291
|
+
if requested_model.startswith("groq/") or requested_model.startswith("llama-"):
|
|
292
|
+
if name == "groq":
|
|
293
|
+
return 0
|
|
294
|
+
if name == "openrouter":
|
|
295
|
+
return 1
|
|
296
|
+
return 2
|
|
297
|
+
return 0
|
|
298
|
+
|
|
299
|
+
return sorted(
|
|
300
|
+
providers,
|
|
301
|
+
key=lambda provider: (
|
|
302
|
+
_priority(provider),
|
|
303
|
+
self._failures.get(provider.name, 0),
|
|
304
|
+
-self._last_success.get(provider.name, 0.0),
|
|
305
|
+
provider.name,
|
|
306
|
+
),
|
|
307
|
+
)
|
|
308
|
+
|
|
309
|
+
def _resolve_provider_model(self, provider: LLMProvider,
|
|
310
|
+
requested_model: str,
|
|
311
|
+
model_tier: str) -> str:
|
|
312
|
+
name = provider.name
|
|
313
|
+
if name.startswith("gemini") and (
|
|
314
|
+
requested_model.startswith("google/") or requested_model.startswith("gemini-")
|
|
315
|
+
):
|
|
316
|
+
return requested_model
|
|
317
|
+
if name == "openai" and (
|
|
318
|
+
requested_model.startswith("openai/") or requested_model.startswith("gpt-")
|
|
319
|
+
):
|
|
320
|
+
return requested_model
|
|
321
|
+
if name == "groq" and (
|
|
322
|
+
requested_model.startswith("groq/") or requested_model.startswith("llama-")
|
|
323
|
+
):
|
|
324
|
+
return requested_model
|
|
325
|
+
if name == "openrouter":
|
|
326
|
+
return requested_model
|
|
327
|
+
return model_tier
|
|
328
|
+
|
|
329
|
+
def _is_blocked(self, provider_name: str) -> bool:
|
|
330
|
+
"""Blacklist temporal: bloqueado solo por _blacklist_ttl segundos."""
|
|
331
|
+
until = self._blocked_until.get(provider_name, 0)
|
|
332
|
+
if until and time.time() < until:
|
|
333
|
+
return True
|
|
334
|
+
# Tiempo expirado — resetear fallos para darle otra oportunidad
|
|
335
|
+
if until and time.time() >= until:
|
|
336
|
+
self._failures[provider_name] = 0
|
|
337
|
+
self._blocked_until[provider_name] = 0
|
|
338
|
+
log.info(f"[llm] {provider_name} desbloqueado (blacklist expirado)")
|
|
339
|
+
return False
|
|
340
|
+
|
|
341
|
+
def _register_failure(self, provider_name: str, error: Exception):
|
|
342
|
+
"""Registra un fallo y bloquea si acumula 3 consecutivos."""
|
|
343
|
+
self._failures[provider_name] = self._failures.get(provider_name, 0) + 1
|
|
344
|
+
count = self._failures[provider_name]
|
|
345
|
+
log.warning(f"[llm] {provider_name} fallo #{count}: {str(error)[:100]}")
|
|
346
|
+
status_code = getattr(getattr(error, "response", None), "status_code", None)
|
|
347
|
+
block_after = 3
|
|
348
|
+
block_ttl = self._blacklist_ttl
|
|
349
|
+
if status_code in (401, 402, 403):
|
|
350
|
+
block_after = 1
|
|
351
|
+
block_ttl = max(block_ttl, 1800.0)
|
|
352
|
+
elif status_code in (429, 500, 502, 503, 504):
|
|
353
|
+
block_after = 2
|
|
354
|
+
block_ttl = max(block_ttl, 180.0)
|
|
355
|
+
if count >= block_after:
|
|
356
|
+
self._blocked_until[provider_name] = time.time() + block_ttl
|
|
357
|
+
log.error(f"[llm] {provider_name} BLOQUEADO por {block_ttl}s tras {count} fallos")
|
|
358
|
+
# Guardar métrica en DB para que el admin pueda ver con /v8
|
|
359
|
+
try:
|
|
360
|
+
if db:
|
|
361
|
+
db.record_metric("llm_failure", provider_name, count,
|
|
362
|
+
{"error": str(error)[:80], "blocked": count >= block_after, "status_code": status_code})
|
|
363
|
+
except Exception:
|
|
364
|
+
pass
|
|
365
|
+
|
|
366
|
+
def _is_valid_response(self, text: str) -> bool:
|
|
367
|
+
"""Detecta respuestas vacías o inválidas que no deben llegar al usuario."""
|
|
368
|
+
if not text or not text.strip():
|
|
369
|
+
return False
|
|
370
|
+
stripped = text.strip()
|
|
371
|
+
# Respuesta puramente de error del API
|
|
372
|
+
if stripped.startswith("Error") and len(stripped) < 30:
|
|
373
|
+
return False
|
|
374
|
+
# JSON de error de OpenRouter / Gemini que se filtró
|
|
375
|
+
if stripped.startswith('{"error"') or stripped.startswith('{"status"'):
|
|
376
|
+
return False
|
|
377
|
+
return True
|
|
378
|
+
|
|
379
|
+
async def complete(self, messages: List[Dict],
|
|
380
|
+
model_tier: str = "fast",
|
|
381
|
+
temperature: float = 0.7,
|
|
382
|
+
max_tokens: int = 1000,
|
|
383
|
+
use_cache: bool = True,
|
|
384
|
+
**kwargs) -> Tuple[str, Dict]:
|
|
385
|
+
requested_model = self._get_requested_model(model_tier)
|
|
386
|
+
if use_cache and db:
|
|
387
|
+
ck = self._hash(messages, t=temperature, m=max_tokens,
|
|
388
|
+
tier=model_tier, requested_model=requested_model)
|
|
389
|
+
cached = db.get_cached_response(ck)
|
|
390
|
+
if cached and self._is_valid_response(cached):
|
|
391
|
+
return cached, {"cached": True}
|
|
392
|
+
|
|
393
|
+
last_error = None
|
|
394
|
+
attempted = []
|
|
395
|
+
for provider in self._ordered_providers(requested_model):
|
|
396
|
+
if self._is_blocked(provider.name):
|
|
397
|
+
log.debug(f"[llm] {provider.name} saltado (blacklist activo)")
|
|
398
|
+
continue
|
|
399
|
+
attempted.append(provider.name)
|
|
400
|
+
try:
|
|
401
|
+
provider_model = self._resolve_provider_model(provider, requested_model, model_tier)
|
|
402
|
+
# Timeout del provider siempre menor que el del caller
|
|
403
|
+
# para evitar zombies. El caller (admin_brain) usa 12s,
|
|
404
|
+
# los providers internos usan hasta 25s — reducimos aquí.
|
|
405
|
+
response, metadata = await asyncio.wait_for(
|
|
406
|
+
provider.complete(
|
|
407
|
+
messages, model=provider_model,
|
|
408
|
+
temperature=temperature, max_tokens=max_tokens, **kwargs),
|
|
409
|
+
timeout=10.0 # siempre < 12s del caller
|
|
410
|
+
)
|
|
411
|
+
|
|
412
|
+
# Verificar que la respuesta sea válida — no vacía ni error
|
|
413
|
+
if not self._is_valid_response(response):
|
|
414
|
+
err = ValueError(f"respuesta inválida/vacía: '{response[:40]}'")
|
|
415
|
+
self._register_failure(provider.name, err)
|
|
416
|
+
last_error = err
|
|
417
|
+
log.warning(f"[llm] {provider.name} devolvió respuesta inválida — siguiente")
|
|
418
|
+
continue
|
|
419
|
+
|
|
420
|
+
# Éxito — resetear fallos
|
|
421
|
+
self._failures[provider.name] = 0
|
|
422
|
+
self._last_success[provider.name] = time.time()
|
|
423
|
+
if use_cache and db:
|
|
424
|
+
db.cache_response(ck, response)
|
|
425
|
+
if db:
|
|
426
|
+
db.record_metric("llm", "completion",
|
|
427
|
+
metadata.get("latency_ms", 0),
|
|
428
|
+
{"provider": metadata.get("provider"), "tier": model_tier,
|
|
429
|
+
"requested_model": requested_model})
|
|
430
|
+
log.info(
|
|
431
|
+
f"[llm] {provider.name} OK ({metadata.get('latency_ms',0)}ms) | "
|
|
432
|
+
f"tier={model_tier} requested={requested_model}"
|
|
433
|
+
)
|
|
434
|
+
return response, metadata
|
|
435
|
+
|
|
436
|
+
except asyncio.TimeoutError as e:
|
|
437
|
+
te = TimeoutError(f"timeout 10s")
|
|
438
|
+
self._register_failure(provider.name, te)
|
|
439
|
+
last_error = te
|
|
440
|
+
except Exception as e:
|
|
441
|
+
self._register_failure(provider.name, e)
|
|
442
|
+
last_error = e
|
|
443
|
+
|
|
444
|
+
providers_tried = ", ".join(attempted) if attempted else "ninguno"
|
|
445
|
+
raise RuntimeError(f"Todos los LLM fallaron [{providers_tried}]: {last_error}")
|
|
446
|
+
|
|
447
|
+
def get_health(self) -> Dict:
    """Report the health of every provider. Used by /v8 and diagnostics.

    Returns:
        A dict keyed by provider name. Each value holds ``failures`` (the
        recorded failure count, 0 when none is recorded), ``blocked``
        (True while the provider's block deadline is still in the future)
        and ``unblocks_in`` (whole seconds left until the deadline, 0 when
        the provider is not blocked).
    """
    current = time.time()
    report = {}
    for provider in self.providers:
        deadline = self._blocked_until.get(provider.name, 0)
        is_blocked = current < deadline
        if is_blocked:
            remaining = max(0, int(deadline - current))
        else:
            remaining = 0
        report[provider.name] = {
            "failures": self._failures.get(provider.name, 0),
            "blocked": is_blocked,
            "unblocks_in": remaining,
        }
    return report
|
|
459
|
+
|
|
460
|
+
async def embed(self, text: str) -> List[float]:
    """Return an embedding for ``text`` from the first provider that succeeds.

    Providers are tried in ``self.providers`` order. When every provider
    raises (or there are none), the result of ``self._simple_embedding(text)``
    is returned instead.

    Args:
        text: The text to embed.

    Returns:
        The embedding vector as a list of floats.
    """
    for provider in self.providers:
        try:
            return await provider.embed(text)
        except Exception as exc:
            # Best-effort fallback: keep trying the remaining providers, but
            # leave a trace so a silent downgrade to the local hash embedding
            # can be diagnosed.
            log.debug(
                "[llm] %s embed failed, trying next provider: %s",
                getattr(provider, "name", provider),
                exc,
            )
    return self._simple_embedding(text)
|
|
467
|
+
|
|
468
|
+
def _simple_embedding(self, text: str, dim: int = 384) -> List[float]:
|
|
469
|
+
words = text.lower().split()
|
|
470
|
+
vec = [0.0] * dim
|
|
471
|
+
for i, w in enumerate(words[:dim]):
|
|
472
|
+
vec[i % dim] += hash(w) % 100 / 100.0
|
|
473
|
+
norm = math.sqrt(sum(x*x for x in vec))
|
|
474
|
+
return [x/norm for x in vec] if norm > 0 else vec
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
# Global engine instance; it stays None until init_llm() assigns it.
# The annotation is a string so it is not evaluated at import time.
llm_engine: "LLMEngine | None" = None
|
|
479
|
+
|
|
480
|
+
def init_llm() -> None:
    """Create a fresh ``LLMEngine`` and store it in the module-level ``llm_engine``."""
    global llm_engine
    engine = LLMEngine()
    llm_engine = engine
|
|
483
|
+
|
|
484
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
485
|
+
# ANALIZADOR DE MENSAJES AVANZADO
|
|
486
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
487
|
+
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import sys
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Dict, Any
|
|
5
|
+
from src.interfaces.nova_bridge import nova_bridge
|
|
6
|
+
|
|
7
|
+
log = logging.getLogger("conny.mcp")
|
|
8
|
+
|
|
9
|
+
class MCPServer:
    """
    Model Context Protocol (MCP) server over stdio.

    Exposes the Swarm and AgentDB tools to the host machine.

    NOTE(review): responses are bare dicts (``tools`` / ``result`` /
    ``error``) with no ``jsonrpc`` or ``id`` fields. Confirm that the client
    does not need a full JSON-RPC 2.0 envelope.
    """

    def __init__(self):
        # Tool name -> handler. Every handler receives the call's arguments dict.
        self.tools = {
            "agent_spawn": self.tool_agent_spawn,
            "memory_store": self.tool_memory_store,
            "task_orchestrate": self.tool_task_orchestrate,
        }
        # Register the safety rules with Nova Governance.
        nova_bridge.connect(cannot_do_rules=["rm -rf /", "leak patient data"])

    def handle_request(self, req: Dict[str, Any]) -> Dict[str, Any]:
        """Dispatch one decoded request and return the response dict.

        Supported methods:
            ``tools/list``: returns ``{"tools": [<tool names>]}``.
            ``tools/call``: looks up ``params["name"]``, asks Nova Governance
            to validate the call, runs the handler with
            ``params["arguments"]`` and returns ``{"result": ...}``.

        Malformed input (a non-dict request, non-dict ``params`` or
        ``arguments``, an unknown or non-string tool name, an unknown
        method) produces an ``{"error": ...}`` dict instead of raising.
        """
        if not isinstance(req, dict):
            return {"error": "Invalid request"}

        method = req.get("method")

        if method == "tools/list":
            return {"tools": list(self.tools)}

        if method == "tools/call":
            # An explicit JSON null is treated like a missing "params".
            params = req.get("params")
            if params is None:
                params = {}
            if not isinstance(params, dict):
                return {"error": "Invalid params"}

            tool_name = params.get("name")
            # The isinstance check also keeps unhashable names (lists, dicts)
            # away from the dict lookup.
            if not isinstance(tool_name, str) or tool_name not in self.tools:
                return {"error": "Tool not found"}

            tool_args = params.get("arguments")
            if tool_args is None:
                tool_args = {}
            if not isinstance(tool_args, dict):
                return {"error": "Invalid params"}

            # Validate the action with Nova Governance before running it.
            if not nova_bridge.validate_action(tool_name, tool_args):
                log.warning("[mcp] tool call blocked by governance: %s", tool_name)
                return {"error": f"Acción bloqueada por Nova Governance: {tool_name}"}

            return {"result": self.tools[tool_name](tool_args)}

        return {"error": "Unknown method"}

    def tool_agent_spawn(self, args: Dict) -> str:
        """Return the confirmation message for spawning an agent (role defaults to "coder")."""
        role = args.get("role", "coder")
        return f"Agent {role} spawned successfully."

    def tool_memory_store(self, args: Dict) -> str:
        """Return the confirmation message for storing ``args["key"]``."""
        # This is where the call to src.conny.brain.memory.AgentDB would go.
        key = args.get("key")
        return f"Memory {key} stored in AgentDB."

    def tool_task_orchestrate(self, args: Dict) -> str:
        """Return the confirmation message for routing ``args["task"]``."""
        task = args.get("task")
        return f"Task '{task}' routed to Swarm Queen."

    def start_stdio(self):
        """Read one JSON request per stdin line and print one JSON response per line."""
        for line in sys.stdin:
            if not line.strip():
                continue
            try:
                payload = json.dumps(self.handle_request(json.loads(line)))
            except Exception as e:
                # Top-level boundary: one bad request must not stop the
                # server, but the traceback is logged rather than dropped.
                log.exception("[mcp] request failed")
                payload = json.dumps({"error": str(e)})
            print(payload, flush=True)
|
|
70
|
+
|
|
71
|
+
if __name__ == "__main__":
    # Script entry point: build the server and serve requests from stdin.
    MCPServer().start_stdio()
|