@innvisor/conny-ai 9.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +68 -0
- package/CHANGELOG.md +54 -0
- package/LICENSE +21 -0
- package/README.md +369 -0
- package/brand-assets/A_dark_luxury_web_background_202605210700.jpeg +0 -0
- package/brand-assets/Conny.web.logo.png +0 -0
- package/brand-assets/Logo_Conny_Petalo_Claro.png +0 -0
- package/brand-assets/cl-nica-de-las-am-ricas/manifest.json +22 -0
- package/brand-assets/cl-nica-de-las-am-ricas/processed/business-identity.txt +11 -0
- package/brand-assets/cl-nica-de-las-am-ricas/raw/business-identity.txt +11 -0
- package/brand-assets/cl-nica-las-am-ricas/manifest.json +22 -0
- package/brand-assets/cl-nica-las-am-ricas/processed/business-identity.txt +11 -0
- package/brand-assets/cl-nica-las-am-ricas/raw/business-identity.txt +11 -0
- package/brand-assets/conny-demo/manifest.json +22 -0
- package/brand-assets/conny-demo/processed/business-identity.txt +7 -0
- package/brand-assets/conny-demo/raw/business-identity.txt +7 -0
- package/brand-assets/conny-logo.png +0 -0
- package/brand-assets/web.background.png +0 -0
- package/brand_assets.py +323 -0
- package/conny +28 -0
- package/conny-chat.py +579 -0
- package/conny-omni.py +3843 -0
- package/conny.py +113 -0
- package/conny_agents/__init__.py +1 -0
- package/conny_agents/agenda.py +1 -0
- package/conny_agents/captacion.py +1 -0
- package/conny_agents/conocimiento.py +1 -0
- package/conny_agents/escalacion.py +1 -0
- package/conny_agents/objeciones.py +1 -0
- package/conny_agents/seguimiento.py +1 -0
- package/conny_app.py +287 -0
- package/conny_audio.py +350 -0
- package/conny_audio_learn.py +84 -0
- package/conny_brain_v10.py +804 -0
- package/conny_bridge.py +656 -0
- package/conny_calendar.py +169 -0
- package/conny_cli.py +11784 -0
- package/conny_cli_bb.py +437 -0
- package/conny_commands.py +243 -0
- package/conny_config.py +215 -0
- package/conny_core/__init__.py +3 -0
- package/conny_core/conversation_engine.py +446 -0
- package/conny_core/first_turn_ops.py +287 -0
- package/conny_core/persona_registry.py +157 -0
- package/conny_core/prompt_ops.py +561 -0
- package/conny_cron.py +72 -0
- package/conny_demo_v2.py +209 -0
- package/conny_demo_voice.py +134 -0
- package/conny_design.py +43 -0
- package/conny_doctor.py +319 -0
- package/conny_domino.py +696 -0
- package/conny_generator.py +447 -0
- package/conny_google_auth.py +159 -0
- package/conny_i18n.py +619 -0
- package/conny_init.py +509 -0
- package/conny_integrations/__init__.py +4 -0
- package/conny_integrations/llm.py +1 -0
- package/conny_integrations/vault.py +77 -0
- package/conny_integrations/whatsapp.py +1 -0
- package/conny_intelligence.py +65 -0
- package/conny_learning.py +154 -0
- package/conny_memory.py +243 -0
- package/conny_memory_engine.py +292 -0
- package/conny_nova_proxy.py +170 -0
- package/conny_nuke_robot_phrases.py +493 -0
- package/conny_pairing.py +253 -0
- package/conny_patch.py +291 -0
- package/conny_persona_cli.py +150 -0
- package/conny_router.py +308 -0
- package/conny_runtime_ops.py +271 -0
- package/conny_session.py +516 -0
- package/conny_skills/__init__.py +1 -0
- package/conny_skills/demo_mode.py +35 -0
- package/conny_skills/text_processing.py +1 -0
- package/conny_skills/tone_detection.py +1 -0
- package/conny_smart_features.py +333 -0
- package/conny_studio.py +161 -0
- package/conny_sync_fix.py +306 -0
- package/conny_tui.py +512 -0
- package/conny_tui_select.py +202 -0
- package/conny_ultra_config.py +411 -0
- package/conny_uncertainty.py +174 -0
- package/conny_utils.py +87 -0
- package/conny_voice.py +156 -0
- package/conny_voice_engine.py +124 -0
- package/conny_web_search.py +66 -0
- package/conny_weekly_report.py +85 -0
- package/conny_worm.py +88 -0
- package/core/__init__.py +25 -0
- package/ecosystem.config.js +24 -0
- package/fix_init.py +27 -0
- package/install.sh +78 -0
- package/knowledge_base.py +330 -0
- package/nova/rules/default.yaml +37 -0
- package/nova_bridge.py +509 -0
- package/npm/conny.js +471 -0
- package/package.json +102 -0
- package/personas/conny/base/default.yaml +35 -0
- package/personas/conny/base/estetica_whatsapp.yaml +36 -0
- package/requirements.txt +14 -0
- package/run.sh +47 -0
- package/search.py +465 -0
- package/smart_handoff.py +1150 -0
- package/src/__init__.py +0 -0
- package/src/conny/__init__.py +0 -0
- package/src/conny/admin/__init__.py +0 -0
- package/src/conny/admin/api.py +234 -0
- package/src/conny/admin/dashboard.py +772 -0
- package/src/conny/api/__init__.py +0 -0
- package/src/conny/api/routes.py +8851 -0
- package/src/conny/brain/__init__.py +15 -0
- package/src/conny/brain/engine.py +804 -0
- package/src/conny/brain/learning.py +154 -0
- package/src/conny/brain/memory.py +324 -0
- package/src/conny/brain/smart_features.py +333 -0
- package/src/conny/brain/uncertainty.py +167 -0
- package/src/conny/channels/__init__.py +0 -0
- package/src/conny/channels/audio.py +316 -0
- package/src/conny/channels/cli.py +11795 -0
- package/src/conny/channels/logo_art.py +11 -0
- package/src/conny/channels/voice.py +156 -0
- package/src/conny/core/__init__.py +0 -0
- package/src/conny/core/config.py +215 -0
- package/src/conny/core/cron.py +72 -0
- package/src/conny/core/messenger.py +563 -0
- package/src/conny/core/router.py +297 -0
- package/src/conny/core/session.py +312 -0
- package/src/conny/demo/__init__.py +0 -0
- package/src/conny/demo/handler.py +3110 -0
- package/src/conny/integrations/__init__.py +19 -0
- package/src/conny/integrations/calendar.py +169 -0
- package/src/conny/integrations/knowledge.py +312 -0
- package/src/conny/integrations/search.py +66 -0
- package/src/conny/personas/__init__.py +0 -0
- package/src/conny/personas/generator.py +447 -0
- package/src/conny/production/__init__.py +0 -0
- package/src/conny/production/domino.py +696 -0
- package/src/conny/production/guard.py +550 -0
- package/src/conny/production/handoff.py +1150 -0
- package/src/conny/production/monitor.py +353 -0
- package/src/conny/utils/__init__.py +2 -0
- package/src/conny/utils/helpers.py +75 -0
- package/src/conny/utils/i18n.py +619 -0
- package/src/core/admin_engines.py +772 -0
- package/src/core/globals.py +11845 -0
- package/src/core/orchestrator.py +273 -0
- package/src/core/production_monitor.py +353 -0
- package/src/core/runtime.py +5487 -0
- package/src/domain/onboarding_flow.py +230 -0
- package/src/domain/prompts/__init__.py +1 -0
- package/src/domain/prompts/prospect_pitch.py +282 -0
- package/src/domain/send_guard.py +636 -0
- package/src/domain/swarm/queen.py +96 -0
- package/src/infrastructure/llm_providers/engine.py +487 -0
- package/src/interfaces/mcp_server.py +73 -0
- package/src/interfaces/nova_bridge.py +58 -0
- package/src/interfaces/web/admin_api.py +1379 -0
- package/src/interfaces/web/app.py +9408 -0
- package/src/interfaces/web/demo_handler.py +3450 -0
- package/src/interfaces/web/static/generate_avatars.py +46 -0
- package/v7/__init__.py +46 -0
- package/v7/agents/__init__.py +46 -0
- package/v7/agents/agenda.py +77 -0
- package/v7/agents/base.py +216 -0
- package/v7/agents/captacion.py +60 -0
- package/v7/agents/conocimiento.py +69 -0
- package/v7/agents/escalacion.py +83 -0
- package/v7/agents/objeciones.py +109 -0
- package/v7/agents/seguimiento.py +71 -0
- package/v7/memory/__init__.py +46 -0
- package/v7/memory/patient_profile.py +200 -0
- package/v7/orchestrator.py +275 -0
- package/v7/postprocess.py +127 -0
- package/v7/router.py +239 -0
- package/verify_conversation_impl.py +48 -0
package/conny_audio.py
ADDED
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Módulo de manejo de audio/transcripción para Conny Ultra.
|
|
3
|
+
|
|
4
|
+
Contiene la lógica de:
|
|
5
|
+
- Transcripción de audio con Gemini 2.0 Flash (primario)
|
|
6
|
+
- Fallback a Groq Whisper
|
|
7
|
+
- Fallback a OpenRouter Whisper
|
|
8
|
+
- Manejo de diferentes plataformas (Telegram, WhatsApp, WhatsApp Cloud)
|
|
9
|
+
|
|
10
|
+
Este módulo fue extraído de conny.py para reducir su tamaño y mejorar mantenibilidad.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import base64
|
|
15
|
+
import os
|
|
16
|
+
import tempfile
|
|
17
|
+
from typing import Optional, Tuple
|
|
18
|
+
|
|
19
|
+
import httpx
|
|
20
|
+
|
|
21
|
+
try:
    from conny_config import Config
except ImportError:
    class Config:
        """Empty stand-in used when ``conny_config`` cannot be imported.

        Every credential is blank, so each provider that checks its key
        before calling out is skipped.
        """

        # Messaging-platform credentials.
        TELEGRAM_TOKEN = WA_ACCESS_TOKEN = ""

        # Gemini keys: six individual slots plus the aggregated list.
        GEMINI_API_KEY = GEMINI_API_KEY_2 = GEMINI_API_KEY_3 = ""
        GEMINI_API_KEY_4 = GEMINI_API_KEY_5 = GEMINI_API_KEY_6 = ""
        GEMINI_API_KEYS = []

        # Whisper fallback providers.
        GROQ_API_KEY = OPENROUTER_API_KEY = ""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class AudioHandler:
    """
    Audio download and transcription handler for Conny Ultra.

    Transcription is attempted with each provider in turn until one of them
    returns text:
    - Gemini (``gemini-2.5-flash`` generateContent endpoint) — primary
    - Groq Whisper — fallback 1
    - OpenRouter Whisper — fallback 2
    """

    # User-facing replies returned in place of a transcript.
    _MSG_NOT_HEARD = "[no pude escuchar, puedes escribirlo?]"
    _MSG_NOT_TRANSCRIBED = "[no se pudo transcribir el audio]"

    _DEFAULT_MIME = "audio/ogg"

    # Bare MIME type -> file extension used for the multipart upload filename.
    _SUFFIX_BY_MIME = {
        "audio/ogg": ".ogg",
        "audio/oga": ".ogg",
        "audio/opus": ".ogg",
        "audio/mp3": ".mp3",
        "audio/mpeg": ".mp3",
        "audio/wav": ".wav",
        "audio/x-wav": ".wav",
        "audio/mp4": ".m4a",
        "audio/x-m4a": ".m4a",
        "audio/webm": ".webm",
    }

    # Telegram file extension -> MIME type.
    _MIME_BY_EXT = {
        "ogg": "audio/ogg",
        "mp3": "audio/mp3",
        "wav": "audio/wav",
        "m4a": "audio/mp4",
        "oga": "audio/ogg",
        "opus": "audio/ogg",
    }

    _GEMINI_URL = (
        "https://generativelanguage.googleapis.com/v1beta/models/"
        "gemini-2.5-flash:generateContent"
    )
    _GEMINI_PROMPT = (
        "Transcribe este mensaje de voz en español exactamente "
        "como se dice. Devuelve SOLO el texto transcrito, sin "
        "comillas ni comentarios. Mantén el tono coloquial tal como se habla."
    )
    # Statuses on which the next Gemini key is tried without logging.
    _GEMINI_ROTATE_STATUSES = (408, 429, 500, 502, 503, 504)

    def __init__(self):
        self._audio_cache: dict = {}

    @staticmethod
    def _logger():
        """Return the module logger (imported lazily, as the rest of the file does)."""
        import logging
        return logging.getLogger("conny_audio")

    @staticmethod
    def _normalize_mime(mime: Optional[str]) -> str:
        """Lower-case a MIME type and drop parameters such as ``; codecs=opus``."""
        return (mime or "").split(";", 1)[0].strip().lower()

    def _audio_suffix(self, mime: str) -> str:
        """Map a MIME type to a file extension.

        Parameters after ``;`` and letter case are ignored. Unknown or empty
        types map to ``.ogg``.
        """
        return self._SUFFIX_BY_MIME.get(self._normalize_mime(mime), ".ogg")

    async def transcribe_audio(
        self,
        file_id: str,
        platform: str = "telegram",
        wa_media_id: Optional[str] = None
    ) -> str:
        """
        Download a voice message and transcribe it (Gemini → Whisper fallbacks).

        Args:
            file_id: Telegram file id, or ``wa_b64:<mime>:<base64>`` for the
                WhatsApp bridge.
            platform: Source platform (telegram, whatsapp, whatsapp_cloud).
            wa_media_id: WhatsApp Cloud media id (when applicable).

        Returns:
            The transcribed text, or a bracketed Spanish fallback message when
            the audio could not be fetched or transcribed. Never raises: any
            unexpected error is logged and turned into the fallback message.
        """
        try:
            audio_bytes, mime_type = await self._fetch_audio(file_id, platform, wa_media_id)
            if not audio_bytes:
                return self._MSG_NOT_HEARD

            # The download client is already closed here; providers are tried in order.
            providers = (
                self._transcribe_gemini,
                self._transcribe_groq_whisper,
                self._transcribe_openrouter_whisper,
            )
            for provider in providers:
                result = await provider(audio_bytes, mime_type)
                if result:
                    return result

            return self._MSG_NOT_TRANSCRIBED

        except Exception as e:
            self._logger().error(f"[audio] Error: {e}", exc_info=True)
            return self._MSG_NOT_HEARD

    async def _fetch_audio(
        self,
        file_id: str,
        platform: str,
        wa_media_id: Optional[str]
    ) -> Tuple[Optional[bytes], str]:
        """Obtain the raw audio for *platform*; returns ``(bytes or None, mime_type)``."""
        if platform == "telegram":
            return await self._download_telegram(file_id)

        if platform == "whatsapp_cloud" and wa_media_id:
            return await self._download_whatsapp_cloud(wa_media_id)

        # WhatsApp Bridge (Baileys) — inline base64 audio.
        if platform == "whatsapp" and file_id.startswith("wa_b64:"):
            try:
                _, mime_part, b64_data = file_id.split(":", 2)
                return base64.b64decode(b64_data), (mime_part or self._DEFAULT_MIME)
            except ValueError:
                # Missing segments or invalid base64 (binascii.Error is a ValueError).
                return None, self._DEFAULT_MIME

        return None, self._DEFAULT_MIME

    async def _download_telegram(self, file_id: str) -> Tuple[Optional[bytes], str]:
        """Resolve a Telegram file id via ``getFile`` and download the file."""
        async with httpx.AsyncClient(timeout=30.0) as client:
            meta = await client.get(
                f"https://api.telegram.org/bot{Config.TELEGRAM_TOKEN}/getFile",
                params={"file_id": file_id}
            )
            # A failed lookup has no "result" key; the KeyError is handled by the caller.
            file_path = meta.json()["result"]["file_path"]
            ext = file_path.rsplit(".", 1)[-1].lower() if "." in file_path else "ogg"
            download = await client.get(
                f"https://api.telegram.org/file/bot{Config.TELEGRAM_TOKEN}/{file_path}"
            )

        if download.status_code != 200:
            # Do not feed an error body to the transcribers. The URL is not
            # logged because it embeds the bot token.
            self._logger().warning(f"[audio] Telegram download HTTP {download.status_code}")
            return None, self._DEFAULT_MIME
        return download.content, self._MIME_BY_EXT.get(ext, self._DEFAULT_MIME)

    async def _download_whatsapp_cloud(self, wa_media_id: str) -> Tuple[Optional[bytes], str]:
        """Look up a WhatsApp Cloud media id and download the file it points to."""
        auth = {"Authorization": f"Bearer {Config.WA_ACCESS_TOKEN}"}
        async with httpx.AsyncClient(timeout=30.0) as client:
            lookup = await client.get(
                f"https://graph.facebook.com/v20.0/{wa_media_id}",
                headers=auth
            )
            media = lookup.json()
            url = media.get("url", "")
            if not url:
                return None, self._DEFAULT_MIME
            download = await client.get(url, headers=auth)

        if download.status_code != 200:
            self._logger().warning(f"[audio] WhatsApp download HTTP {download.status_code}")
            return None, self._DEFAULT_MIME
        return download.content, (media.get("mime_type") or self._DEFAULT_MIME)

    @staticmethod
    def _gemini_text(body: dict) -> str:
        """Extract the first candidate's first text part from a Gemini response."""
        candidates = body.get("candidates") or [{}]
        parts = (candidates[0].get("content") or {}).get("parts") or [{}]
        return (parts[0].get("text") or "").strip()

    async def _transcribe_gemini(self, audio_bytes: bytes, mime_type: str) -> Optional[str]:
        """Transcribe with Gemini, rotating through the configured API keys.

        Returns the transcript, or ``None`` when no key produced more than two
        characters of text.
        """
        log = self._logger()

        clean_mime = self._normalize_mime(mime_type) or self._DEFAULT_MIME
        effective_mime = "audio/ogg" if clean_mime in ("audio/oga", "audio/opus") else clean_mime

        gemini_keys = Config.GEMINI_API_KEYS or [
            k for k in (
                Config.GEMINI_API_KEY,
                Config.GEMINI_API_KEY_2,
                Config.GEMINI_API_KEY_3,
                Config.GEMINI_API_KEY_4,
                Config.GEMINI_API_KEY_5,
                Config.GEMINI_API_KEY_6,
            ) if k
        ]
        if not gemini_keys:
            return None

        # The request body is identical for every key, so build it once.
        payload = {
            "contents": [{
                "parts": [
                    {
                        "inline_data": {
                            "mime_type": effective_mime,
                            "data": base64.b64encode(audio_bytes).decode(),
                        }
                    },
                    {"text": self._GEMINI_PROMPT},
                ]
            }],
            "generationConfig": {"temperature": 0.0, "maxOutputTokens": 500}
        }

        async with httpx.AsyncClient(timeout=25.0) as client:
            for gkey in gemini_keys:
                try:
                    # The key is sent as a header so it never appears in a URL.
                    resp = await client.post(
                        self._GEMINI_URL,
                        headers={"x-goog-api-key": gkey},
                        json=payload
                    )

                    if resp.status_code == 200:
                        t = self._gemini_text(resp.json())
                        if len(t) > 2:
                            log.info(f"[audio] Gemini OK: {t[:80]}")
                            return t
                    elif resp.status_code not in self._GEMINI_ROTATE_STATUSES:
                        log.warning(f"[audio] Gemini {resp.status_code}: {resp.text[:120]}")
                    # Any outcome other than a usable transcript: try the next key.

                except Exception as eg:
                    log.warning(f"[audio] Gemini error: {eg}")

        return None

    async def _post_whisper(
        self,
        url: str,
        api_key: str,
        form: dict,
        audio_bytes: bytes,
        mime_type: str
    ):
        """POST the audio as a multipart upload to a Whisper-style endpoint."""
        clean_mime = self._normalize_mime(mime_type) or self._DEFAULT_MIME
        filename = f"audio{self._audio_suffix(clean_mime)}"
        async with httpx.AsyncClient(timeout=60.0) as client:
            # The bytes are uploaded straight from memory; no temp file is written.
            return await client.post(
                url,
                headers={"Authorization": f"Bearer {api_key}"},
                files={"file": (filename, audio_bytes, clean_mime)},
                data=form,
            )

    async def _transcribe_groq_whisper(
        self,
        audio_bytes: bytes,
        mime_type: str
    ) -> Optional[str]:
        """Transcribe with Groq Whisper (fallback 1); ``None`` on any failure."""
        log = self._logger()

        if not Config.GROQ_API_KEY:
            return None

        try:
            resp = await self._post_whisper(
                "https://api.groq.com/openai/v1/audio/transcriptions",
                Config.GROQ_API_KEY,
                {
                    "model": "whisper-large-v3-turbo",
                    "language": "es",
                    "response_format": "json",
                    "temperature": "0",
                    "prompt": "Transcribe este audio en español tal como se dice, sin comentarios adicionales.",
                },
                audio_bytes,
                mime_type,
            )

            if resp.status_code == 200:
                t = (resp.json().get("text") or "").strip()
                if t:
                    log.info(f"[audio] Groq Whisper OK: {t[:80]}")
                    return t
            else:
                log.warning(f"[audio] Groq Whisper {resp.status_code}: {resp.text[:160]}")

        except Exception as eg:
            log.warning(f"[audio] Groq Whisper error: {eg}")

        return None

    async def _transcribe_openrouter_whisper(
        self,
        audio_bytes: bytes,
        mime_type: str
    ) -> Optional[str]:
        """Transcribe with OpenRouter Whisper (fallback 2); ``None`` on any failure."""
        log = self._logger()

        if not Config.OPENROUTER_API_KEY:
            return None

        try:
            resp = await self._post_whisper(
                "https://openrouter.ai/api/v1/audio/transcriptions",
                Config.OPENROUTER_API_KEY,
                {
                    # Config may not define WHISPER_MODEL, hence the getattr default.
                    "model": getattr(Config, "WHISPER_MODEL", "openai/whisper-large-v3"),
                    "language": "es",
                },
                audio_bytes,
                mime_type,
            )

            if resp.status_code == 200:
                t = (resp.json().get("text") or "").strip()
                if t:
                    log.info(f"[audio] Whisper OK: {t[:80]}")
                    return t
            else:
                log.warning(f"[audio] OpenRouter Whisper {resp.status_code}: {resp.text[:160]}")

        except Exception as ew:
            log.warning(f"[audio] Whisper error: {ew}")

        return None

    def clear_cache(self) -> None:
        """Empty the in-memory audio cache."""
        self._audio_cache.clear()
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
async def transcribe_audio(
    file_id: str,
    platform: str = "telegram",
    wa_media_id: str = None
) -> str:
    """
    Module-level shortcut: transcribe one audio message with a throwaway handler.

    A new ``AudioHandler`` is created on every call and the work is delegated
    to its ``transcribe_audio`` method.

    Args:
        file_id: Id of the audio file.
        platform: Source platform.
        wa_media_id: WhatsApp Cloud media id.

    Returns:
        The transcribed text, or the handler's error message.
    """
    return await AudioHandler().transcribe_audio(
        file_id,
        platform=platform,
        wa_media_id=wa_media_id,
    )
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""conny_audio_learn.py — Transcribe admin audio → auto-learn."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
import logging, os, tempfile, base64
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
# Logger shared by every function in this module.
log = logging.getLogger("conny.audio_learn")

# Groq Whisper endpoint and the API key captured from the environment at import time.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
GROQ_WHISPER_URL = "https://api.groq.com/openai/v1/audio/transcriptions"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
async def transcribe_audio(audio_data: bytes, mime_type: str = "audio/ogg") -> Optional[str]:
    """Transcribe audio bytes using the Groq Whisper API.

    Args:
        audio_data: Raw audio content.
        mime_type: MIME type of the audio. Parameters such as
            ``; codecs=opus`` and letter case are ignored.

    Returns:
        The stripped transcript (possibly empty) on HTTP 200, or ``None`` when
        no API key is available, Groq returns another status, or the request
        fails. Never raises.
    """
    # GROQ_API_KEY is captured at import time; re-read the process environment
    # so a key exported after import is still picked up.
    key = GROQ_API_KEY or os.getenv("GROQ_API_KEY", "")
    if not key:
        log.warning("[audio_learn] no GROQ_API_KEY")
        return None

    # "audio/ogg; codecs=opus" must match the "audio/ogg" entry below.
    clean_mime = (mime_type or "").split(";", 1)[0].strip().lower() or "audio/ogg"
    ext = {"audio/ogg": ".ogg", "audio/mp4": ".m4a", "audio/mpeg": ".mp3",
           "audio/wav": ".wav", "audio/webm": ".webm"}.get(clean_mime, ".ogg")

    try:
        async with httpx.AsyncClient(timeout=60.0) as client:
            # Upload the bytes straight from memory: no temp file to write,
            # reopen, or clean up.
            r = await client.post(
                GROQ_WHISPER_URL,
                headers={"Authorization": f"Bearer {key}"},
                files={"file": (f"audio{ext}", audio_data, clean_mime)},
                data={"model": "whisper-large-v3", "language": "es"},
            )
        if r.status_code == 200:
            # "text" may be missing or null in the response body.
            text = (r.json().get("text") or "").strip()
            log.info(f"[audio_learn] transcribed {len(audio_data)} bytes → {len(text)} chars")
            return text
        log.error(f"[audio_learn] Groq returned {r.status_code}: {r.text[:200]}")
        return None
    except Exception as e:
        log.error(f"[audio_learn] transcription error: {e}")
        return None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
async def process_admin_audio(audio_data: bytes, mime_type: str, instance_id: str, chat_id: str) -> Optional[str]:
    """Full pipeline: transcribe admin audio, then append it to soul + teachings.

    Args:
        audio_data: Raw audio content.
        mime_type: MIME type of the audio.
        instance_id: Instance name; used to build ``soul/<instance_id>/knowledge.md``
            and ``teachings/<instance_id>.jsonl`` relative to the working directory.
        chat_id: Stored as ``taught_by`` in the teaching record.

    Returns:
        The transcript, or ``None`` when transcription failed or produced
        fewer than 10 characters (nothing is written in that case).
    """
    import json
    from datetime import datetime

    text = await transcribe_audio(audio_data, mime_type)
    if not text or len(text) < 10:
        return None

    # One timestamp so both records describe the same moment.
    now = datetime.now()

    # NOTE(review): instance_id is interpolated into file paths unchanged —
    # confirm callers never pass untrusted values (e.g. containing "..").

    # Save to soul. UTF-8 is explicit because the transcript is Spanish text
    # and the platform default encoding may not be able to represent it.
    soul_dir = Path(f"soul/{instance_id}")
    soul_dir.mkdir(parents=True, exist_ok=True)
    soul_file = soul_dir / "knowledge.md"
    with open(soul_file, "a", encoding="utf-8") as f:
        f.write(f"\n[{now.strftime('%Y-%m-%d %H:%M')}] [audio del admin]\n{text[:2000]}\n")

    # Save to teachings as one JSON line; ensure_ascii=False keeps accented
    # characters readable, which again requires a UTF-8 file.
    teachings_dir = Path("teachings")
    teachings_dir.mkdir(exist_ok=True)
    teachings_file = teachings_dir / f"{instance_id}.jsonl"
    record = {
        "ts": now.isoformat(),
        "question": "[audio del admin] " + text[:100],
        "answer": text[:500],
        "taught_by": chat_id,
        "source": "audio_transcription",
    }
    with open(teachings_file, "a", encoding="utf-8") as f:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")

    log.info(f"[audio_learn] saved audio teaching for {instance_id}: {text[:60]}...")
    return text
|