@innvisor/conny-ai 9.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +68 -0
- package/CHANGELOG.md +54 -0
- package/LICENSE +21 -0
- package/README.md +369 -0
- package/brand-assets/A_dark_luxury_web_background_202605210700.jpeg +0 -0
- package/brand-assets/Conny.web.logo.png +0 -0
- package/brand-assets/Logo_Conny_Petalo_Claro.png +0 -0
- package/brand-assets/cl-nica-de-las-am-ricas/manifest.json +22 -0
- package/brand-assets/cl-nica-de-las-am-ricas/processed/business-identity.txt +11 -0
- package/brand-assets/cl-nica-de-las-am-ricas/raw/business-identity.txt +11 -0
- package/brand-assets/cl-nica-las-am-ricas/manifest.json +22 -0
- package/brand-assets/cl-nica-las-am-ricas/processed/business-identity.txt +11 -0
- package/brand-assets/cl-nica-las-am-ricas/raw/business-identity.txt +11 -0
- package/brand-assets/conny-demo/manifest.json +22 -0
- package/brand-assets/conny-demo/processed/business-identity.txt +7 -0
- package/brand-assets/conny-demo/raw/business-identity.txt +7 -0
- package/brand-assets/conny-logo.png +0 -0
- package/brand-assets/web.background.png +0 -0
- package/brand_assets.py +323 -0
- package/conny +28 -0
- package/conny-chat.py +579 -0
- package/conny-omni.py +3843 -0
- package/conny.py +113 -0
- package/conny_agents/__init__.py +1 -0
- package/conny_agents/agenda.py +1 -0
- package/conny_agents/captacion.py +1 -0
- package/conny_agents/conocimiento.py +1 -0
- package/conny_agents/escalacion.py +1 -0
- package/conny_agents/objeciones.py +1 -0
- package/conny_agents/seguimiento.py +1 -0
- package/conny_app.py +287 -0
- package/conny_audio.py +350 -0
- package/conny_audio_learn.py +84 -0
- package/conny_brain_v10.py +804 -0
- package/conny_bridge.py +656 -0
- package/conny_calendar.py +169 -0
- package/conny_cli.py +11784 -0
- package/conny_cli_bb.py +437 -0
- package/conny_commands.py +243 -0
- package/conny_config.py +215 -0
- package/conny_core/__init__.py +3 -0
- package/conny_core/conversation_engine.py +446 -0
- package/conny_core/first_turn_ops.py +287 -0
- package/conny_core/persona_registry.py +157 -0
- package/conny_core/prompt_ops.py +561 -0
- package/conny_cron.py +72 -0
- package/conny_demo_v2.py +209 -0
- package/conny_demo_voice.py +134 -0
- package/conny_design.py +43 -0
- package/conny_doctor.py +319 -0
- package/conny_domino.py +696 -0
- package/conny_generator.py +447 -0
- package/conny_google_auth.py +159 -0
- package/conny_i18n.py +619 -0
- package/conny_init.py +509 -0
- package/conny_integrations/__init__.py +4 -0
- package/conny_integrations/llm.py +1 -0
- package/conny_integrations/vault.py +77 -0
- package/conny_integrations/whatsapp.py +1 -0
- package/conny_intelligence.py +65 -0
- package/conny_learning.py +154 -0
- package/conny_memory.py +243 -0
- package/conny_memory_engine.py +292 -0
- package/conny_nova_proxy.py +170 -0
- package/conny_nuke_robot_phrases.py +493 -0
- package/conny_pairing.py +253 -0
- package/conny_patch.py +291 -0
- package/conny_persona_cli.py +150 -0
- package/conny_router.py +308 -0
- package/conny_runtime_ops.py +271 -0
- package/conny_session.py +516 -0
- package/conny_skills/__init__.py +1 -0
- package/conny_skills/demo_mode.py +35 -0
- package/conny_skills/text_processing.py +1 -0
- package/conny_skills/tone_detection.py +1 -0
- package/conny_smart_features.py +333 -0
- package/conny_studio.py +161 -0
- package/conny_sync_fix.py +306 -0
- package/conny_tui.py +512 -0
- package/conny_tui_select.py +202 -0
- package/conny_ultra_config.py +411 -0
- package/conny_uncertainty.py +174 -0
- package/conny_utils.py +87 -0
- package/conny_voice.py +156 -0
- package/conny_voice_engine.py +124 -0
- package/conny_web_search.py +66 -0
- package/conny_weekly_report.py +85 -0
- package/conny_worm.py +88 -0
- package/core/__init__.py +25 -0
- package/ecosystem.config.js +24 -0
- package/fix_init.py +27 -0
- package/install.sh +78 -0
- package/knowledge_base.py +330 -0
- package/nova/rules/default.yaml +37 -0
- package/nova_bridge.py +509 -0
- package/npm/conny.js +471 -0
- package/package.json +102 -0
- package/personas/conny/base/default.yaml +35 -0
- package/personas/conny/base/estetica_whatsapp.yaml +36 -0
- package/requirements.txt +14 -0
- package/run.sh +47 -0
- package/search.py +465 -0
- package/smart_handoff.py +1150 -0
- package/src/__init__.py +0 -0
- package/src/conny/__init__.py +0 -0
- package/src/conny/admin/__init__.py +0 -0
- package/src/conny/admin/api.py +234 -0
- package/src/conny/admin/dashboard.py +772 -0
- package/src/conny/api/__init__.py +0 -0
- package/src/conny/api/routes.py +8851 -0
- package/src/conny/brain/__init__.py +15 -0
- package/src/conny/brain/engine.py +804 -0
- package/src/conny/brain/learning.py +154 -0
- package/src/conny/brain/memory.py +324 -0
- package/src/conny/brain/smart_features.py +333 -0
- package/src/conny/brain/uncertainty.py +167 -0
- package/src/conny/channels/__init__.py +0 -0
- package/src/conny/channels/audio.py +316 -0
- package/src/conny/channels/cli.py +11795 -0
- package/src/conny/channels/logo_art.py +11 -0
- package/src/conny/channels/voice.py +156 -0
- package/src/conny/core/__init__.py +0 -0
- package/src/conny/core/config.py +215 -0
- package/src/conny/core/cron.py +72 -0
- package/src/conny/core/messenger.py +563 -0
- package/src/conny/core/router.py +297 -0
- package/src/conny/core/session.py +312 -0
- package/src/conny/demo/__init__.py +0 -0
- package/src/conny/demo/handler.py +3110 -0
- package/src/conny/integrations/__init__.py +19 -0
- package/src/conny/integrations/calendar.py +169 -0
- package/src/conny/integrations/knowledge.py +312 -0
- package/src/conny/integrations/search.py +66 -0
- package/src/conny/personas/__init__.py +0 -0
- package/src/conny/personas/generator.py +447 -0
- package/src/conny/production/__init__.py +0 -0
- package/src/conny/production/domino.py +696 -0
- package/src/conny/production/guard.py +550 -0
- package/src/conny/production/handoff.py +1150 -0
- package/src/conny/production/monitor.py +353 -0
- package/src/conny/utils/__init__.py +2 -0
- package/src/conny/utils/helpers.py +75 -0
- package/src/conny/utils/i18n.py +619 -0
- package/src/core/admin_engines.py +772 -0
- package/src/core/globals.py +11845 -0
- package/src/core/orchestrator.py +273 -0
- package/src/core/production_monitor.py +353 -0
- package/src/core/runtime.py +5487 -0
- package/src/domain/onboarding_flow.py +230 -0
- package/src/domain/prompts/__init__.py +1 -0
- package/src/domain/prompts/prospect_pitch.py +282 -0
- package/src/domain/send_guard.py +636 -0
- package/src/domain/swarm/queen.py +96 -0
- package/src/infrastructure/llm_providers/engine.py +487 -0
- package/src/interfaces/mcp_server.py +73 -0
- package/src/interfaces/nova_bridge.py +58 -0
- package/src/interfaces/web/admin_api.py +1379 -0
- package/src/interfaces/web/app.py +9408 -0
- package/src/interfaces/web/demo_handler.py +3450 -0
- package/src/interfaces/web/static/generate_avatars.py +46 -0
- package/v7/__init__.py +46 -0
- package/v7/agents/__init__.py +46 -0
- package/v7/agents/agenda.py +77 -0
- package/v7/agents/base.py +216 -0
- package/v7/agents/captacion.py +60 -0
- package/v7/agents/conocimiento.py +69 -0
- package/v7/agents/escalacion.py +83 -0
- package/v7/agents/objeciones.py +109 -0
- package/v7/agents/seguimiento.py +71 -0
- package/v7/memory/__init__.py +46 -0
- package/v7/memory/patient_profile.py +200 -0
- package/v7/orchestrator.py +275 -0
- package/v7/postprocess.py +127 -0
- package/v7/router.py +239 -0
- package/verify_conversation_impl.py +48 -0
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""conny_learning.py — Real-time 3-layer learning engine."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import hashlib
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import time
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Dict, List, Optional
|
|
11
|
+
|
|
12
|
+
# Logger named "conny.learning", used by the code in this module.
log = logging.getLogger("conny.learning")

# Lowercase substrings looked up inside the user's follow-up reply. A hit makes
# the engine record the previous bot response as reinforced. Matching is plain
# substring containment on the lowercased reply (Spanish and English phrases).
POSITIVE_SIGNALS = [
    "gracias", "perfecto", "listo", "genial", "excelente", "dale", "ok perfecto",
    "thanks", "great", "perfect", "awesome",
    "te agradezco", "muy amable", "me queda claro", "entendido",
]
# Lowercase substrings that make the engine flag the previous bot response as
# failed. Because matching is by substring, "equivocad" covers several endings
# (e.g. "equivocado", "equivocada").
NEGATIVE_SIGNALS = [
    "no entiendo", "eso no es", "no me sirve", "otra vez", "repite",
    "eso ya lo dije", "ya te dije", "no es eso", "equivocad",
]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class RealTimeLearningEngine:
    """Three-layer learning store: per-turn, per-session and admin-taught.

    Everything is persisted as UTF-8 JSON / JSON-lines files:

    * ``<base_dir>/<instance_id>/learning/*.jsonl`` -- turn and session signals.
    * ``<teachings_dir>/<instance_id>.jsonl`` -- admin-taught question/answer pairs.
    * ``<base_dir>/<instance_id>/semantic/faqs.json`` -- FAQ index, updated by
      :meth:`learn_from_admin`.

    The ``async`` methods perform ordinary blocking file I/O.
    """

    def __init__(self, base_dir: str = "memory_store", teachings_dir: str = "teachings"):
        """Remember the storage roots and make sure the teachings folder exists.

        Args:
            base_dir: Root folder holding one sub-folder per instance.
            teachings_dir: Folder for the per-instance teachings files. The
                default keeps the historical location relative to the current
                working directory.
        """
        self._base = Path(base_dir)
        self._teachings_dir = Path(teachings_dir)
        self._teachings_dir.mkdir(parents=True, exist_ok=True)

    def _instance_dir(self, instance_id: str) -> Path:
        """Return (creating it if needed) the ``learning`` folder of an instance."""
        d = self._base / instance_id / "learning"
        d.mkdir(parents=True, exist_ok=True)
        return d

    @staticmethod
    def _append_jsonl(path: Path, entry: Dict[str, Any]) -> None:
        """Append ``entry`` as one JSON line to ``path``.

        The file is opened as UTF-8 explicitly: entries are dumped with
        ``ensure_ascii=False``, so relying on the locale encoding could fail
        on accented text or emoji.
        """
        with open(path, "a", encoding="utf-8") as fh:
            fh.write(json.dumps(entry, ensure_ascii=False) + "\n")

    @staticmethod
    def _text(message: Dict, limit: int) -> str:
        """Return at most ``limit`` characters of ``message["content"]``.

        A missing or ``None`` content yields ``""`` instead of raising.
        """
        return (message.get("content") or "")[:limit]

    async def learn_from_turn(self, instance_id: str, user_msg: str,
                              bot_response: str, user_reply: str = ""):
        """Record feedback implied by the user's reply to a bot response.

        The lowercased reply is searched for the ``POSITIVE_SIGNALS`` and
        ``NEGATIVE_SIGNALS`` substrings. Both checks run independently, so a
        reply containing both kinds of phrase is recorded in both files.
        """
        idir = self._instance_dir(instance_id)
        if not user_reply:
            return
        reply = user_reply.lower()
        if any(signal in reply for signal in POSITIVE_SIGNALS):
            await self._reinforce_pattern(idir, bot_response, user_msg)
        if any(signal in reply for signal in NEGATIVE_SIGNALS):
            await self._flag_failed_response(idir, bot_response, user_msg, user_reply)

    async def _reinforce_pattern(self, idir: Path, response: str, trigger: str):
        """Append a truncated (trigger, response) pair to ``reinforced.jsonl``."""
        self._append_jsonl(idir / "reinforced.jsonl", {
            "ts": datetime.now().isoformat(),
            "trigger": trigger[:200],
            "response": response[:300],
        })

    async def _flag_failed_response(self, idir: Path, response: str,
                                    user_msg: str, user_reply: str):
        """Append a truncated failure record to ``failures.jsonl`` and log it."""
        self._append_jsonl(idir / "failures.jsonl", {
            "ts": datetime.now().isoformat(),
            "user_msg": user_msg[:200],
            "bot_response": response[:300],
            "user_complaint": user_reply[:200],
        })
        log.info("[learning] flagged failed response for %s", idir.parent.name)

    async def learn_from_session(self, instance_id: str, messages: List[Dict],
                                 outcome: str = "unknown"):
        """Record a finished session and, depending on ``outcome``, its flow.

        Every session gets a line in ``sessions.jsonl``. ``"booked"`` also
        stores the tail of the conversation as a successful flow, and
        ``"abandoned"`` stores the last assistant message as a dropout point.
        """
        idir = self._instance_dir(instance_id)
        self._append_jsonl(idir / "sessions.jsonl", {
            "ts": datetime.now().isoformat(),
            "outcome": outcome,
            "turns": len(messages),
            "summary": self._summarize_session(messages),
        })
        if outcome == "booked":
            await self._save_successful_flow(idir, messages)
        elif outcome == "abandoned":
            await self._save_dropout_point(idir, messages)

    async def _save_successful_flow(self, idir: Path, messages: List[Dict]):
        """Store the last 8 messages (content cut to 150 chars) of a booked session."""
        flow = [
            {"role": m.get("role", ""), "content": self._text(m, 150)}
            for m in messages[-8:]
        ]
        self._append_jsonl(
            idir / "successful_flows.jsonl",
            {"ts": datetime.now().isoformat(), "flow": flow},
        )

    async def _save_dropout_point(self, idir: Path, messages: List[Dict]):
        """Store the last assistant message (cut to 200 chars) before a dropout."""
        last_bot = ""
        for m in reversed(messages):
            if m.get("role") == "assistant":
                last_bot = self._text(m, 200)
                break
        self._append_jsonl(
            idir / "dropouts.jsonl",
            {"ts": datetime.now().isoformat(), "last_bot_msg": last_bot},
        )

    def _summarize_session(self, messages: List[Dict]) -> str:
        """Join the first 5 user messages (50 chars each) with ``" | "``.

        Returns ``"empty"`` when the session has no user message.
        """
        user_msgs = [self._text(m, 50) for m in messages if m.get("role") == "user"]
        return " | ".join(user_msgs[:5]) if user_msgs else "empty"

    async def learn_from_admin(self, instance_id: str, question: str, answer: str,
                               admin_id: str = "") -> str:
        """Persist an admin-provided answer and mirror it into the FAQ index.

        The teaching is always appended to the instance's teachings file. The
        FAQ update is best-effort: any failure there is logged and ignored.

        Returns:
            A confirmation message for the admin.
        """
        now = datetime.now().isoformat()
        # Must stay identical to the hash the FAQ index is keyed by.
        question_hash = hashlib.md5(question.lower().strip().encode()).hexdigest()[:12]
        self._append_jsonl(self._teachings_dir / f"{instance_id}.jsonl", {
            "ts": now,
            "question": question,
            "answer": answer,
            "taught_by": admin_id,
            "question_hash": question_hash,
        })

        try:
            faq_file = self._base / instance_id / "semantic" / "faqs.json"
            faq_file.parent.mkdir(parents=True, exist_ok=True)
            faqs = json.loads(faq_file.read_text(encoding="utf-8")) if faq_file.exists() else {}
            previous = faqs.get(question_hash, {})
            faqs[question_hash] = {
                # Keep fields the entry already had (e.g. "first_asked").
                **previous,
                "question": question,
                "answer": answer,
                "frequency": previous.get("frequency", 0) + 1,
                "source": "admin_taught",
                "last_asked": now,
            }
            faq_file.write_text(
                json.dumps(faqs, ensure_ascii=False, indent=2), encoding="utf-8"
            )
        except Exception as e:  # deliberate best-effort: the teaching is already saved
            log.warning("[learning] FAQ update failed: %s", e)

        log.info("[learning] admin taught: '%s' → '%s'", question[:50], answer[:50])
        return f"✅ Aprendido. Ya sé responder: '{question[:50]}...'"

    async def get_teachings(self, instance_id: str, limit: int = 50) -> List[Dict]:
        """Return the most recent ``limit`` teachings of an instance, oldest first.

        Lines that are not valid JSON objects are skipped. A non-positive
        ``limit`` or a missing file yields an empty list.
        """
        if limit <= 0:
            # ``teachings[-0:]`` would return the whole list.
            return []
        teachings_file = self._teachings_dir / f"{instance_id}.jsonl"
        if not teachings_file.exists():
            return []
        teachings: List[Dict] = []
        with open(teachings_file, encoding="utf-8", errors="replace") as fh:
            for line in fh:
                try:
                    record = json.loads(line)
                except ValueError:  # json.JSONDecodeError subclasses ValueError
                    continue
                if isinstance(record, dict):
                    teachings.append(record)
        return teachings[-limit:]

    def build_teachings_prompt(self, teachings: List[Dict]) -> str:
        """Render teachings as a prompt section, or ``""`` if none are usable.

        Entries lacking a ``question`` or ``answer`` key are left out.
        """
        bullets = [
            f"- Pregunta: {t['question']}\n Respuesta: {t['answer']}"
            for t in teachings
            if isinstance(t, dict) and "question" in t and "answer" in t
        ]
        if not bullets:
            return ""
        return "\n".join(["INFORMACIÓN APRENDIDA (verificada por admin):", *bullets])
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
# Module-level shared instance built with the default arguments. Constructing
# it creates the "teachings" directory relative to the current working
# directory, so importing this module has that side effect.
learning_engine = RealTimeLearningEngine()
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
"""Episodic, semantic, and procedural memory engine with TF-IDF recall."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import hashlib
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import re
|
|
8
|
+
import time
|
|
9
|
+
from datetime import datetime, timedelta
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
12
|
+
import numpy as np
|
|
13
|
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
14
|
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
15
|
+
|
|
16
|
+
# Logger named "conny.memory", used by the code in this module.
log = logging.getLogger("conny.memory")
|
|
17
|
+
|
|
18
|
+
class AgentDB:
    """In-memory TF-IDF index used to retrieve similar past texts.

    Documents are vectorised with scikit-learn's ``TfidfVectorizer`` and
    ranked by exhaustive cosine similarity against every stored document; no
    approximate-nearest-neighbour structure is involved. The vectorizer is
    re-fitted on the whole corpus each time texts are added.
    """

    def __init__(self, namespace: str):
        """Create an empty index labelled with ``namespace``."""
        self.namespace = namespace
        self.vectorizer = TfidfVectorizer()
        self.vectors = None  # sparse TF-IDF matrix, one row per document
        self.documents: List[str] = []
        self.metadatas: List[Dict] = []

    def add_texts(self, texts: List[str], metadatas: List[Dict]):
        """Add documents with their metadata and rebuild the index.

        Args:
            texts: Documents to index. An empty list is a no-op.
            metadatas: One metadata dict per text, in the same order.

        Raises:
            ValueError: If ``texts`` and ``metadatas`` differ in length, which
                would otherwise pair search hits with the wrong metadata.
        """
        if not texts:
            return
        if len(texts) != len(metadatas):
            raise ValueError(
                f"texts and metadatas must have the same length "
                f"({len(texts)} != {len(metadatas)})"
            )
        self.documents.extend(texts)
        self.metadatas.extend(metadatas)
        try:
            self.vectors = self.vectorizer.fit_transform(self.documents)
        except ValueError as exc:
            # scikit-learn raises ValueError when the corpus yields an empty
            # vocabulary (e.g. only punctuation or one-character tokens).
            # Leave the index unsearchable instead of failing the caller.
            log.warning("[memory] could not index namespace %s: %s", self.namespace, exc)
            self.vectors = None

    def similarity_search(self, query: str, k: int = 3, min_score: float = 0.1) -> List[Dict]:
        """Return up to ``k`` stored documents most similar to ``query``.

        Args:
            query: Text to compare against the indexed documents.
            k: Maximum number of hits; a non-positive value yields ``[]``.
            min_score: Hits whose cosine similarity is not strictly greater
                than this value are dropped.

        Returns:
            Dicts with ``content``, ``metadata`` and ``score``, best first.
        """
        # ``argsort()[-0:]`` would select every document, so reject k <= 0 early.
        if k <= 0 or not self.documents or self.vectors is None:
            return []

        query_vec = self.vectorizer.transform([query])
        similarities = cosine_similarity(query_vec, self.vectors).flatten()
        top_indices = similarities.argsort()[-k:][::-1]

        return [
            {
                "content": self.documents[idx],
                "metadata": self.metadatas[idx],
                "score": float(similarities[idx]),
            }
            for idx in top_indices
            if similarities[idx] > min_score
        ]
|
|
55
|
+
|
|
56
|
+
class ConnyMemoryEngine:
    """Per-instance memory: episodic log, semantic index and procedural flows.

    Layout under ``<base_dir>/<instance_id>/`` (all files UTF-8):

    * ``episodic/<YYYY-MM-DD>.jsonl`` -- one JSON line per ingested conversation.
    * ``semantic/entities.json`` and ``semantic/faqs.json``.
    * ``procedural/successful_flows.json`` and ``procedural/failed_flows.json``.
    * ``working/`` -- created alongside the others; not written by this class.

    The ``async`` methods perform ordinary blocking file I/O.
    """

    # Ten digits starting with 3.
    _PHONE_RE = r"\b3[0-9]{9}\b"
    _EMAIL_RE = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    # The introductory phrase is matched case-insensitively and on a word
    # boundary (so "Soy Ana" matches but "estoy Bien" does not). The name
    # itself must be capitalised and may contain Spanish letters (ñ, ü,
    # accented vowels). One or two words are captured.
    _NAME_PATTERNS = (
        r"\b(?i:me llamo|soy|mi nombre es)\s+"
        r"([A-ZÁÉÍÓÚÑ][a-záéíóúñü]+(?:\s+[A-ZÁÉÍÓÚÑ][a-záéíóúñü]+)?)",
    )

    def __init__(self, base_dir: str = "memory_store"):
        """Create the storage root if needed and start with an empty index cache."""
        self._base = Path(base_dir)
        self._base.mkdir(parents=True, exist_ok=True)
        self._agent_db: Dict[str, "AgentDB"] = {}

    # ------------------------------------------------------------------ helpers

    def _get_agent_db(self, instance_id: str) -> "AgentDB":
        """Return the cached similarity index of an instance, creating it empty."""
        if instance_id not in self._agent_db:
            self._agent_db[instance_id] = AgentDB(namespace=instance_id)
        return self._agent_db[instance_id]

    def _instance_dir(self, instance_id: str) -> Path:
        """Return the instance folder, creating its four sub-folders if needed."""
        d = self._base / instance_id
        for sub in ("episodic", "semantic", "procedural", "working"):
            (d / sub).mkdir(parents=True, exist_ok=True)
        return d

    @staticmethod
    def _read_json(path: Path, default: Any) -> Any:
        """Load a UTF-8 JSON file, or return ``default`` when it does not exist."""
        if not path.exists():
            return default
        return json.loads(path.read_text(encoding="utf-8"))

    @staticmethod
    def _write_json(path: Path, payload: Any) -> None:
        """Write ``payload`` as indented UTF-8 JSON, keeping non-ASCII text as is."""
        path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    @staticmethod
    def _question_hash(question: str) -> str:
        """Return the 12-hex-digit key a question is stored under in ``faqs.json``."""
        return hashlib.md5(question.lower().strip().encode()).hexdigest()[:12]

    @staticmethod
    def _question_answer_pairs(messages: List[Dict]) -> List[Tuple[str, str]]:
        """Pair every user message containing ``?`` with the reply that follows.

        The answer is the next message only when its role is ``assistant``
        (cut to 500 chars); otherwise it is ``""``.
        """
        pairs: List[Tuple[str, str]] = []
        for i, msg in enumerate(messages):
            if not isinstance(msg, dict) or msg.get("role") != "user":
                continue
            content = msg.get("content")
            if not isinstance(content, str) or "?" not in content:
                continue
            answer = ""
            if i + 1 < len(messages):
                nxt = messages[i + 1]
                if isinstance(nxt, dict) and nxt.get("role") == "assistant":
                    answer = str(nxt.get("content") or "")[:500]
            pairs.append((content, answer))
        return pairs

    @staticmethod
    def _load_recent_episodes(ep_dir: Path, days: int) -> Tuple[List[str], List[Dict]]:
        """Read episodic entries from files dated within the last ``days`` days.

        Returns two parallel lists: the concatenated message text of each
        entry and the entry itself. Files whose name is not a date and lines
        that are not JSON objects are skipped.
        """
        texts: List[str] = []
        metadatas: List[Dict] = []
        if not ep_dir.exists():
            return texts, metadatas
        cutoff = datetime.now() - timedelta(days=days)
        # File names are ISO dates, so reverse lexical order is newest first
        # and the scan can stop at the first file older than the cutoff.
        for ep_file in sorted(ep_dir.glob("*.jsonl"), reverse=True):
            try:
                file_date = datetime.strptime(ep_file.stem, "%Y-%m-%d")
            except ValueError:
                continue
            if file_date < cutoff:
                break
            with open(ep_file, encoding="utf-8") as fh:
                for line in fh:
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if not isinstance(entry, dict):
                        continue
                    text = " ".join(
                        str(m.get("content") or "")
                        for m in entry.get("messages", [])
                        if isinstance(m, dict)
                    )
                    if text.strip():
                        texts.append(text)
                        metadatas.append(entry)
        return texts, metadatas

    def _append_flow(self, path: Path, record: Dict[str, Any],
                     flow: List[Dict], keep: int) -> None:
        """Append ``record`` plus a summary of ``flow`` to a JSON list file.

        The summary holds the last 10 messages with content cut to 200 chars.
        Only the newest ``keep`` records are retained.
        """
        history = self._read_json(path, [])
        record["flow_summary"] = [
            {"role": m.get("role", ""), "content": (m.get("content") or "")[:200]}
            for m in flow[-10:]
        ]
        history.append(record)
        self._write_json(path, history[-keep:])

    # --------------------------------------------------------------- public API

    async def ingest_conversation(self, instance_id: str, chat_id: str, messages: List[Dict[str, str]]):
        """Store a conversation and update the entity and FAQ indexes.

        Steps: append the last 20 messages to today's episodic file, merge the
        entities found in user messages into ``entities.json``, bump or create
        FAQ entries for user questions, then drop the cached similarity index
        so the next recall rebuilds it.
        """
        idir = self._instance_dir(instance_id)
        now = datetime.now()
        stamp = now.isoformat()

        # 1. Store episodic
        ep_file = idir / "episodic" / f"{now.strftime('%Y-%m-%d')}.jsonl"
        entry = {"ts": stamp, "chat_id": chat_id, "messages": messages[-20:]}
        with open(ep_file, "a", encoding="utf-8") as fh:
            fh.write(json.dumps(entry, ensure_ascii=False) + "\n")

        # 2. Extract entities
        entities = self._extract_entities(messages)
        if entities:
            ent_file = idir / "semantic" / "entities.json"
            known = self._read_json(ent_file, {})
            for etype, values in entities.items():
                bucket = known.setdefault(etype, {})
                for val in values:
                    if val in bucket:
                        bucket[val]["count"] = bucket[val].get("count", 0) + 1
                        bucket[val]["last_seen"] = stamp
                    else:
                        bucket[val] = {"count": 1, "last_seen": stamp, "chat_id": chat_id}
            self._write_json(ent_file, known)

        # 3. Update FAQ frequency
        pairs = self._question_answer_pairs(messages)
        if pairs:
            faq_file = idir / "semantic" / "faqs.json"
            faqs = self._read_json(faq_file, {})
            for question, answer in pairs:
                qhash = self._question_hash(question)
                if qhash in faqs:
                    faqs[qhash]["frequency"] = faqs[qhash].get("frequency", 0) + 1
                    faqs[qhash]["last_asked"] = stamp
                else:
                    faqs[qhash] = {
                        "question": question,
                        "answer": answer,
                        "frequency": 1,
                        "first_asked": stamp,
                        "last_asked": stamp,
                    }
            self._write_json(faq_file, faqs)

        # Invalidate AgentDB cache so it rebuilds on next recall
        self._agent_db.pop(instance_id, None)

    async def recall_context(self, instance_id: str, user_message: str, top_k: int = 5) -> List[Dict]:
        """Return past conversations similar to ``user_message``.

        On first use for an instance (or after the cache was invalidated) the
        index is filled from the last 30 days of episodic files. Each hit
        carries its similarity ``score``, the last 6 ``messages`` of the stored
        entry, and that entry's ``chat_id`` and ``ts``.
        """
        if not user_message or not user_message.strip():
            return []

        db = self._get_agent_db(instance_id)

        # Populate an empty index once from the recent history.
        if not db.documents:
            ep_dir = self._instance_dir(instance_id) / "episodic"
            texts, metadatas = self._load_recent_episodes(ep_dir, days=30)
            db.add_texts(texts, metadatas)

        hits = db.similarity_search(user_message, k=top_k)

        # Reshape for the rest of the pipeline.
        return [
            {
                "score": hit["score"],
                "messages": hit["metadata"].get("messages", [])[-6:],
                "chat_id": hit["metadata"].get("chat_id", ""),
                "ts": hit["metadata"].get("ts", ""),
            }
            for hit in hits
        ]

    async def get_top_faqs(self, instance_id: str, limit: int = 20) -> List[Dict]:
        """Return up to ``limit`` FAQ entries, most frequently asked first."""
        faq_file = self._instance_dir(instance_id) / "semantic" / "faqs.json"
        faqs = self._read_json(faq_file, {})
        ranked = sorted(faqs.values(), key=lambda item: item.get("frequency", 0), reverse=True)
        return ranked[:max(limit, 0)]

    async def learn_from_success(self, instance_id: str, chat_id: str,
                                 flow: List[Dict], outcome: str = "booking"):
        """Record a successful conversation flow (newest 200 are kept)."""
        idir = self._instance_dir(instance_id)
        self._append_flow(
            idir / "procedural" / "successful_flows.json",
            {"ts": datetime.now().isoformat(), "chat_id": chat_id, "outcome": outcome},
            flow,
            keep=200,
        )

    async def learn_from_failure(self, instance_id: str, chat_id: str,
                                 flow: List[Dict], reason: str = "escalated"):
        """Record a failed conversation flow (newest 100 are kept)."""
        idir = self._instance_dir(instance_id)
        self._append_flow(
            idir / "procedural" / "failed_flows.json",
            {"ts": datetime.now().isoformat(), "chat_id": chat_id, "reason": reason},
            flow,
            keep=100,
        )

    async def weekly_consolidation(self, instance_id: str):
        """Reconcile the FAQ index with the episodic log and prune old episodes.

        The episodic files are re-scanned for user questions. For each
        question the stored ``frequency`` is raised to the number of
        occurrences found, never incremented: ``ingest_conversation`` already
        counts each question once, so repeated runs leave the counts stable.
        A longer logged answer replaces a shorter stored one, except for
        entries marked ``"source": "admin_taught"``. Episodic files older than
        90 days are then deleted and the cached similarity index is dropped.
        """
        idir = self._instance_dir(instance_id)
        log.info("[memory] starting weekly consolidation for %s", instance_id)

        # 1. Re-scan all episodic files, aggregating per question.
        ep_dir = idir / "episodic"
        seen: Dict[str, Dict[str, Any]] = {}
        for ep_file in ep_dir.glob("*.jsonl"):
            with open(ep_file, encoding="utf-8") as fh:
                for line in fh:
                    try:
                        entry = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if not isinstance(entry, dict):
                        continue
                    msgs = entry.get("messages", [])
                    if not isinstance(msgs, list):
                        continue
                    for question, answer in self._question_answer_pairs(msgs):
                        stats = seen.setdefault(
                            self._question_hash(question),
                            {"question": question, "answer": "", "count": 0},
                        )
                        stats["count"] += 1
                        if len(answer) > len(stats["answer"]):
                            stats["answer"] = answer

        # 2. Update FAQ index
        faq_file = idir / "semantic" / "faqs.json"
        faqs = self._read_json(faq_file, {})
        stamp = datetime.now().isoformat()
        for qhash, stats in seen.items():
            current = faqs.get(qhash)
            if current is None:
                faqs[qhash] = {
                    "question": stats["question"],
                    "answer": stats["answer"],
                    "frequency": stats["count"],
                    "first_asked": stamp,
                    "last_asked": stamp,
                }
                continue
            current["frequency"] = max(current.get("frequency", 0), stats["count"])
            admin_owned = current.get("source") == "admin_taught"
            if not admin_owned and len(stats["answer"]) > len(current.get("answer") or ""):
                current["answer"] = stats["answer"]
        self._write_json(faq_file, faqs)

        # 3. Prune episodic older than 90 days
        cutoff = datetime.now() - timedelta(days=90)
        for ep_file in ep_dir.glob("*.jsonl"):
            try:
                file_date = datetime.strptime(ep_file.stem, "%Y-%m-%d")
            except ValueError:
                continue  # not one of the dated files
            if file_date >= cutoff:
                continue
            try:
                ep_file.unlink()
            except OSError as exc:
                log.warning("[memory] could not prune %s: %s", ep_file.name, exc)
                continue
            log.info("[memory] pruned old episodic: %s", ep_file.name)

        # Pruned episodes must not keep being served from the cached index.
        self._agent_db.pop(instance_id, None)
        log.info("[memory] consolidation complete for %s: %d FAQs", instance_id, len(faqs))

    def _extract_entities(self, messages: List[Dict]) -> Dict[str, List[str]]:
        """Extract phones, emails and names from user messages with regexes.

        Each user message is scanned on its own so a name cannot absorb the
        capitalised first word of the following message. Values are
        de-duplicated in first-seen order; entity kinds with no match are
        omitted from the result.
        """
        found: Dict[str, List[str]] = {"phones": [], "emails": [], "names": []}
        for msg in messages:
            if msg.get("role") != "user":
                continue
            text = msg.get("content")
            if not isinstance(text, str):
                continue
            found["phones"].extend(re.findall(self._PHONE_RE, text))
            found["emails"].extend(re.findall(self._EMAIL_RE, text))
            for pattern in self._NAME_PATTERNS:
                found["names"].extend(re.findall(pattern, text))
        return {kind: list(dict.fromkeys(values)) for kind, values in found.items() if values}
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
# Module-level shared instance built with the default arguments. Constructing
# it creates the "memory_store" directory relative to the current working
# directory, so importing this module has that side effect.
memory_engine = ConnyMemoryEngine()
|