cortexflow-ai 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cortexflow_ai/__init__.py +8 -0
- cortexflow_ai/agent/__init__.py +1 -0
- cortexflow_ai/agent/pipeline.py +194 -0
- cortexflow_ai/agent/runtime.py +467 -0
- cortexflow_ai/agent/session.py +168 -0
- cortexflow_ai/channels/__init__.py +1 -0
- cortexflow_ai/channels/base.py +99 -0
- cortexflow_ai/channels/discord_.py +145 -0
- cortexflow_ai/channels/email_.py +256 -0
- cortexflow_ai/channels/irc.py +261 -0
- cortexflow_ai/channels/mastodon_.py +235 -0
- cortexflow_ai/channels/matrix.py +196 -0
- cortexflow_ai/channels/mattermost.py +235 -0
- cortexflow_ai/channels/nextcloud.py +297 -0
- cortexflow_ai/channels/signal_.py +221 -0
- cortexflow_ai/channels/slack.py +214 -0
- cortexflow_ai/channels/sms.py +176 -0
- cortexflow_ai/channels/teams.py +214 -0
- cortexflow_ai/channels/telegram.py +151 -0
- cortexflow_ai/channels/webhook.py +201 -0
- cortexflow_ai/channels/whatsapp.py +218 -0
- cortexflow_ai/cli.py +805 -0
- cortexflow_ai/commands/__init__.py +17 -0
- cortexflow_ai/commands/handler.py +202 -0
- cortexflow_ai/config.py +180 -0
- cortexflow_ai/gateway/__init__.py +1 -0
- cortexflow_ai/gateway/main.py +110 -0
- cortexflow_ai/gateway/routes.py +295 -0
- cortexflow_ai/gateway/websocket.py +189 -0
- cortexflow_ai/init_wizard.py +261 -0
- cortexflow_ai/memory/__init__.py +1 -0
- cortexflow_ai/memory/archiver.py +119 -0
- cortexflow_ai/memory/compactor.py +188 -0
- cortexflow_ai/memory/long_term.py +382 -0
- cortexflow_ai/memory/retrieval.py +337 -0
- cortexflow_ai/memory/short_term.py +190 -0
- cortexflow_ai/memory/tagging.py +101 -0
- cortexflow_ai/models/__init__.py +1 -0
- cortexflow_ai/models/deepseek.py +180 -0
- cortexflow_ai/models/openai_.py +157 -0
- cortexflow_ai/models/router.py +451 -0
- cortexflow_ai/observability/__init__.py +1 -0
- cortexflow_ai/observability/logs.py +161 -0
- cortexflow_ai/observability/metrics.py +324 -0
- cortexflow_ai/plugins/__init__.py +1 -0
- cortexflow_ai/plugins/base.py +101 -0
- cortexflow_ai/plugins/registry.py +150 -0
- cortexflow_ai/reflection/__init__.py +1 -0
- cortexflow_ai/reflection/engine.py +214 -0
- cortexflow_ai/tools/__init__.py +1 -0
- cortexflow_ai/tools/base.py +114 -0
- cortexflow_ai/tools/file_ops.py +180 -0
- cortexflow_ai/tools/registry.py +160 -0
- cortexflow_ai/tools/web_search.py +140 -0
- cortexflow_ai/update_checker.py +58 -0
- cortexflow_ai/voice/__init__.py +1 -0
- cortexflow_ai/voice/stt.py +106 -0
- cortexflow_ai/voice/tts.py +230 -0
- cortexflow_ai/voice/wake_word.py +211 -0
- cortexflow_ai/workspace.py +158 -0
- cortexflow_ai-2.0.0.dist-info/METADATA +609 -0
- cortexflow_ai-2.0.0.dist-info/RECORD +66 -0
- cortexflow_ai-2.0.0.dist-info/WHEEL +5 -0
- cortexflow_ai-2.0.0.dist-info/entry_points.txt +2 -0
- cortexflow_ai-2.0.0.dist-info/licenses/LICENSE +105 -0
- cortexflow_ai-2.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""CortexFlow v2 — Personal AI Assistant Gateway.
|
|
2
|
+
|
|
3
|
+
A local-first AI assistant that connects to all major messaging platforms
|
|
4
|
+
with 3-tier hierarchical memory, task-aware LLM routing, and cross-platform voice.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
__version__ = "2.0.0"
|
|
8
|
+
__author__ = "Amit Chandra"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Agent runtime: session management, cognitive pipeline, and main loop."""
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""Cognitive pipeline: intent extraction → memory retrieval → generation → reflection.
|
|
2
|
+
|
|
3
|
+
The pipeline is the heart of CortexFlow's intelligence layer. Each inbound
|
|
4
|
+
message passes through these stages:
|
|
5
|
+
|
|
6
|
+
1. Intent extraction — classify what the user wants (Gemini Flash, cheap)
|
|
7
|
+
2. Memory retrieval — assemble context from 3-tier memory
|
|
8
|
+
3. Prompt assembly — workspace system prompt + memory + conversation
|
|
9
|
+
4. Generation — route to optimal model via ModelRouter
|
|
10
|
+
5. Reflection        — quality-score the response (optional; runs inline when an engine is configured)
|
|
11
|
+
6. Memory storage — persist to short-term Redis and trigger long-term write
|
|
12
|
+
|
|
13
|
+
Stages 1–5 are synchronous within the request path.
|
|
14
|
+
Stage 6 is fire-and-forget (asyncio.create_task).
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import asyncio
|
|
20
|
+
import logging
|
|
21
|
+
import time
|
|
22
|
+
from dataclasses import dataclass
|
|
23
|
+
|
|
24
|
+
from cortexflow_ai.agent.session import Session
|
|
25
|
+
from cortexflow_ai.channels.base import InboundMessage
|
|
26
|
+
from cortexflow_ai.memory.retrieval import MemoryRetrievalPipeline, RetrievalContext
|
|
27
|
+
from cortexflow_ai.models.router import GenerationResult, ModelRouter
|
|
28
|
+
from cortexflow_ai.reflection.engine import ReflectionEngine
|
|
29
|
+
from cortexflow_ai.workspace import WorkspaceFiles
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
# Intent labels understood by the pipeline
|
|
34
|
+
# Maps each intent label the classifier may return to the router task type
# used for generation. Intents that share a task type are grouped; insertion
# order matches the list of intents given to the classifier prompt.
INTENT_TASK_MAP: dict[str, str] = {
    "code": "code_generation",
    "debug": "code_review",
    **dict.fromkeys(("explain", "summarize"), "summarization"),
    "plan": "task_decomposition",
    **dict.fromkeys(("write", "question", "chat", "other"), "general"),
}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class PipelineResult:
    """Result record produced by a single pass through the cognitive pipeline."""

    # Text returned to the user (the reflected version when reflection ran).
    response: str
    # Model and provider reported by the router for the generation call.
    model: str
    provider: str
    # Intent label extracted from the user message.
    intent: str
    # Router task type derived from the intent.
    task_type: str
    # Reflection score; stays None when reflection is disabled or failed.
    quality_score: float | None = None
    # Token estimate of the retrieved memory context.
    retrieval_token_estimate: int = 0
    # Wall-clock duration of the pipeline pass, in milliseconds.
    latency_ms: float = 0.0
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class CognitivePipeline:
    """Executes the full intent → memory → generate → reflect loop.

    Args:
        router:     LLM router for generation and intent extraction.
        memory:     Memory retrieval pipeline (3-tier).
        workspace:  Loaded workspace files (SOUL/TOOLS/RULES).
        agent_name: Name of the assistant (used in system prompt).
        reflection: Optional reflection engine. When provided, each response is
                    quality-scored inline (and self-corrected if below the
                    engine's threshold) before being returned. When None
                    (default), reflection is skipped and quality_score is None.
    """

    # Characters stripped from around the classifier's reply so that answers
    # such as ``code.`` or ``"chat"`` still match a known intent label.
    _INTENT_STRIP_CHARS = ".,:;!?\"'`*()[]"

    def __init__(
        self,
        router: ModelRouter,
        memory: MemoryRetrievalPipeline,
        workspace: WorkspaceFiles,
        agent_name: str = "CortexFlow",
        reflection: ReflectionEngine | None = None,
    ) -> None:
        self._router = router
        self._memory = memory
        self._workspace = workspace
        self._agent_name = agent_name
        self._reflection = reflection
        # Strong references to in-flight fire-and-forget tasks. The event loop
        # only holds weak references to tasks, so an unreferenced task may be
        # garbage collected before it completes.
        self._background_tasks: set[asyncio.Task] = set()  # type: ignore[type-arg]

    async def run(
        self,
        message: InboundMessage,
        session: Session,
    ) -> PipelineResult:
        """Process one inbound message and return the assistant's response.

        Args:
            message: Inbound message; a missing ``text`` is treated as "".
            session: Conversation session; receives the user and assistant
                turns once a response has been produced.

        Returns:
            A PipelineResult carrying the response text, routing metadata,
            the optional reflection score and the elapsed time in ms.

        Raises:
            Exception: Errors from memory retrieval or generation propagate
                to the caller. Intent-extraction and reflection failures are
                handled internally.
        """
        t0 = time.monotonic()
        text = message.text or ""

        # ── Stage 1: Intent extraction ─────────────────────────────────
        intent = await self._extract_intent(text)
        task_type = INTENT_TASK_MAP.get(intent, "general")
        logger.debug("pipeline.intent text=%r intent=%s task_type=%s", text[:60], intent, task_type)

        # ── Stage 2: Memory retrieval ──────────────────────────────────
        ctx = await self._memory.retrieve(text, top_k=8)

        # ── Stage 3: Prompt assembly ────────────────────────────────────
        system_prompt = self._build_system(ctx, session)
        user_prompt = self._build_user(text, session)

        # ── Stage 4: Generation ─────────────────────────────────────────
        gen: GenerationResult = await self._router.generate(
            user_prompt,
            task_type=task_type,
            system=system_prompt,
        )
        response_text = gen.text.strip()

        # ── Stage 5: Reflection (optional, inline) ─────────────────────
        quality_score: float | None = None
        if self._reflection is not None:
            try:
                refl = await self._reflection.reflect(text, response_text)
                response_text = refl.final_response
                quality_score = refl.score
            except Exception as exc:
                # Best-effort: a failed reflection must not lose the answer.
                logger.debug("reflection failed (%s) — keeping original response", exc)

        # ── Stage 6: Update session history ────────────────────────────
        session.add_turn("user", text)
        session.add_turn("assistant", response_text, model=gen.model)

        # ── Stage 7: Persist short-term memory (fire-and-forget) ───────
        store_task = asyncio.create_task(
            self._memory.store_short_term(
                key=f"turn:{session.turn_count}",
                value={"user": text, "assistant": response_text},
            )
        )
        # Keep the task alive until it finishes and surface its failure.
        self._background_tasks.add(store_task)
        store_task.add_done_callback(self._on_background_done)

        latency = (time.monotonic() - t0) * 1000
        return PipelineResult(
            response=response_text,
            model=gen.model,
            provider=gen.provider,
            intent=intent,
            task_type=task_type,
            quality_score=quality_score,
            retrieval_token_estimate=ctx.token_estimate,
            latency_ms=round(latency, 1),
        )

    def _on_background_done(self, task: asyncio.Task) -> None:  # type: ignore[type-arg]
        """Release a finished background task and log its failure, if any."""
        self._background_tasks.discard(task)
        if task.cancelled():
            return
        exc = task.exception()  # retrieving it avoids "never retrieved" warnings
        if exc is not None:
            logger.warning("pipeline: short-term memory write failed: %s", exc)

    # ------------------------------------------------------------------
    # Prompt builders
    # ------------------------------------------------------------------

    def _build_system(self, ctx: RetrievalContext, session: Session) -> str:
        """Join the workspace system prompt with any retrieved memory blocks.

        ``session`` is accepted for signature symmetry with ``_build_user``
        but is not read here.
        """
        parts: list[str] = [self._workspace.to_system_prompt(self._agent_name)]

        memory_blocks = ctx.to_prompt_blocks()
        if memory_blocks:
            parts.append("# Relevant memory\n" + "\n\n".join(memory_blocks))

        return "\n\n".join(parts)

    def _build_user(self, text: str, session: Session) -> str:
        """Prefix *text* with up to 10 turns of session history, when present."""
        history = session.build_prompt(include_turns=10)
        if history:
            return f"{history}\nUser: {text}"
        return text

    # ------------------------------------------------------------------
    # Intent extraction
    # ------------------------------------------------------------------

    async def _extract_intent(self, text: str) -> str:
        """Classify *text* into one of the keys of ``INTENT_TASK_MAP``.

        Messages shorter than 5 characters are treated as "chat" without a
        model call. Any failure, an empty reply, or an unrecognised label
        yields "other".
        """
        if len(text) < 5:
            return "chat"

        prompt = (
            "Classify this user message into ONE of these intents:\n"
            "code, debug, explain, summarize, plan, write, question, chat, other\n"
            "\n"
            f"Message: {text[:500]}\n"
            "\n"
            "Reply with ONLY the intent word, nothing else."
        )
        try:
            result = await self._router.generate(
                prompt,
                task_type="intent_extraction",
                max_tokens=10,
                temperature=0.0,
            )
            words = result.text.strip().lower().split()
            if not words:
                return "other"
            # Tolerate quoting/punctuation around the label.
            intent = words[0].strip(self._INTENT_STRIP_CHARS)
            return intent if intent in INTENT_TASK_MAP else "other"
        except Exception as exc:
            logger.debug("intent_extraction failed (%s), using 'other'", exc)
            return "other"
|
|
@@ -0,0 +1,467 @@
|
|
|
1
|
+
"""AgentRuntime — the top-level orchestrator that wires everything together.
|
|
2
|
+
|
|
3
|
+
Responsibilities:
|
|
4
|
+
- Register channel adapters from config
|
|
5
|
+
- Route inbound messages from any channel to the CognitivePipeline
|
|
6
|
+
- Handle built-in slash commands (/reset, /memory, /status, /compact, /help)
|
|
7
|
+
- Send the pipeline's response back via the originating adapter
|
|
8
|
+
- Manage session GC (idle session cleanup)
|
|
9
|
+
- Expose runtime metrics (message count, active sessions, errors)
|
|
10
|
+
|
|
11
|
+
Usage::
|
|
12
|
+
|
|
13
|
+
runtime = AgentRuntime.from_config(cfg)
|
|
14
|
+
await runtime.start()
|
|
15
|
+
# ... gateway is running ...
|
|
16
|
+
await runtime.stop()
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import asyncio
|
|
22
|
+
import logging
|
|
23
|
+
import os
|
|
24
|
+
import time
|
|
25
|
+
from dataclasses import dataclass, field
|
|
26
|
+
from typing import Any
|
|
27
|
+
|
|
28
|
+
from cortexflow_ai.agent.pipeline import CognitivePipeline, PipelineResult
|
|
29
|
+
from cortexflow_ai.agent.session import SessionManager
|
|
30
|
+
from cortexflow_ai.channels.base import Attachment, ChannelAdapter, InboundMessage
|
|
31
|
+
from cortexflow_ai.config import CortexFlowConfig
|
|
32
|
+
from cortexflow_ai.memory.long_term import LongTermMemory
|
|
33
|
+
from cortexflow_ai.memory.retrieval import MemoryRetrievalPipeline
|
|
34
|
+
from cortexflow_ai.models.router import ModelRouter
|
|
35
|
+
from cortexflow_ai.observability.metrics import REGISTRY
|
|
36
|
+
from cortexflow_ai.workspace import WorkspaceLoader
|
|
37
|
+
|
|
38
|
+
logger = logging.getLogger(__name__)
|
|
39
|
+
|
|
40
|
+
# Slash commands handled by the runtime (not the LLM)
|
|
41
|
+
_SLASH_COMMANDS = {"/reset", "/memory", "/status", "/compact", "/help"}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class RuntimeMetrics:
    """Mutable counters the runtime bumps while it handles messages."""

    messages_received: int = 0
    messages_sent: int = 0
    errors: int = 0
    # Sum of pipeline latencies (ms) over all successful pipeline runs.
    pipeline_latency_ms_total: float = 0.0
    # Epoch seconds captured when the metrics object is created.
    started_at: float = field(default_factory=time.time)

    @property
    def avg_latency_ms(self) -> float:
        """Total pipeline latency divided by messages received (0.0 if none)."""
        received = self.messages_received
        return self.pipeline_latency_ms_total / received if received else 0.0

    @property
    def uptime_seconds(self) -> float:
        """Seconds elapsed since ``started_at``."""
        now = time.time()
        return now - self.started_at
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class AgentRuntime:
    """Top-level runtime that connects channels, sessions, and the pipeline.

    Args:
        pipeline:    The cognitive pipeline (intent → generate).
        session_mgr: Session manager (creates/retrieves sessions).
        adapters:    Registered channel adapters.
        gc_interval: Seconds between idle session cleanup. Default 300.
        long_term:   Optional direct handle on long-term memory, kept for
                     callers that need it without the retrieval pipeline.
        stt:         Optional speech-to-text engine exposing an awaitable
                     ``transcribe(bytes)``. None disables transcription.
        tts:         Optional text-to-speech engine exposing an awaitable
                     ``synthesize(str)``. None disables voice replies.
    """

    def __init__(
        self,
        pipeline: CognitivePipeline,
        session_mgr: SessionManager,
        adapters: list[ChannelAdapter] | None = None,
        gc_interval: float = 300.0,
        long_term: LongTermMemory | None = None,
        stt: Any | None = None,
        tts: Any | None = None,
    ) -> None:
        self._pipeline = pipeline
        self._sessions = session_mgr
        self._adapters: dict[str, ChannelAdapter] = {}
        self._gc_interval = gc_interval
        self._gc_task: asyncio.Task | None = None  # type: ignore[type-arg]
        # Direct long-term memory handle — used by the REST API (memory routes)
        # and any caller that needs raw LIKE search/delete without the full
        # 3-tier retrieval pipeline.
        self._long_term = long_term
        # Voice note round-trip: inbound audio attachments are transcribed
        # via stt before the pipeline runs; replies to voice-only messages
        # are synthesized back to audio via tts. Both are best-effort —
        # None disables the corresponding half.
        self._stt = stt
        self._tts = tts
        self.metrics = RuntimeMetrics()

        for adapter in (adapters or []):
            self.register_adapter(adapter)

    # ------------------------------------------------------------------
    # Factory
    # ------------------------------------------------------------------

    @classmethod
    def from_config(cls, cfg: CortexFlowConfig) -> "AgentRuntime":
        """Build a fully-wired AgentRuntime from a CortexFlowConfig.

        Reflection, STT and TTS are optional: if constructing any of them
        fails, a warning is logged and that component is left disabled.
        """
        router = ModelRouter(
            anthropic_api_key=getattr(cfg.models, "anthropic_api_key", None),
            gemini_api_key=getattr(cfg.models, "gemini_api_key", None),
            deepseek_api_key=getattr(cfg.models, "deepseek_api_key", None),
            ollama_base_url=getattr(cfg.models, "ollama_base_url", "http://localhost:11434"),
        )
        memory = MemoryRetrievalPipeline(
            redis_url=cfg.memory.redis_url,
            qdrant_url=cfg.memory.qdrant_url,
            sqlite_path=cfg.memory.sqlite_path,
        )
        loader = WorkspaceLoader()
        workspace_files = loader.get()

        reflection = None
        try:
            from cortexflow_ai.reflection.engine import ReflectionEngine
            reflection = ReflectionEngine(router=router)
        except Exception as exc:
            logger.warning("runtime: reflection engine unavailable (%s)", exc)

        pipeline = CognitivePipeline(
            router=router,
            memory=memory,
            workspace=workspace_files,
            agent_name=cfg.agent.name,
            reflection=reflection,
        )
        session_mgr = SessionManager()
        long_term = LongTermMemory(db_path=os.path.expanduser(cfg.memory.sqlite_path))

        stt = None
        try:
            if getattr(cfg.voice, "stt", "whisper") != "none":
                from cortexflow_ai.voice.stt import WhisperSTT
                stt = WhisperSTT(model_size=getattr(cfg.voice, "stt_model", "base"))
        except Exception as exc:
            logger.warning("runtime: STT unavailable (%s)", exc)

        tts = None
        try:
            if getattr(cfg.voice, "tts_engine", "kokoro") != "none":
                from cortexflow_ai.voice.tts import TTSEngine
                tts = TTSEngine(
                    elevenlabs_api_key=getattr(cfg.voice, "elevenlabs_api_key", None),
                    elevenlabs_voice_id=getattr(cfg.voice, "elevenlabs_voice_id", None) or None,
                )
        except Exception as exc:
            logger.warning("runtime: TTS unavailable (%s)", exc)

        adapters = _build_adapters(cfg)
        return cls(
            pipeline=pipeline,
            session_mgr=session_mgr,
            adapters=adapters,
            long_term=long_term,
            stt=stt,
            tts=tts,
        )

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def register_adapter(self, adapter: ChannelAdapter) -> None:
        """Subscribe to *adapter*'s messages and index it by its channel id."""
        adapter.on_message(self._on_message)
        self._adapters[adapter.channel_id] = adapter

    async def start(self) -> None:
        """Connect all registered adapters and start the GC loop.

        A connection failure on one adapter is logged and does not prevent
        the remaining adapters from connecting.
        """
        for channel_id, adapter in self._adapters.items():
            try:
                await adapter.connect()
                logger.info("runtime: channel %s connected", channel_id)
            except Exception as exc:
                logger.error("runtime: channel %s failed to connect: %s", channel_id, exc)

        # Avoid spawning a second GC loop if start() is called again.
        if self._gc_task is None or self._gc_task.done():
            self._gc_task = asyncio.create_task(self._gc_loop())
        logger.info(
            "AgentRuntime started — %d channel(s) active", len(self._adapters)
        )

    async def stop(self) -> None:
        """Disconnect all adapters and cancel the GC loop."""
        # Detach first so a later start() creates a fresh loop.
        gc_task, self._gc_task = self._gc_task, None
        if gc_task is not None:
            gc_task.cancel()
            try:
                await gc_task
            except asyncio.CancelledError:
                pass

        for adapter in self._adapters.values():
            try:
                await adapter.disconnect()
            except Exception as exc:
                logger.warning("runtime: adapter %s disconnect error: %s", adapter.channel_id, exc)

        logger.info("AgentRuntime stopped")

    # ------------------------------------------------------------------
    # Message dispatch
    # ------------------------------------------------------------------

    async def _on_message(self, msg: InboundMessage) -> None:
        """Adapter callback: compute a reply and send it back via the adapter."""
        is_voice = await self._maybe_transcribe(msg)
        reply = await self._reply_for(msg)
        await self._send_reply(msg, reply, as_voice=is_voice)

    async def _maybe_transcribe(self, msg: InboundMessage) -> bool:
        """Transcribe an audio attachment into msg.text when msg has no text.

        Mutates *msg.text* in place so the rest of the pipeline (slash
        commands, CognitivePipeline) sees the transcript like any other
        text message. Returns True if a voice note was transcribed, so the
        caller knows to reply in kind via TTS.
        """
        if (msg.text or "").strip():
            return False

        # ``or ()`` guards against a message whose attachments are unset.
        audio = next((a for a in (msg.attachments or ()) if a.type == "audio"), None)
        if audio is None or self._stt is None:
            return False

        audio_bytes = audio.data
        if audio_bytes is None and audio.url:
            audio_bytes = await self._fetch_attachment_bytes(audio.url)
        if audio_bytes is None:
            return False

        try:
            transcript = (await self._stt.transcribe(audio_bytes)).strip()
        except Exception as exc:
            logger.warning("runtime: STT transcription failed: %s", exc)
            return False

        if not transcript:
            return False

        msg.text = transcript
        logger.info(
            "runtime: transcribed voice note %s/%s -> %r",
            msg.channel, msg.sender_id[:8], transcript[:60],
        )
        return True

    async def _fetch_attachment_bytes(self, url: str) -> bytes | None:
        """Download *url* and return its body, or None on any failure.

        httpx is imported lazily inside the try block so a missing
        dependency is handled like any other fetch failure.
        """
        try:
            import httpx

            async with httpx.AsyncClient() as client:
                resp = await client.get(url, timeout=15.0)
                resp.raise_for_status()
                return resp.content
        except Exception as exc:
            logger.warning("runtime: failed to fetch attachment %s: %s", url, exc)
            return None

    async def process_inbound_text(
        self,
        channel: str,
        sender_id: str,
        text: str,
        *,
        sender_name: str = "web",
    ) -> str:
        """Process a message from a non-adapter caller (e.g. the WebSocket UI).

        Runs the same dispatch path as an adapter message but returns the
        reply string directly instead of sending it through a channel adapter.
        """
        msg = InboundMessage(
            channel=channel,
            sender_id=sender_id,
            sender_name=sender_name,
            text=text,
            thread_id=None,
            timestamp=time.time(),
            raw={},
        )
        return await self._reply_for(msg)

    async def _reply_for(self, msg: InboundMessage) -> str:
        """Compute the assistant's reply for an inbound message.

        Shared by adapter dispatch (`_on_message`) and direct callers
        (`process_inbound_text`). Updates both the private RuntimeMetrics and
        the global Prometheus REGISTRY. Failures in either a slash command or
        the pipeline are counted, logged, and answered with a generic apology
        rather than raised.
        """
        self.metrics.messages_received += 1
        REGISTRY.inc("messages_total", labels={"channel": msg.channel})
        REGISTRY.set("active_sessions", float(self._sessions.active_count))

        text = (msg.text or "").strip()

        # Handle built-in slash commands
        if text.startswith("/"):
            cmd = text.split()[0].lower()
            if cmd in _SLASH_COMMANDS:
                try:
                    reply = await self._command_reply(cmd, msg)
                except Exception as exc:
                    return self._record_failure(msg, "command", exc)
                self.metrics.messages_sent += 1
                return reply

        # Normal message → cognitive pipeline
        session = self._sessions.get_or_create(msg.channel, msg.sender_id)
        try:
            result: PipelineResult = await self._pipeline.run(msg, session)
            self.metrics.pipeline_latency_ms_total += result.latency_ms
            self.metrics.messages_sent += 1
            REGISTRY.inc("generation_requests_total", labels={"model": result.model})
            REGISTRY.observe(
                "generation_latency_ms", result.latency_ms, labels={"model": result.model}
            )
            logger.info(
                "runtime: %s/%s → %s (%.0fms)",
                msg.channel, msg.sender_id[:8], result.model, result.latency_ms,
            )
            return result.response
        except Exception as exc:
            return self._record_failure(msg, "pipeline", exc)

    def _record_failure(self, msg: InboundMessage, stage: str, exc: Exception) -> str:
        """Count and log a failed reply; return the generic apology text.

        Must be called from inside an ``except`` block so the traceback is
        attached to the log record.
        """
        self.metrics.errors += 1
        REGISTRY.inc("messages_errors_total", labels={"channel": msg.channel})
        logger.exception(
            "runtime: %s error for %s/%s: %s", stage, msg.channel, msg.sender_id, exc
        )
        return "Sorry, something went wrong. Please try again."

    async def _send_reply(
        self, original: InboundMessage, text: str, *, as_voice: bool = False
    ) -> None:
        """Send *text* to the sender of *original* via its channel adapter.

        When *as_voice* is true and a TTS engine is configured, the reply is
        also synthesized and attached as audio. If synthesis yields nothing,
        the text is sent without an attachment.
        """
        adapter = self._adapters.get(original.channel)
        if not adapter:
            logger.warning("runtime: no adapter for channel %s", original.channel)
            return

        attachments: list[Attachment] | None = None
        if as_voice and self._tts is not None:
            audio = await self._synthesize_reply(text)
            if audio:
                attachments = [Attachment(type="audio", data=audio, mime_type="audio/mpeg")]

        await adapter.send(
            original.sender_id, text, reply_to=original.reply_to_id, attachments=attachments
        )

    async def _synthesize_reply(self, text: str) -> bytes | None:
        """Return synthesized audio for *text*, or None if synthesis fails."""
        try:
            return await self._tts.synthesize(text)
        except Exception as exc:
            logger.warning("runtime: TTS synthesis failed: %s", exc)
            return None

    # ------------------------------------------------------------------
    # Built-in commands
    # ------------------------------------------------------------------

    async def _command_reply(self, cmd: str, msg: InboundMessage) -> str:
        """Execute a built-in slash command and return its reply text.

        *cmd* is expected to be a lower-cased member of ``_SLASH_COMMANDS``;
        anything other than /reset, /memory, /status and /compact yields the
        help text.
        """
        session = self._sessions.get_or_create(msg.channel, msg.sender_id)

        if cmd == "/reset":
            session.clear()
            reply = "Session reset. Starting fresh."

        elif cmd == "/memory":
            ctx = await self._pipeline._memory.retrieve(
                "recent context", top_k=5, include_semantic=False
            )
            if ctx.results:
                lines = [f"- {r.content}" for r in ctx.results[:5]]
                reply = "Recent memory:\n" + "\n".join(lines)
            else:
                reply = "No memory entries found for this session."

        elif cmd == "/status":
            reply = (
                f"CortexFlow Status\n"
                f"Uptime: {self.metrics.uptime_seconds:.0f}s\n"
                f"Active sessions: {self._sessions.active_count}\n"
                f"Messages handled: {self.metrics.messages_received}\n"
                f"Avg latency: {self.metrics.avg_latency_ms:.0f}ms\n"
                f"Errors: {self.metrics.errors}"
            )

        elif cmd == "/compact":
            if session.turn_count < 4:
                reply = "Not enough history to compact yet."
            else:
                summary_prompt = (
                    "Summarise this conversation in 3-5 bullet points, preserving key facts:\n\n"
                    + session.build_prompt()
                )
                try:
                    gen = await self._pipeline._router.generate(
                        summary_prompt, task_type="summarization", max_tokens=300
                    )
                    # Only drop history once a summary has been produced.
                    session.clear()
                    session.add_turn("system", f"Conversation summary:\n{gen.text.strip()}")
                    reply = f"Conversation compacted. Summary:\n{gen.text.strip()}"
                except Exception as exc:
                    logger.warning("runtime: /compact failed: %s", exc)
                    reply = f"Compact failed: {exc}"

        else:  # /help
            reply = (
                "Commands:\n"
                "/reset   — Clear conversation history\n"
                "/memory  — Show recent memory\n"
                "/status  — Runtime statistics\n"
                "/compact — Summarize and compress history\n"
                "/help    — Show this message"
            )

        return reply

    # ------------------------------------------------------------------
    # GC loop
    # ------------------------------------------------------------------

    async def _gc_loop(self) -> None:
        """Periodically drop idle sessions until the task is cancelled.

        A failing pass is logged and skipped so one error cannot stop
        cleanup for the rest of the process lifetime. Cancellation is not
        caught here and ends the loop.
        """
        while True:
            await asyncio.sleep(self._gc_interval)
            try:
                removed = self._sessions.gc()
                REGISTRY.set("active_sessions", float(self._sessions.active_count))
            except Exception:
                logger.exception("runtime: session GC pass failed")
                continue
            if removed:
                logger.debug("runtime: GC removed %d idle sessions", removed)
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def _build_adapters(cfg: CortexFlowConfig) -> list[ChannelAdapter]:
|
|
436
|
+
"""Instantiate adapters for all enabled channels in config."""
|
|
437
|
+
adapters: list[ChannelAdapter] = []
|
|
438
|
+
for name, ch_cfg in cfg.channels.items():
|
|
439
|
+
if not ch_cfg.enabled:
|
|
440
|
+
continue
|
|
441
|
+
adapter = _make_adapter(name, ch_cfg.extra)
|
|
442
|
+
if adapter:
|
|
443
|
+
adapters.append(adapter)
|
|
444
|
+
return adapters
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def _make_adapter(name: str, config: dict[str, Any]) -> ChannelAdapter | None:
    """Instantiate the adapter for channel *name*, or return None.

    Adapter modules are imported lazily so an adapter's optional dependency
    is only needed when that channel is requested.

    Args:
        name:   Channel name from the configuration.
        config: Channel-specific settings passed to the adapter constructor.

    Returns:
        The adapter instance, or None when the channel name is not handled
        here or the adapter could not be imported or constructed. Both
        cases are logged as warnings.
    """
    try:
        if name == "telegram":
            from cortexflow_ai.channels.telegram import TelegramAdapter
            return TelegramAdapter(config)
        if name == "discord":
            from cortexflow_ai.channels.discord_ import DiscordAdapter
            return DiscordAdapter(config)
        if name == "slack":
            from cortexflow_ai.channels.slack import SlackAdapter
            return SlackAdapter(config)
        if name == "whatsapp":
            from cortexflow_ai.channels.whatsapp import WhatsAppAdapter
            return WhatsAppAdapter(config)
        if name == "email":
            from cortexflow_ai.channels.email_ import EmailAdapter
            return EmailAdapter(config)
    except Exception as exc:
        logger.warning("runtime: could not load adapter %s: %s", name, exc)
        # Return here so a load failure is not also reported as "unknown".
        return None

    # Warning rather than debug: the caller only asks for adapters that the
    # configuration has enabled, so a skipped channel should be visible.
    logger.warning("runtime: unknown channel %s — skipping", name)
    return None
|