alive-ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +24 -0
- package/LICENSE +21 -0
- package/README.md +143 -0
- package/alive_ai/__init__.py +3 -0
- package/brain/__init__.py +59 -0
- package/brain/almost_said.py +154 -0
- package/brain/bid_detector.py +636 -0
- package/brain/conversation_flow.py +135 -0
- package/brain/curiosity.py +328 -0
- package/brain/default_mode.py +1438 -0
- package/brain/dreams.py +220 -0
- package/brain/embeddings/__init__.py +82 -0
- package/brain/emotional_memory.py +949 -0
- package/brain/global_activity.py +173 -0
- package/brain/group_dynamics.py +63 -0
- package/brain/linguistic.py +235 -0
- package/brain/llm/__init__.py +63 -0
- package/brain/llm/base.py +33 -0
- package/brain/llm/fallback_router.py +309 -0
- package/brain/llm/manifest.md +30 -0
- package/brain/llm/ollama.py +218 -0
- package/brain/llm/openrouter.py +151 -0
- package/brain/llm/provider.py +205 -0
- package/brain/llm/unified.py +423 -0
- package/brain/llm/zai.py +169 -0
- package/brain/manifest.md +23 -0
- package/brain/memory/__init__.py +123 -0
- package/brain/memory/episodic.py +92 -0
- package/brain/memory/fact_extractor.py +209 -0
- package/brain/memory/index.py +54 -0
- package/brain/memory/manager.py +151 -0
- package/brain/memory/summarizer.py +102 -0
- package/brain/memory/vector_store.py +297 -0
- package/brain/memory/working.py +43 -0
- package/brain/narrative.py +343 -0
- package/brain/stt/__init__.py +4 -0
- package/brain/stt/google_stt.py +83 -0
- package/brain/stt/whisper_stt.py +82 -0
- package/brain/subconscious/__init__.py +33 -0
- package/brain/subconscious/actions.py +136 -0
- package/brain/subconscious/evaluation.py +166 -0
- package/brain/subconscious/goal_system.py +90 -0
- package/brain/subconscious/goals.py +41 -0
- package/brain/subconscious/impulse_generator.py +200 -0
- package/brain/subconscious/impulses.py +48 -0
- package/brain/subconscious/learning.py +24 -0
- package/brain/subconscious/learning_system.py +79 -0
- package/brain/subconscious/loop.py +398 -0
- package/brain/subconscious/manifest.md +32 -0
- package/brain/subconscious/relationship.py +47 -0
- package/brain/subconscious/relationship_memory.py +83 -0
- package/brain/subconscious/response_analyzer.py +74 -0
- package/brain/subconscious/templates.py +70 -0
- package/brain/subconscious/thought.py +37 -0
- package/brain/subconscious/working_memory.py +97 -0
- package/cli/index.js +371 -0
- package/config/directives.example.json +28 -0
- package/config/instructions.example.md +16 -0
- package/config/self.example.json +74 -0
- package/config/settings.example.json +95 -0
- package/core/__init__.py +1 -0
- package/core/config.py +54 -0
- package/core/directives.py +198 -0
- package/core/events.py +50 -0
- package/core/follow_up.py +267 -0
- package/core/hot_reload.py +174 -0
- package/core/initialization.py +253 -0
- package/core/manifest.md +28 -0
- package/core/media_handler.py +241 -0
- package/core/memory_monitor.py +200 -0
- package/core/message_handler.py +1440 -0
- package/core/proactive_generator.py +277 -0
- package/core/self.py +188 -0
- package/core/settings.py +169 -0
- package/core/skills_registry.py +357 -0
- package/core/state.py +27 -0
- package/core/subconscious_bridge.py +93 -0
- package/core/thinking.py +175 -0
- package/core/user_manager.py +306 -0
- package/core/user_tracker.py +144 -0
- package/demo/index.html +144 -0
- package/docker-compose.yml +28 -0
- package/docs/assets/logo.svg +15 -0
- package/docs/index.html +355 -0
- package/heart/__init__.py +93 -0
- package/heart/afterglow.py +215 -0
- package/heart/attachment.py +186 -0
- package/heart/circadian.py +251 -0
- package/heart/complex_emotions.py +114 -0
- package/heart/conflicts.py +589 -0
- package/heart/core.py +387 -0
- package/heart/emotional_decay.py +59 -0
- package/heart/emotional_memory.py +261 -0
- package/heart/emotional_state.py +146 -0
- package/heart/emotional_variability.py +156 -0
- package/heart/hormonal.py +424 -0
- package/heart/inconsistency.py +1222 -0
- package/heart/integrity.py +469 -0
- package/heart/interoception.py +997 -0
- package/heart/love.py +120 -0
- package/heart/manifest.md +25 -0
- package/heart/mood_shifts.py +169 -0
- package/heart/phantom_somatic.py +259 -0
- package/heart/predictive.py +374 -0
- package/heart/scars.py +474 -0
- package/heart/somatic.py +482 -0
- package/heart/soul.py +633 -0
- package/heart/telemetry.py +942 -0
- package/heart/triggers.py +119 -0
- package/heart/unconscious.py +443 -0
- package/input/__init__.py +1 -0
- package/input/manifest.md +24 -0
- package/input/telegram/__init__.py +1 -0
- package/input/telegram/commands.py +762 -0
- package/input/telegram/listener.py +532 -0
- package/main.py +90 -0
- package/manifest.md +28 -0
- package/mypics/.gitkeep +1 -0
- package/myvids/.gitkeep +1 -0
- package/output/__init__.py +1 -0
- package/output/images/__init__.py +1 -0
- package/output/images/fal_gen.py +43 -0
- package/output/manifest.md +26 -0
- package/output/text/__init__.py +1 -0
- package/output/text/sender.py +22 -0
- package/output/voice/__init__.py +64 -0
- package/output/voice/google_tts.py +252 -0
- package/output/voice/gtts_tts.py +214 -0
- package/output/voice/vibe_tts.py +190 -0
- package/package.json +58 -0
- package/pyproject.toml +23 -0
- package/requirements.txt +21 -0
- package/skills/__init__.py +1 -0
- package/skills/anticipation_engine/__init__.py +8 -0
- package/skills/anticipation_engine/engine.py +618 -0
- package/skills/anticipation_engine/manifest.md +192 -0
- package/skills/calendar/__init__.py +1 -0
- package/skills/content_unlocks/__init__.py +8 -0
- package/skills/content_unlocks/manifest.md +231 -0
- package/skills/content_unlocks/unlocks.py +945 -0
- package/skills/exclusive_moments/__init__.py +8 -0
- package/skills/exclusive_moments/manifest.md +145 -0
- package/skills/exclusive_moments/moments.py +506 -0
- package/skills/intimacy_layers/__init__.py +8 -0
- package/skills/intimacy_layers/layers.py +703 -0
- package/skills/intimacy_layers/manifest.md +203 -0
- package/skills/manifest.md +67 -0
- package/skills/memory_callbacks/__init__.py +9 -0
- package/skills/memory_callbacks/callbacks.py +748 -0
- package/skills/memory_callbacks/manifest.md +170 -0
- package/skills/message_scheduler/__init__.py +19 -0
- package/skills/message_scheduler/manifest.md +107 -0
- package/skills/message_scheduler/scheduler.py +510 -0
- package/skills/photo_manager/__init__.py +1 -0
- package/skills/photo_manager/scanner.py +296 -0
- package/skills/relationship_milestones/__init__.py +8 -0
- package/skills/relationship_milestones/manifest.md +206 -0
- package/skills/relationship_milestones/tracker.py +494 -0
- package/skills/self_authorship/__init__.py +23 -0
- package/skills/self_authorship/author.py +331 -0
- package/skills/self_authorship/manifest.md +24 -0
- package/skills/video_manager/__init__.py +5 -0
- package/skills/video_manager/manifest.md +37 -0
- package/skills/video_manager/scanner.py +229 -0
- package/webui/__init__.py +3 -0
- package/webui/app.py +936 -0
- package/webui/bridge.py +366 -0
- package/webui/static/index.html +2070 -0
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Brain: LLM - Fallback Router
|
|
3
|
+
Simple router that tries providers in order with logging and error handling.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import asyncio
|
|
7
|
+
import time
|
|
8
|
+
from typing import Optional, List, Dict, Tuple, Callable
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from enum import Enum
|
|
11
|
+
|
|
12
|
+
from .base import BaseLLM
|
|
13
|
+
from .zai import ZAIClient
|
|
14
|
+
from .openrouter import OpenRouterClient
|
|
15
|
+
from .ollama import OllamaClient
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class FallbackResult(Enum):
    """Outcome category recorded for one attempt against one provider."""

    # The provider returned non-blank text
    SUCCESS = "success"
    # The provider answered, but with nothing (or only whitespace)
    EMPTY_RESPONSE = "empty_response"
    # The request did not finish within the allowed time
    TIMEOUT = "timeout"
    # The request raised some other exception
    ERROR = "error"
    # The availability check reported the provider as down, or itself failed
    UNAVAILABLE = "unavailable"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class FallbackLog:
    """One recorded attempt against one provider."""

    # Name of the provider that was tried
    provider: str
    # Outcome category of the attempt
    result: FallbackResult
    # Time spent on the attempt, in milliseconds
    latency_ms: float
    # Failure description; left as None when no error text was recorded
    error: Optional[str] = None
    # Leading part of the response text; left as None when not recorded
    response_preview: Optional[str] = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class FallbackRouter:
    """
    Simple fallback router for LLM providers.

    Providers are tried in the order they were passed in; the first one that
    returns non-blank text wins. The factory in this module defaults to:
    1. ZAI (primary)
    2. OpenRouter (cloud fallback)
    3. Ollama (local fallback)

    Features:
    - Automatic failover on errors, timeouts or empty responses
    - Configurable per-request timeout
    - Bounded in-memory log of all attempts
    - Per-provider attempt statistics
    """

    def __init__(
        self,
        providers: "List[Tuple[str, BaseLLM]]",
        timeout_seconds: float = 60,
        retry_on_empty: bool = True
    ):
        """
        Initialize the fallback router.

        Args:
            providers: List of (name, client) tuples in fallback order
            timeout_seconds: Maximum time allowed for each chat request
            retry_on_empty: Whether to retry once on empty response
        """
        self.providers = providers
        self.timeout_seconds = timeout_seconds
        self.retry_on_empty = retry_on_empty
        self._log: "List[FallbackLog]" = []
        self._max_log_size = 100

    @staticmethod
    def _is_blank(response: Optional[str]) -> bool:
        """Return True when the response is missing or whitespace-only."""
        return not response or not response.strip()

    def _log_attempt(self, entry: "FallbackLog"):
        """Append an attempt to the log, keeping only the newest entries."""
        self._log.append(entry)
        # Trim log if too large
        if len(self._log) > self._max_log_size:
            self._log = self._log[-self._max_log_size:]

    async def chat(
        self,
        messages: List[Dict[str, str]],
        max_tokens: int = 500,
        temperature: Optional[float] = None
    ) -> Tuple[Optional[str], str]:
        """
        Try providers in order until one succeeds.

        Args:
            messages: Chat messages
            max_tokens: Max tokens to generate
            temperature: Sampling temperature (passed through to the client)

        Returns:
            Tuple of (response, provider_name) or (None, "") if all fail
        """
        for provider_name, client in self.providers:
            response = await self._try_provider(
                provider_name, client, messages, max_tokens, temperature
            )
            if response:
                return response, provider_name

        return None, ""

    async def _try_provider(
        self,
        name: str,
        client: "BaseLLM",
        messages: List[Dict[str, str]],
        max_tokens: int,
        temperature: Optional[float]
    ) -> Optional[str]:
        """
        Try a single provider and record the outcome in the log.

        Never raises for provider failures: unavailability, timeouts, empty
        responses and other exceptions are logged and reported as None.

        Returns:
            The response text, or None if this provider did not produce one
        """
        # Monotonic clock: durations stay correct if the wall clock is adjusted
        started = time.monotonic()

        def elapsed_ms() -> float:
            return (time.monotonic() - started) * 1000

        # Check availability if supported
        if hasattr(client, 'is_available'):
            try:
                available = await asyncio.wait_for(
                    client.is_available(),
                    timeout=5
                )
            except Exception as e:
                self._log_attempt(FallbackLog(
                    provider=name,
                    result=FallbackResult.UNAVAILABLE,
                    latency_ms=elapsed_ms(),
                    error=f"Availability check failed: {e}"
                ))
                return None

            if not available:
                self._log_attempt(FallbackLog(
                    provider=name,
                    result=FallbackResult.UNAVAILABLE,
                    latency_ms=elapsed_ms(),
                    error="Provider not available"
                ))
                return None

        # Try the chat request
        try:
            response = await asyncio.wait_for(
                client.chat(messages, max_tokens=max_tokens, temperature=temperature),
                timeout=self.timeout_seconds
            )

            if self._is_blank(response) and self.retry_on_empty:
                # Empty response - retry once if configured.
                # NOTE(review): the retry uses a fixed temperature of 0.7
                # instead of the caller's value - confirm this is intended.
                print(f"[FallbackRouter] Empty response from {name}, retrying...")
                response = await asyncio.wait_for(
                    client.chat(messages, max_tokens=max_tokens, temperature=0.7),
                    timeout=self.timeout_seconds
                )

            # Measured after the optional retry so the retry time is included
            latency_ms = elapsed_ms()

            if self._is_blank(response):
                self._log_attempt(FallbackLog(
                    provider=name,
                    result=FallbackResult.EMPTY_RESPONSE,
                    latency_ms=latency_ms
                ))
                return None

            # Success!
            self._log_attempt(FallbackLog(
                provider=name,
                result=FallbackResult.SUCCESS,
                latency_ms=latency_ms,
                response_preview=response[:50]
            ))
            print(f"[FallbackRouter] Success from {name} in {latency_ms:.0f}ms")
            return response

        except asyncio.TimeoutError:
            self._log_attempt(FallbackLog(
                provider=name,
                result=FallbackResult.TIMEOUT,
                latency_ms=elapsed_ms(),
                error=f"Timeout after {self.timeout_seconds}s"
            ))
            print(f"[FallbackRouter] Timeout from {name}")
            return None

        except Exception as e:
            self._log_attempt(FallbackLog(
                provider=name,
                result=FallbackResult.ERROR,
                latency_ms=elapsed_ms(),
                error=str(e)
            ))
            print(f"[FallbackRouter] Error from {name}: {e}")
            return None

    def get_log(self, limit: int = 20) -> "List[FallbackLog]":
        """
        Get the most recent log entries, oldest first.

        Args:
            limit: Maximum number of entries; zero or less yields an empty list
        """
        # Guard needed: self._log[-0:] would be the whole list, not nothing
        if limit <= 0:
            return []
        return self._log[-limit:]

    def get_stats(self) -> dict:
        """
        Get statistics about the attempts currently held in the log.

        Returns:
            {"total": 0} when nothing has been logged, otherwise a dict with
            "total", "success_rate" and a "by_provider" breakdown
        """
        if not self._log:
            return {"total": 0}

        success_count = 0
        by_provider = {}
        for entry in self._log:
            bucket = by_provider.setdefault(entry.provider, {"total": 0, "success": 0})
            bucket["total"] += 1
            if entry.result == FallbackResult.SUCCESS:
                bucket["success"] += 1
                success_count += 1

        return {
            "total": len(self._log),
            "success_rate": success_count / len(self._log),
            "by_provider": by_provider
        }
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def create_fallback_router_from_settings(settings_getter: Optional[Callable] = None) -> "FallbackRouter":
    """
    Create a FallbackRouter from settings.json configuration.

    Reads the "LLM_FALLBACK" settings entry (keys ORDER, TIMEOUT_SECONDS,
    RETRY_ON_EMPTY, OLLAMA_URL, OLLAMA_MODEL). API keys and model names for
    ZAI and OpenRouter come from settings first, then environment variables.

    Args:
        settings_getter: Function called as getter(key) or getter(key, default).
            When omitted, core.settings.get is used if it can be imported;
            otherwise every lookup falls back to its default.

    Returns:
        Configured FallbackRouter (possibly with no providers; a warning is
        printed in that case)
    """
    import os

    # Get settings getter if not provided
    if settings_getter is None:
        try:
            from core.settings import get as settings_get
            settings_getter = settings_get
        except ImportError:
            def settings_getter(key, default=None):
                return default

    def lookup(key: str, default: str = ""):
        """Settings value if truthy, else the environment, else the default."""
        return settings_getter(key) or os.environ.get(key, default)

    # Get LLM fallback config
    llm_config = settings_getter("LLM_FALLBACK", {}) or {}

    # Get fallback order
    default_order = ["zai", "openrouter", "ollama"]
    order = llm_config.get("ORDER", default_order)
    if order is None:
        order = default_order
    elif isinstance(order, str):
        # A bare string would otherwise be iterated character by character
        order = [order]
    timeout = llm_config.get("TIMEOUT_SECONDS", 60)
    retry_on_empty = llm_config.get("RETRY_ON_EMPTY", True)

    providers = []

    for name in order:
        name_lower = str(name).strip().lower()
        client = None

        if name_lower == "zai":
            api_key = lookup("ZAI_API_KEY")
            model = lookup("ZAI_MODEL_MAIN", "glm-4.6v")
            if api_key:
                client = ZAIClient(api_key, model)
            else:
                print("[FallbackRouter] Skipping zai: no API key configured")

        elif name_lower == "openrouter":
            api_key = lookup("OPENROUTER_API_KEY")
            model = lookup("OPENROUTER_MODEL_MAIN", "anthropic/claude-3.5-sonnet")
            if api_key:
                client = OpenRouterClient(api_key, model)
            else:
                print("[FallbackRouter] Skipping openrouter: no API key configured")

        elif name_lower == "ollama":
            # Ollama needs no API key, so it is always added when listed
            url = llm_config.get("OLLAMA_URL", "http://172.17.0.1:11434")
            model = llm_config.get("OLLAMA_MODEL", "phi4:latest")
            client = OllamaClient("", model, url)

        else:
            print(f"[FallbackRouter] Ignoring unknown provider: {name!r}")

        if client:
            providers.append((name_lower, client))
            print(f"[FallbackRouter] Added provider: {name_lower}")

    if not providers:
        print("[FallbackRouter] Warning: No providers configured!")

    return FallbackRouter(
        providers=providers,
        timeout_seconds=timeout,
        retry_on_empty=retry_on_empty
    )
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
# Singleton instance
|
|
293
|
+
_router: Optional[FallbackRouter] = None
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def get_fallback_router() -> FallbackRouter:
    """Return the shared FallbackRouter, building it from settings on first use."""
    global _router

    if _router is not None:
        return _router

    _router = create_fallback_router_from_settings()
    return _router
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def reset_fallback_router():
    """Drop the shared router so the next lookup builds a fresh one (for testing)."""
    global _router
    _router = None
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Brain: LLM Module
|
|
2
|
+
|
|
3
|
+
Multi-provider LLM support for flexible model selection.
|
|
4
|
+
|
|
5
|
+
## Files
|
|
6
|
+
- `base.py` - Abstract BaseLLM class
|
|
7
|
+
- `zai.py` - ZAI API client (GLM models)
|
|
8
|
+
- `openrouter.py` - OpenRouter API client
|
|
9
|
+
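- `provider.py` - Factory for creating LLM clients
- `ollama.py` - Ollama API client (local models)
- `fallback_router.py` - Router that tries providers in order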
|
|
10
|
+
|
|
11
|
+
## Provider Selection
|
|
12
|
+
Set `LLM_PROVIDER` env var: `zai` (default) or `openrouter`
|
|
13
|
+
|
|
14
|
+
## Task-Specific Models
|
|
15
|
+
- **main** - Primary conversation model
|
|
16
|
+
- **thinking** - Deep reasoning, complex decisions
|
|
17
|
+
- **fast** - Quick responses, impulses, subconscious
|
|
18
|
+
|
|
19
|
+
## Usage
|
|
20
|
+
```python
|
|
21
|
+
from brain.llm import get_main_llm, get_fast_llm, get_thinking_llm
|
|
22
|
+
|
|
23
|
+
llm = get_main_llm()
|
|
24
|
+
response = await llm.chat(messages, max_tokens=500)
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Environment Variables
|
|
28
|
+
- `LLM_PROVIDER` - zai or openrouter
|
|
29
|
+
- `ZAI_API_KEY`, `ZAI_MODEL_MAIN/FAST/THINKING`
|
|
30
|
+
- `OPENROUTER_API_KEY`, `OPENROUTER_MODEL_MAIN/FAST/THINKING`
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Brain: LLM - Ollama API Client
|
|
3
|
+
Local LLM support via Ollama for ultimate fallback
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import aiohttp
|
|
7
|
+
import asyncio
|
|
8
|
+
from typing import Optional, List, Dict
|
|
9
|
+
from .base import BaseLLM
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class OllamaClient(BaseLLM):
    """Ollama API client for local LLM inference"""

    # Default URLs - try Docker host access first, then localhost
    DEFAULT_URLS = [
        "http://172.17.0.1:11434",  # Docker bridge gateway
        "http://host.docker.internal:11434",  # Docker Desktop
        "http://localhost:11434",  # Local
    ]

    # How long an availability result is reused before checking again
    AVAILABILITY_CACHE_SECONDS = 30
    # Total time allowed for one chat request (local inference can be slow)
    CHAT_TIMEOUT_SECONDS = 120

    def __init__(self, api_key: str = "", model: str = "phi4:latest", base_url: Optional[str] = None):
        """
        Args:
            api_key: Unused by Ollama; "local" is passed to the base class
                when empty so the constructor interface stays consistent
            model: Model name to request
            base_url: Server URL; when None it is discovered from DEFAULT_URLS
        """
        # Ollama doesn't need an API key, but we keep the interface consistent
        super().__init__(api_key or "local", model)
        self.base_url = base_url
        self.session: Optional[aiohttp.ClientSession] = None
        self._available: Optional[bool] = None
        self._last_check: float = 0

    async def _get_session(self) -> aiohttp.ClientSession:
        """Return the shared HTTP session, creating it if missing or closed."""
        if self.session is None or self.session.closed:
            self.session = aiohttp.ClientSession()
        return self.session

    async def _discover_url(self) -> Optional[str]:
        """
        Return the configured URL, or probe DEFAULT_URLS for a live server.

        The first URL whose /api/tags answers 200 is remembered in
        self.base_url. Returns None when none of them responds.
        """
        if self.base_url:
            return self.base_url

        session = await self._get_session()

        for url in self.DEFAULT_URLS:
            try:
                async with session.get(
                    f"{url}/api/tags",
                    timeout=aiohttp.ClientTimeout(total=2)
                ) as resp:
                    if resp.status == 200:
                        print(f"[Ollama] Discovered at {url}")
                        self.base_url = url
                        return url
            except Exception:
                # Best-effort probe: an unreachable candidate is expected
                continue

        return None

    def _match_installed_model(self, installed: List[str]) -> Optional[str]:
        """
        Pick which installed model to use.

        Preference: the exact configured name, then an installed model with
        the same base name (text before ':'), then the first installed model.
        Returns None when nothing is installed.
        """
        names = [name for name in installed if name]
        if self.model in names:
            return self.model

        wanted_base = self.model.split(":")[0]
        for name in names:
            if name.split(":")[0] == wanted_base:
                return name

        return names[0] if names else None

    async def _check_availability(self) -> bool:
        """Query the server once; switch self.model if a substitute is needed."""
        url = await self._discover_url()
        if not url:
            return False

        session = await self._get_session()

        async with session.get(
            f"{url}/api/tags",
            timeout=aiohttp.ClientTimeout(total=5)
        ) as resp:
            if resp.status != 200:
                return False
            data = await resp.json()

        model_names = [m.get("name", "") for m in data.get("models", [])]
        chosen = self._match_installed_model(model_names)
        if chosen is None:
            return False

        if chosen != self.model:
            # Use a name the server actually has, so chat() does not request
            # a model that is not installed
            self.model = chosen
            print(f"[Ollama] Model not found, using {self.model}")
        return True

    async def is_available(self) -> bool:
        """
        Check if Ollama is running and a usable model is installed.

        The result is cached for AVAILABILITY_CACHE_SECONDS. Always returns a
        bool; any failure during the check is reported as False.
        """
        import time

        if (
            self._available is not None
            and time.time() - self._last_check < self.AVAILABILITY_CACHE_SECONDS
        ):
            return self._available

        try:
            available = await self._check_availability()
        except Exception as e:
            print(f"[Ollama] Availability check failed: {e}")
            available = False

        self._available = available
        self._last_check = time.time()
        return available

    async def chat(
        self,
        messages: List[Dict[str, str]],
        max_tokens: int = 500,
        temperature: Optional[float] = None
    ) -> Optional[str]:
        """
        Send chat completion request via Ollama API

        Args:
            messages: Chat messages; entries whose role is not user,
                assistant or system are dropped
            max_tokens: Sent as the num_predict option
            temperature: Sampling temperature; when None the LLM_TEMPERATURE
                environment variable is used, defaulting to 0.95

        Returns:
            The stripped response text, or None on any failure or empty reply
        """
        import os
        import time

        # Use passed temperature, or environment variable, or default
        if temperature is None:
            temperature = float(os.environ.get("LLM_TEMPERATURE", "0.95"))

        # Discover URL if not set
        if not self.base_url:
            url = await self._discover_url()
            if not url:
                print("[Ollama] No reachable Ollama instance")
                return None
        else:
            url = self.base_url
            print(f"[Ollama] Using configured URL: {url}")

        session = await self._get_session()

        # Keep only the roles this client forwards to Ollama
        ollama_messages = [
            {"role": msg.get("role", "user"), "content": msg.get("content", "")}
            for msg in messages
            if msg.get("role", "user") in ("user", "assistant", "system")
        ]

        payload = {
            "model": self.model,
            "messages": ollama_messages,
            "stream": False,
            "options": {
                "num_predict": max_tokens,
                "temperature": temperature,
                "top_p": 0.9,
                "repeat_penalty": 1.1,
            }
        }

        print(f"[Ollama] Request to {url} with model {self.model}")

        try:
            start_time = time.time()

            async with session.post(
                f"{url}/api/chat",
                json=payload,
                timeout=aiohttp.ClientTimeout(total=self.CHAT_TIMEOUT_SECONDS)
            ) as resp:
                if resp.status != 200:
                    error = await resp.text()
                    print(f"[Ollama] Error {resp.status}: {error[:300]}")
                    return None

                data = await resp.json()
                print(f"[Ollama] Raw response keys: {list(data.keys())}")

                # Check for error in response
                if "error" in data:
                    print(f"[Ollama] API Error: {data['error']}")
                    return None

                # Ollama response format
                message = data.get("message", {})
                content = message.get("content", "")

                # Some models (like deepseek-r1, JOSIEFIED) use 'thinking' field
                if not content or not content.strip():
                    thinking = message.get("thinking", "")
                    if thinking and thinking.strip():
                        content = thinking
                        print(f"[Ollama] Using 'thinking' content instead of 'content'")

                if not content or not content.strip():
                    print(f"[Ollama] Empty content in response: {data}")
                    return None

                elapsed = time.time() - start_time
                print(f"[Ollama] Response ({elapsed:.1f}s): {content[:80]}...")

                # Mark as available since we got a response
                self._available = True
                self._last_check = time.time()

                return content.strip()

        except asyncio.TimeoutError:
            print(f"[Ollama] Timeout ({self.CHAT_TIMEOUT_SECONDS}s)")
            return None
        except aiohttp.ClientError as e:
            print(f"[Ollama] Connection error: {e}")
            # Refresh the timestamp too, so the cached "down" state is honored
            self._available = False
            self._last_check = time.time()
            return None
        except Exception as e:
            print(f"[Ollama] Exception: {e}")
            import traceback
            traceback.print_exc()
            return None

    async def close(self):
        """Close the client session"""
        if self.session and not self.session.closed:
            await self.session.close()
|