superbrain-server 1.0.2-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/bin/superbrain.js +196 -0
  2. package/package.json +23 -0
  3. package/payload/.dockerignore +45 -0
  4. package/payload/.env.example +58 -0
  5. package/payload/Dockerfile +73 -0
  6. package/payload/analyzers/__init__.py +0 -0
  7. package/payload/analyzers/audio_transcribe.py +225 -0
  8. package/payload/analyzers/caption.py +244 -0
  9. package/payload/analyzers/music_identifier.py +346 -0
  10. package/payload/analyzers/text_analyzer.py +117 -0
  11. package/payload/analyzers/visual_analyze.py +218 -0
  12. package/payload/analyzers/webpage_analyzer.py +789 -0
  13. package/payload/analyzers/youtube_analyzer.py +320 -0
  14. package/payload/api.py +1676 -0
  15. package/payload/config/.api_keys.example +22 -0
  16. package/payload/config/model_rankings.json +492 -0
  17. package/payload/config/openrouter_free_models.json +1364 -0
  18. package/payload/config/whisper_model.txt +1 -0
  19. package/payload/config_settings.py +185 -0
  20. package/payload/core/__init__.py +0 -0
  21. package/payload/core/category_manager.py +219 -0
  22. package/payload/core/database.py +811 -0
  23. package/payload/core/link_checker.py +300 -0
  24. package/payload/core/model_router.py +1253 -0
  25. package/payload/docker-compose.yml +120 -0
  26. package/payload/instagram/__init__.py +0 -0
  27. package/payload/instagram/instagram_downloader.py +253 -0
  28. package/payload/instagram/instagram_login.py +190 -0
  29. package/payload/main.py +912 -0
  30. package/payload/requirements.txt +39 -0
  31. package/payload/reset.py +311 -0
  32. package/payload/start-docker-prod.sh +125 -0
  33. package/payload/start-docker.sh +56 -0
  34. package/payload/start.py +1302 -0
  35. package/payload/static/favicon.ico +0 -0
  36. package/payload/stop-docker.sh +16 -0
  37. package/payload/utils/__init__.py +0 -0
  38. package/payload/utils/db_stats.py +108 -0
  39. package/payload/utils/manage_token.py +91 -0
@@ -0,0 +1,1253 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ AI Model Router for SuperBrain
4
+ ================================
5
+ Multi-provider routing with automatic fallback, response-time ranking,
6
+ and dynamic re-ranking as models change speed or go down.
7
+
8
+ Inspired by / referencing:
9
+ • FreeRide (Shaivpidadi/FreeRide) — dynamic OpenRouter free-model discovery,
10
+ quality scoring (context/capabilities/recency/provider trust), 30-min
11
+ rate-limit cooldown, 6-hour cache
12
+ • Nexlify (dev-sufyaan/Nexlify) — route:"fallback" uptime optimizer, newer
13
+ model IDs (QwQ-32B, DeepSeek-Chat, Dolphin-Mistral, DeepSeek-R1-Qwen-32B),
14
+ Google + Groq + OpenRouter unified approach
15
+ • openrouter-free-model (jomonylw) — free model detection logic
16
+
17
+ Priority order (defaults, adjusted dynamically by measured latency):
18
+ TEXT: Groq → Gemini → OpenRouter (hardcoded best) → Dynamic free OpenRouter → Local Ollama
19
+ VISION: Gemini → Groq Vision → OpenRouter Vision → Local Ollama Vision
20
+
21
+ API keys — store in backend/.api_keys (gitignored), one per line:
22
+ GROQ_API_KEY=gsk_...
23
+ GEMINI_API_KEY=AIza...
24
+ OPENROUTER_API_KEY=sk-or-...
25
+
26
+ Performance state persisted to backend/model_rankings.json (rankings survive restarts).
27
+ Dynamic model list cached in backend/openrouter_free_models.json (refreshed every 6 h).
28
+
29
+ CLI:
30
+ python model_router.py → show rankings
31
+ python model_router.py reset <key> → clear cooldown for a model key
32
+ python model_router.py refresh → force-refresh OpenRouter free model list
33
+ """
34
+
35
+ import os
36
+ import json
37
+ import time
38
+ import base64
39
+ import threading
40
+ from pathlib import Path
41
+ from typing import Optional, List, Dict, Any
42
+ from datetime import datetime, timedelta
43
+
44
# Paths — config lives one directory above this file, in config/
CONFIG_DIR = Path(__file__).resolve().parent.parent / "config"
RANKINGS_FILE = CONFIG_DIR / "model_rankings.json"  # persisted per-model stats (EMA latency, cooldowns)
API_KEYS_FILE = CONFIG_DIR / ".api_keys"            # KEY=value lines, one per line, gitignored

# How long to cool-down a failing model before retrying (seconds)
MODEL_DOWN_COOLDOWN_S = 300  # generic errors (5 min)
MODEL_RATE_LIMIT_COOLDOWN_S = 1800  # HTTP 429 rate-limit (30 min) — FreeRide approach
# Exponential moving average weight for response-time estimates
EMA_ALPHA = 0.3

# Cache file for dynamically discovered free OpenRouter models (FreeRide approach)
OPENROUTER_FREE_CACHE_FILE = CONFIG_DIR / "openrouter_free_models.json"
OPENROUTER_FREE_CACHE_HOURS = 6  # re-fetch every 6 h
OPENROUTER_API_MODELS_URL = "https://openrouter.ai/api/v1/models"

# Trusted providers — affects scoring weight for dynamic discovery (FreeRide).
# Order matters: earlier in the list = higher trust score.
TRUSTED_PROVIDERS = [
    "google", "meta-llama", "mistralai", "deepseek",
    "nvidia", "qwen", "microsoft", "allenai", "arcee-ai",
]

# Quality-score weights used to rank free OpenRouter models (FreeRide approach).
# Weights sum to 1.0 so scores land in [0, 1].
RANKING_WEIGHTS = {
    "context_length": 0.40,  # Longer = handle bigger payloads
    "capabilities": 0.30,    # Vision / tools / structured output
    "recency": 0.20,         # Newer models = better perf
    "provider_trust": 0.10,  # Prefer known providers
}
72
+
73
+ # ─────────────────────────────────────────────────────────────────────────────
74
+ # MODEL REGISTRY
75
+ # Each entry defines the key, provider, model ID, task type, and base priority.
76
+ # base_priority: lower = preferred. 100+ = local last-resort.
77
+ # ─────────────────────────────────────────────────────────────────────────────
78
+
79
# ─────────────────────────────────────────────────────────────────────────────
# MODEL REGISTRY
# Each entry: routing key, provider, provider-side model ID, task type, and
# base priority (lower = preferred; 100+ = local last-resort fallback).
# ─────────────────────────────────────────────────────────────────────────────
MODELS: List[Dict[str, Any]] = [
    # ── TEXT ─────────────────────────────────────────────────────────────────
    {"key": "groq_gpt_oss_20b", "provider": "groq", "model_id": "openai/gpt-oss-20b",
     "type": "text", "base_priority": 0.5, "desc": "Groq GPT-OSS 20B — fastest model on Groq at 1000 t/s"},
    {"key": "groq_llama33_70b", "provider": "groq", "model_id": "llama-3.3-70b-versatile",
     "type": "text", "base_priority": 1, "desc": "Groq LLaMA-3.3 70B — strong quality, 280 t/s"},
    {"key": "groq_llama4_scout", "provider": "groq", "model_id": "meta-llama/llama-4-scout-17b-16e-instruct",
     "type": "text", "base_priority": 1.5, "desc": "Groq Llama-4 Scout 17B — multimodal, 750 t/s (preview)"},
    {"key": "groq_llama31_8b", "provider": "groq", "model_id": "llama-3.1-8b-instant",
     "type": "text", "base_priority": 2, "desc": "Groq LLaMA-3.1 8B — ultra-fast at 560 t/s, great for summaries"},
    {"key": "groq_qwen3_32b", "provider": "groq", "model_id": "qwen/qwen3-32b",
     "type": "text", "base_priority": 2.5, "desc": "Groq Qwen3-32B — strong reasoning, 400 t/s (preview)"},
    {"key": "groq_gpt_oss_120b", "provider": "groq", "model_id": "openai/gpt-oss-120b",
     "type": "text", "base_priority": 3, "desc": "Groq GPT-OSS 120B — flagship 120B model, 500 t/s"},
    {"key": "groq_gemma2_9b", "provider": "groq", "model_id": "gemma2-9b-it",
     "type": "text", "base_priority": 3.5, "desc": "Groq Gemma-2 9B (deprecated fallback)"},
    {"key": "groq_deepseek_r1_32b", "provider": "groq", "model_id": "deepseek-r1-distill-qwen-32b",
     "type": "text", "base_priority": 3.8, "desc": "Groq DeepSeek-R1 Distill Qwen-32B — reasoning (deprecated fallback)"},
    {"key": "gemini_25_flash", "provider": "gemini", "model_id": "gemini-2.5-flash",
     "type": "text", "base_priority": 4, "desc": "Gemini 2.5 Flash — best price-performance, low-latency with reasoning"},
    {"key": "gemini_25_flash_lite", "provider": "gemini", "model_id": "gemini-2.5-flash-lite",
     "type": "text", "base_priority": 4.5, "desc": "Gemini 2.5 Flash-Lite — fastest & most budget-friendly in 2.5 family"},
    {"key": "gemini_25_pro", "provider": "gemini", "model_id": "gemini-2.5-pro",
     "type": "text", "base_priority": 5, "desc": "Gemini 2.5 Pro — most advanced 2.5, deep reasoning & coding"},
    {"key": "gemini_3_flash", "provider": "gemini", "model_id": "gemini-3-flash-preview",
     "type": "text", "base_priority": 5.5, "desc": "Gemini 3 Flash Preview — frontier-class, rivals larger models"},
    {"key": "gemini_3_pro", "provider": "gemini", "model_id": "gemini-3-pro-preview",
     "type": "text", "base_priority": 6, "desc": "Gemini 3 Pro Preview — state-of-the-art reasoning & multimodal"},
    {"key": "gemini_31_pro", "provider": "gemini", "model_id": "gemini-3.1-pro-preview",
     "type": "text", "base_priority": 6.5, "desc": "Gemini 3.1 Pro Preview — most advanced, agentic & vibe coding"},
    # ── Deprecated Gemini models (kept as deep fallbacks) ────────────────────
    {"key": "gemini_20_flash", "provider": "gemini", "model_id": "gemini-2.0-flash",
     "type": "text", "base_priority": 7, "desc": "Gemini 2.0 Flash (deprecated — fallback only)"},
    {"key": "gemini_20_flash_lite", "provider": "gemini", "model_id": "gemini-2.0-flash-lite",
     "type": "text", "base_priority": 7.5, "desc": "Gemini 2.0 Flash-Lite (deprecated — fallback only)"},
    {"key": "gemini_15_flash", "provider": "gemini", "model_id": "gemini-1.5-flash",
     "type": "text", "base_priority": 8, "desc": "Gemini 1.5 Flash (deprecated — fallback only)"},
    {"key": "openrouter_llama33_70b", "provider": "openrouter", "model_id": "meta-llama/llama-3.3-70b-instruct:free",
     "type": "text", "base_priority": 7, "desc": "OpenRouter LLaMA-3.3 70B (free, 128K ctx)"},
    {"key": "openrouter_deepseek_r1_0528", "provider": "openrouter", "model_id": "deepseek/deepseek-r1-0528:free",
     "type": "text", "base_priority": 7.5, "desc": "OpenRouter DeepSeek R1-0528 (free, 163K ctx) — latest reasoning model"},
    {"key": "openrouter_qwen3_235b", "provider": "openrouter", "model_id": "qwen/qwen3-235b-a22b-thinking-2507:free",
     "type": "text", "base_priority": 8, "desc": "OpenRouter Qwen3-235B Thinking (free, 131K ctx) — frontier reasoning"},
    {"key": "openrouter_hermes3_405b", "provider": "openrouter", "model_id": "nousresearch/hermes-3-llama-3.1-405b:free",
     "type": "text", "base_priority": 8.5, "desc": "OpenRouter Hermes-3 LLaMA-3.1 405B (free, 131K ctx)"},
    {"key": "openrouter_gpt_oss_120b", "provider": "openrouter", "model_id": "openai/gpt-oss-120b:free",
     "type": "text", "base_priority": 9, "desc": "OpenRouter GPT-OSS 120B (free, 131K ctx)"},
    {"key": "openrouter_gpt_oss_20b", "provider": "openrouter", "model_id": "openai/gpt-oss-20b:free",
     "type": "text", "base_priority": 9.5, "desc": "OpenRouter GPT-OSS 20B (free, 131K ctx)"},
    {"key": "openrouter_stepfun_flash", "provider": "openrouter", "model_id": "stepfun/step-3.5-flash:free",
     "type": "text", "base_priority": 10, "desc": "OpenRouter StepFun Step-3.5 Flash (free, 256K ctx)"},
    {"key": "openrouter_nemotron_30b", "provider": "openrouter", "model_id": "nvidia/nemotron-3-nano-30b-a3b:free",
     "type": "text", "base_priority": 10.5, "desc": "OpenRouter NVIDIA Nemotron-3 Nano 30B (free, 256K ctx)"},
    {"key": "openrouter_qwen3_next_80b", "provider": "openrouter", "model_id": "qwen/qwen3-next-80b-a3b-instruct:free",
     "type": "text", "base_priority": 11, "desc": "OpenRouter Qwen3-Next 80B (free, 262K ctx)"},
    {"key": "openrouter_gemma3_27b", "provider": "openrouter", "model_id": "google/gemma-3-27b-it:free",
     "type": "text", "base_priority": 11.5, "desc": "OpenRouter Gemma-3 27B (free, 131K ctx) — also vision capable"},
    {"key": "openrouter_mistral_small31", "provider": "openrouter", "model_id": "mistralai/mistral-small-3.1-24b-instruct:free",
     "type": "text", "base_priority": 12, "desc": "OpenRouter Mistral Small 3.1 24B (free, 128K ctx) — also vision capable"},
    {"key": "openrouter_glm45_air", "provider": "openrouter", "model_id": "z-ai/glm-4.5-air:free",
     "type": "text", "base_priority": 12.5, "desc": "OpenRouter GLM-4.5 Air (free, 131K ctx)"},
    {"key": "openrouter_dolphin_venice", "provider": "openrouter",
     "model_id": "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
     "type": "text", "base_priority": 13, "desc": "OpenRouter Dolphin Mistral 24B Venice Edition (free, 32K ctx)"},
    {"key": "local_qwen3", "provider": "ollama", "model_id": "qwen3-vl:4b",
     "type": "text", "base_priority": 100, "desc": "Local Ollama Qwen3-VL 4B — LAST RESORT (requires Ollama running)"},

    # ── VISION ───────────────────────────────────────────────────────────────
    {"key": "gemini_25_flash_vision", "provider": "gemini", "model_id": "gemini-2.5-flash",
     "type": "vision", "base_priority": 1, "desc": "Gemini 2.5 Flash Vision — best price-performance multimodal"},
    {"key": "gemini_25_flash_lite_vision", "provider": "gemini", "model_id": "gemini-2.5-flash-lite",
     "type": "vision", "base_priority": 1.5, "desc": "Gemini 2.5 Flash-Lite Vision — fastest multimodal"},
    {"key": "gemini_25_pro_vision", "provider": "gemini", "model_id": "gemini-2.5-pro",
     "type": "vision", "base_priority": 2, "desc": "Gemini 2.5 Pro Vision — advanced multimodal understanding"},
    {"key": "gemini_3_flash_vision", "provider": "gemini", "model_id": "gemini-3-flash-preview",
     "type": "vision", "base_priority": 2.5, "desc": "Gemini 3 Flash Preview Vision — frontier-class multimodal"},
    {"key": "gemini_3_pro_vision", "provider": "gemini", "model_id": "gemini-3-pro-preview",
     "type": "vision", "base_priority": 3, "desc": "Gemini 3 Pro Preview Vision — state-of-the-art multimodal reasoning"},
    {"key": "gemini_31_pro_vision", "provider": "gemini", "model_id": "gemini-3.1-pro-preview",
     "type": "vision", "base_priority": 3.5, "desc": "Gemini 3.1 Pro Preview Vision — most advanced multimodal"},
    # ── Deprecated Gemini vision models (kept as deep fallbacks) ─────────────
    {"key": "gemini_20_flash_vision", "provider": "gemini", "model_id": "gemini-2.0-flash",
     "type": "vision", "base_priority": 4, "desc": "Gemini 2.0 Flash Vision (deprecated — fallback only)"},
    {"key": "gemini_20_flash_lite_vision", "provider": "gemini", "model_id": "gemini-2.0-flash-lite",
     "type": "vision", "base_priority": 4.5, "desc": "Gemini 2.0 Flash-Lite Vision (deprecated — fallback only)"},
    {"key": "gemini_15_flash_vision", "provider": "gemini", "model_id": "gemini-1.5-flash",
     "type": "vision", "base_priority": 4.8, "desc": "Gemini 1.5 Flash Vision (deprecated — fallback only)"},
    {"key": "groq_llama4_scout_vision", "provider": "groq", "model_id": "meta-llama/llama-4-scout-17b-16e-instruct",
     "type": "vision", "base_priority": 5, "desc": "Groq Llama-4 Scout 17B Vision — multimodal, 750 t/s (preview)"},
    {"key": "groq_vision_11b", "provider": "groq", "model_id": "llama-3.2-11b-vision-preview",
     "type": "vision", "base_priority": 5.5, "desc": "Groq LLaMA-3.2 11B Vision (deprecated fallback)"},
    {"key": "groq_vision_90b", "provider": "groq", "model_id": "llama-3.2-90b-vision-preview",
     "type": "vision", "base_priority": 6, "desc": "Groq LLaMA-3.2 90B Vision — highest quality Groq vision (deprecated fallback)"},
    {"key": "openrouter_qwen3_vl_235b", "provider": "openrouter", "model_id": "qwen/qwen3-vl-235b-a22b-thinking:free",
     "type": "vision", "base_priority": 7, "desc": "OpenRouter Qwen3-VL 235B Vision (free, 131K ctx) — flagship vision model"},
    {"key": "openrouter_qwen3_vl_30b", "provider": "openrouter", "model_id": "qwen/qwen3-vl-30b-a3b-thinking:free",
     "type": "vision", "base_priority": 7.5, "desc": "OpenRouter Qwen3-VL 30B Vision (free, 131K ctx)"},
    {"key": "openrouter_nvidia_vl", "provider": "openrouter", "model_id": "nvidia/nemotron-nano-12b-v2-vl:free",
     "type": "vision", "base_priority": 8, "desc": "OpenRouter NVIDIA Nemotron-Nano 12B VL (free, 128K ctx) — supports video"},
    {"key": "openrouter_gemma3_vision", "provider": "openrouter", "model_id": "google/gemma-3-27b-it:free",
     "type": "vision", "base_priority": 8.5, "desc": "OpenRouter Gemma-3 27B Vision (free, 131K ctx)"},
    {"key": "openrouter_mistral_vision", "provider": "openrouter", "model_id": "mistralai/mistral-small-3.1-24b-instruct:free",
     "type": "vision", "base_priority": 9, "desc": "OpenRouter Mistral Small 3.1 24B Vision (free, 128K ctx)"},
    {"key": "local_qwen3_vl", "provider": "ollama", "model_id": "qwen3-vl:4b",
     "type": "vision", "base_priority": 100, "desc": "Local Ollama Qwen3-VL 4B — LAST RESORT (requires Ollama running)"},
]

# Fast lookup: routing key -> registry entry
MODELS_BY_KEY: Dict[str, Dict] = {m["key"]: m for m in MODELS}
480
+
481
+
482
+ def _has_image_input(m: Dict) -> bool:
483
+ """Return True if an OpenRouter model object supports image (vision) input."""
484
+ arch = m.get("architecture", {})
485
+ # OpenRouter returns input_modalities as a list, e.g. ["text", "image"]
486
+ mods: Any = arch.get("input_modalities") or arch.get("modality") or ""
487
+ if isinstance(mods, list):
488
+ return "image" in mods
489
+ return "image" in str(mods)
490
+
491
+
492
+ # ─────────────────────────────────────────────────────────────────────────────
493
+ # MODEL ROUTER
494
+ # ─────────────────────────────────────────────────────────────────────────────
495
+
496
+ class ModelRouter:
497
+ """
498
+ Routes AI requests to the best available model.
499
+ - Tries models in order of effective priority (base + response-time penalty)
500
+ - Marks failed models as 'down' for MODEL_DOWN_COOLDOWN_S seconds
501
+ - Updates EMA response-time estimates after each successful call
502
+ - Saves state to model_rankings.json so rankings persist across restarts
503
+ """
504
+
505
    def __init__(self):
        """Load API keys and persisted rankings, then start background discovery."""
        self._lock = threading.Lock()
        # Env-var-style key name -> value, e.g. "GROQ_API_KEY" -> "gsk_..."
        self._api_keys: Dict[str, str] = {}
        # Model key -> runtime stats (avg_response_s, success/fail counts, cooldowns)
        self._state: Dict[str, Dict] = {}
        # Dynamically discovered free OpenRouter models (FreeRide approach)
        self._dynamic_models: Dict[str, Dict] = {}
        self._dynamic_models_lock = threading.Lock()
        self._load_api_keys()
        self._load_state()
        self._print_startup_status()
        # Background: discover & rank free OpenRouter models, auto-refreshes every OPENROUTER_FREE_CACHE_HOURS
        threading.Thread(target=self._auto_refresh_loop, daemon=True).start()
517
+
518
+ # ── Configuration ─────────────────────────────────────────────────────────
519
+
520
+ def _load_api_keys(self):
521
+ """Load API keys from environment and .api_keys file."""
522
+ for k in ("GROQ_API_KEY", "GEMINI_API_KEY", "OPENROUTER_API_KEY"):
523
+ v = os.environ.get(k)
524
+ if v:
525
+ self._api_keys[k] = v
526
+
527
+ if API_KEYS_FILE.exists():
528
+ with open(API_KEYS_FILE, "r") as f:
529
+ for line in f:
530
+ line = line.strip()
531
+ if line and not line.startswith("#") and "=" in line:
532
+ k, v = line.split("=", 1)
533
+ self._api_keys[k.strip()] = v.strip()
534
+
535
+ def _key(self, name: str) -> Optional[str]:
536
+ return self._api_keys.get(name) or None
537
+
538
+ def reload_api_keys(self):
539
+ """Reload API keys from environment and .api_keys file."""
540
+ self._api_keys.clear()
541
+ self._load_api_keys()
542
+ logger.info("🔑 API keys reloaded")
543
+
544
+ def get_available_providers(self) -> Dict[str, bool]:
545
+ """Return dict of provider -> has_key."""
546
+ return {
547
+ "groq": bool(self._key("GROQ_API_KEY")),
548
+ "gemini": bool(self._key("GEMINI_API_KEY")),
549
+ "openrouter": bool(self._key("OPENROUTER_API_KEY")),
550
+ "ollama": True, # Always available
551
+ }
552
+
553
+ def set_api_key(self, provider: str, api_key: str) -> bool:
554
+ """Set an API key for a provider and persist to file."""
555
+ key_name = f"{provider.upper()}_API_KEY"
556
+ valid_providers = ["GROQ_API_KEY", "GEMINI_API_KEY", "OPENROUTER_API_KEY"]
557
+
558
+ if key_name not in valid_providers:
559
+ return False
560
+
561
+ # Update in-memory
562
+ self._api_keys[key_name] = api_key
563
+
564
+ # Persist to file
565
+ self._persist_api_key(key_name, api_key)
566
+ return True
567
+
568
+ def delete_api_key(self, provider: str) -> bool:
569
+ """Delete an API key for a provider."""
570
+ key_name = f"{provider.upper()}_API_KEY"
571
+ valid_providers = ["GROQ_API_KEY", "GEMINI_API_KEY", "OPENROUTER_API_KEY"]
572
+
573
+ if key_name not in valid_providers:
574
+ return False
575
+
576
+ # Remove from memory
577
+ self._api_keys.pop(key_name, None)
578
+
579
+ # Remove from file
580
+ self._remove_api_key(key_name)
581
+ return True
582
+
583
+ def _persist_api_key(self, key_name: str, api_key: str):
584
+ """Persist an API key to the .api_keys file."""
585
+ lines = []
586
+ updated = False
587
+
588
+ if API_KEYS_FILE.exists():
589
+ with open(API_KEYS_FILE, "r") as f:
590
+ for line in f:
591
+ if line.strip().startswith(f"{key_name}="):
592
+ lines.append(f"{key_name}={api_key}\n")
593
+ updated = True
594
+ else:
595
+ lines.append(line)
596
+
597
+ if not updated:
598
+ lines.append(f"{key_name}={api_key}\n")
599
+
600
+ with open(API_KEYS_FILE, "w") as f:
601
+ f.writelines(lines)
602
+
603
+ def _remove_api_key(self, key_name: str):
604
+ """Remove an API key from the .api_keys file."""
605
+ if not API_KEYS_FILE.exists():
606
+ return
607
+
608
+ lines = []
609
+ with open(API_KEYS_FILE, "r") as f:
610
+ for line in f:
611
+ if not line.strip().startswith(f"{key_name}="):
612
+ lines.append(line)
613
+
614
+ with open(API_KEYS_FILE, "w") as f:
615
+ f.writelines(lines)
616
+
617
+ # ── Dynamic OpenRouter free-model discovery (FreeRide approach) ────────────
618
+
619
+ def _default_model_state_dynamic(self, key: str, base_priority: float = 50) -> Dict:
620
+ """Create a default state dict for any model key (static or dynamic)."""
621
+ m = MODELS_BY_KEY.get(key) or self._dynamic_models.get(key) or {}
622
+ bp = m.get("base_priority", base_priority)
623
+ return {
624
+ "key": key,
625
+ "avg_response_s": None,
626
+ "success_count": 0,
627
+ "fail_count": 0,
628
+ "down_until": None,
629
+ "last_used": None,
630
+ "last_error": None,
631
+ "base_priority": bp,
632
+ }
633
+
634
+ def _score_openrouter_model(self, m: Dict) -> float:
635
+ """
636
+ Score an OpenRouter model 0-1 for quality ranking.
637
+ Factors: context length (40%), capabilities (30%), recency (20%), provider trust (10%).
638
+ Based on FreeRide's ranking algorithm.
639
+ """
640
+ score = 0.0
641
+ # Context length: normalise to 1M tokens
642
+ ctx = m.get("context_length", 0)
643
+ score += min(ctx / 1_000_000, 1.0) * RANKING_WEIGHTS["context_length"]
644
+ # Capabilities: normalise to 10 supported parameters
645
+ caps = m.get("supported_parameters", [])
646
+ score += min(len(caps) / 10, 1.0) * RANKING_WEIGHTS["capabilities"]
647
+ # Recency: newer = better (scores 1 at launch, decays to 0 over 1 year)
648
+ created = m.get("created", 0)
649
+ if created:
650
+ days_old = (time.time() - created) / 86400
651
+ score += max(0.0, 1.0 - days_old / 365) * RANKING_WEIGHTS["recency"]
652
+ # Provider trust
653
+ model_id = m.get("id", "")
654
+ provider = model_id.split("/")[0] if "/" in model_id else ""
655
+ if provider in TRUSTED_PROVIDERS:
656
+ trust_idx = TRUSTED_PROVIDERS.index(provider)
657
+ score += (1 - trust_idx / len(TRUSTED_PROVIDERS)) * RANKING_WEIGHTS["provider_trust"]
658
+ return score
659
+
660
    def _auto_refresh_loop(self):
        """Run _refresh_openrouter_models once immediately, then repeat every OPENROUTER_FREE_CACHE_HOURS.

        Runs forever in the daemon thread started by __init__; exceptions are
        caught and printed so one transient failure never kills the loop.
        """
        while True:
            try:
                self._refresh_openrouter_models()
            except Exception as e:
                print(f"⚠️ OpenRouter auto-refresh error: {e}")
            time.sleep(OPENROUTER_FREE_CACHE_HOURS * 3600)
668
+
669
    def _refresh_openrouter_models(self):
        """
        Fetch free models from OpenRouter API, score & rank them, cache to disk,
        and inject the top models into self._dynamic_models for routing.
        Called by _auto_refresh_loop every OPENROUTER_FREE_CACHE_HOURS; safe to call manually.
        Based on FreeRide's fetch_all_models + filter_free_models + rank_free_models.
        """
        api_key = self._key("OPENROUTER_API_KEY")
        if not api_key:
            return  # No key → skip

        # Check cache freshness: reuse the on-disk cache and skip the network
        # call when it is younger than OPENROUTER_FREE_CACHE_HOURS.
        try:
            if OPENROUTER_FREE_CACHE_FILE.exists():
                cache = json.loads(OPENROUTER_FREE_CACHE_FILE.read_text())
                cached_at = datetime.fromisoformat(cache.get("cached_at", "2000-01-01"))
                if (datetime.utcnow() - cached_at).total_seconds() < OPENROUTER_FREE_CACHE_HOURS * 3600:
                    models = cache.get("models", [])
                    self._inject_dynamic_models(models)
                    vision_count = sum(1 for m in models if _has_image_input(m))
                    next_refresh_m = int(
                        (OPENROUTER_FREE_CACHE_HOURS * 3600
                         - (datetime.utcnow() - cached_at).total_seconds()) / 60
                    )
                    print(f"🔄 OpenRouter free models: loaded {len(models)} from cache "
                          f"({vision_count} vision-capable) — next refresh in ~{next_refresh_m}m")
                    return
        except Exception:
            pass  # corrupt/unreadable cache → fall through to a fresh fetch

        # Fetch the full model catalogue from the API
        try:
            import requests as _req  # local import: requests only needed here
            resp = _req.get(
                OPENROUTER_API_MODELS_URL,
                headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
                timeout=30,
            )
            resp.raise_for_status()
            all_models = resp.json().get("data", [])
        except Exception as e:
            print(f"⚠️ OpenRouter model discovery failed: {e}")
            return

        # Filter for free models (pricing.prompt == 0 or :free suffix)
        free_models = []
        for m in all_models:
            mid = m.get("id", "")
            pricing = m.get("pricing", {})
            try:
                if float(pricing.get("prompt", 1)) == 0:
                    free_models.append(m)
                    continue
            except (TypeError, ValueError):
                pass  # non-numeric pricing value → fall back to the id check
            if ":free" in mid and m not in free_models:
                free_models.append(m)

        # Score and rank (best first)
        scored = sorted(free_models, key=self._score_openrouter_model, reverse=True)
        # Take top 30 across all types (text + vision-capable)
        top = scored[:30]

        # Persist cache — best-effort; routing still works if the write fails
        try:
            OPENROUTER_FREE_CACHE_FILE.write_text(json.dumps({
                "cached_at": datetime.utcnow().isoformat(),
                "models": top,
            }, indent=2))
        except Exception:
            pass

        self._inject_dynamic_models(top)
        vision_count = sum(
            1 for m in top
            if _has_image_input(m)
        )
        print(f"🔄 OpenRouter free models: discovered & ranked {len(top)} models ({vision_count} vision-capable) — next refresh in {OPENROUTER_FREE_CACHE_HOURS}h")
747
+
748
+ def _inject_dynamic_models(self, raw_models: List[Dict]):
749
+ """
750
+ Convert raw OpenRouter API model objects into routing entries and
751
+ add them to self._dynamic_models with priorities starting at 20
752
+ (after all hardcoded models, so they serve as additional fallbacks).
753
+ Models already in the static MODELS_BY_KEY are skipped.
754
+ Vision-capable models get an additional entry with type='vision'.
755
+ """
756
+ static_model_ids = {mm["model_id"] for mm in MODELS_BY_KEY.values()}
757
+ with self._dynamic_models_lock:
758
+ self._dynamic_models.clear()
759
+ for i, m in enumerate(raw_models):
760
+ mid = m.get("id", "")
761
+ if not mid:
762
+ continue
763
+ safe_id = mid.replace("/", "_").replace(":", "_").replace(".", "_")
764
+ model_id_free = mid if ":free" in mid else f"{mid}:free"
765
+ score = self._score_openrouter_model(m)
766
+ is_vision = _has_image_input(m)
767
+ base_p = 20 + i
768
+
769
+ # Text entry — skip if already a static entry
770
+ if mid not in static_model_ids:
771
+ key = f"dyn_{safe_id}"
772
+ entry = {
773
+ "key": key,
774
+ "provider": "openrouter",
775
+ "model_id": model_id_free,
776
+ "type": "text",
777
+ "base_priority": base_p,
778
+ "desc": f"[Dynamic] {mid} — score={score:.3f}",
779
+ }
780
+ self._dynamic_models[key] = entry
781
+ if key not in self._state:
782
+ self._state[key] = self._default_model_state_dynamic(key, base_p)
783
+
784
+ # Vision entry — inject if vision-capable and not already in static vision models
785
+ if is_vision:
786
+ static_vision_ids = {
787
+ mm["model_id"] for mm in MODELS_BY_KEY.values() if mm["type"] == "vision"
788
+ }
789
+ if mid not in static_vision_ids:
790
+ vkey = f"dyn_v_{safe_id}"
791
+ ventry = {
792
+ "key": vkey,
793
+ "provider": "openrouter",
794
+ "model_id": model_id_free,
795
+ "type": "vision",
796
+ "base_priority": base_p,
797
+ "desc": f"[Dynamic-Vision] {mid} — score={score:.3f}",
798
+ }
799
+ self._dynamic_models[vkey] = ventry
800
+ if vkey not in self._state:
801
+ self._state[vkey] = self._default_model_state_dynamic(vkey, base_p)
802
+
803
+ # ── State persistence ──────────────────────────────────────────────────────
804
+
805
    def _default_model_state(self, model_key: str) -> Dict:
        """Backward-compat wrapper — delegates to _default_model_state_dynamic
        (kept so older callers of the pre-dynamic-discovery API keep working)."""
        return self._default_model_state_dynamic(model_key)
808
+
809
+ def _load_state(self):
810
+ if RANKINGS_FILE.exists():
811
+ try:
812
+ with open(RANKINGS_FILE, "r") as f:
813
+ saved = json.load(f)
814
+ for key in MODELS_BY_KEY:
815
+ self._state[key] = saved.get(key, self._default_model_state(key))
816
+ return
817
+ except Exception:
818
+ pass
819
+ for key in MODELS_BY_KEY:
820
+ self._state[key] = self._default_model_state(key)
821
+
822
+ def _save_state(self):
823
+ try:
824
+ with open(RANKINGS_FILE, "w") as f:
825
+ json.dump(self._state, f, indent=2, default=str)
826
+ except Exception:
827
+ pass
828
+
829
+ # ── Availability & ranking ─────────────────────────────────────────────────
830
+
831
def _is_available(self, model_key: str) -> bool:
    """True if the model has an API key (or is local) and is not in cooldown."""
    # Look up in static registry first, then dynamic
    m = MODELS_BY_KEY.get(model_key) or self._dynamic_models.get(model_key)
    if m is None:
        return False

    # Cloud providers need their API key; local providers (ollama) do not.
    required_key = {
        "groq": "GROQ_API_KEY",
        "gemini": "GEMINI_API_KEY",
        "openrouter": "OPENROUTER_API_KEY",
    }.get(m["provider"])
    if required_key and not self._key(required_key):
        return False

    s = self._state.get(model_key, self._default_model_state_dynamic(model_key))
    if s.get("down_until"):
        try:
            if datetime.utcnow() < datetime.fromisoformat(s["down_until"]):
                return False
        # A malformed timestamp (ValueError) or a non-string value
        # (TypeError) is treated as "cooldown expired" rather than a
        # hard failure; anything else should surface.
        except (ValueError, TypeError):
            pass
    return True
854
+
855
def _effective_priority(self, model_key: str) -> float:
    """
    Sort key for model ranking (lower is better).

    Equals base_priority plus a response-time penalty in [0, 10]; a model
    with no recorded timing keeps its bare base_priority.
    """
    meta = MODELS_BY_KEY.get(model_key) or self._dynamic_models.get(model_key) or {}
    state = self._state.get(model_key) or {}
    base = float(state.get("base_priority", meta.get("base_priority", 50)))
    avg_time = state.get("avg_response_s")
    if avg_time is None:
        return base
    # One penalty point per 3 s over a 2 s baseline, clamped to [0, 10].
    penalty = (avg_time - 2.0) / 3.0
    if penalty < 0.0:
        penalty = 0.0
    elif penalty > 10.0:
        penalty = 10.0
    return base + penalty
870
+
871
def _ranked_models(self, task_type: str) -> List[str]:
    """Return available model keys for task_type, best first (static + dynamic)."""
    candidates = [
        key for key, meta in MODELS_BY_KEY.items()
        if meta["type"] == task_type and self._is_available(key)
    ]
    # Dynamically discovered OpenRouter models are text-only for now
    # (no vision discovery yet).
    if task_type == "text":
        with self._dynamic_models_lock:
            candidates.extend(
                key for key, meta in self._dynamic_models.items()
                if meta["type"] == task_type
                and self._is_available(key)
                and key not in MODELS_BY_KEY  # don't double-count
            )
    candidates.sort(key=self._effective_priority)
    return candidates
888
+
889
+ # ── State recording ────────────────────────────────────────────────────────
890
+
891
+ def _record_success(self, model_key: str, elapsed: float):
892
+ # Ensure state exists (dynamic models may not be pre-seeded)
893
+ if model_key not in self._state:
894
+ self._state[model_key] = self._default_model_state_dynamic(model_key)
895
+ with self._lock:
896
+ s = self._state[model_key]
897
+ prev = s["avg_response_s"]
898
+ s["avg_response_s"] = (
899
+ elapsed if prev is None
900
+ else EMA_ALPHA * elapsed + (1 - EMA_ALPHA) * prev
901
+ )
902
+ s["success_count"] = s.get("success_count", 0) + 1
903
+ s["down_until"] = None
904
+ s["last_used"] = datetime.utcnow().isoformat()
905
+ s["last_error"] = None
906
+ self._save_state()
907
+
908
def _record_failure(self, model_key: str, error: str, status_code: int = 0):
    """
    Mark a model as down after a failure.

    HTTP 429 rate-limit uses the longer cooldown
    (MODEL_RATE_LIMIT_COOLDOWN_S, FreeRide approach); any other error uses
    the short MODEL_DOWN_COOLDOWN_S.

    Thread-safety fix: state seeding for dynamically discovered models is
    now performed inside ``self._lock`` (the original check-then-insert
    outside the lock could race with a concurrent worker).
    """
    err_text = str(error)
    is_rate_limit = (
        status_code == 429
        or "429" in err_text
        or ("rate" in err_text.lower() and "limit" in err_text.lower())
    )
    cooldown = MODEL_RATE_LIMIT_COOLDOWN_S if is_rate_limit else MODEL_DOWN_COOLDOWN_S
    reason = "rate-limited" if is_rate_limit else "error"

    with self._lock:
        # Seed atomically — dynamic models may not be pre-seeded.
        if model_key not in self._state:
            self._state[model_key] = self._default_model_state_dynamic(model_key)
        s = self._state[model_key]
        s["fail_count"] = s.get("fail_count", 0) + 1
        s["last_error"] = err_text[:200]
        s["down_until"] = (
            datetime.utcnow() + timedelta(seconds=cooldown)
        ).isoformat()
    self._save_state()
    print(
        f" ⚠️ [{model_key}] {reason} — DOWN for {cooldown}s — {err_text[:80]}"
    )
933
+
934
+ # ── Provider implementations ───────────────────────────────────────────────
935
+
936
def _groq_text(self, model_id: str, prompt: str) -> str:
    """Run a plain-text chat completion against Groq and return the reply."""
    from groq import Groq
    client = Groq(api_key=self._key("GROQ_API_KEY"))
    messages = [{"role": "user", "content": prompt}]
    response = client.chat.completions.create(
        model=model_id,
        messages=messages,
        max_tokens=800,
        temperature=0.7,
    )
    return response.choices[0].message.content.strip()
946
+
947
def _groq_vision(self, model_id: str, prompt: str, images_b64: List[str]) -> str:
    """Run a vision chat completion against Groq.

    Only the first image is sent — Groq's 11b vision model accepts a
    single image per request.
    """
    from groq import Groq
    client = Groq(api_key=self._key("GROQ_API_KEY"))
    content: List[Dict] = [
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
        }
        for b64 in images_b64[:1]  # Groq supports 1 image per request
    ]
    content.append({"type": "text", "text": prompt})
    response = client.chat.completions.create(
        model=model_id,
        messages=[{"role": "user", "content": content}],
        max_tokens=800,
        temperature=0.7,
    )
    return response.choices[0].message.content.strip()
965
+
966
def _gemini_text(self, model_id: str, prompt: str) -> str:
    """Generate text via Google Gemini and return the stripped reply."""
    import google.generativeai as genai
    genai.configure(api_key=self._key("GEMINI_API_KEY"))
    gen_config = {"max_output_tokens": 800, "temperature": 0.7}
    model = genai.GenerativeModel(model_id)
    reply = model.generate_content(prompt, generation_config=gen_config)
    return reply.text.strip()
975
+
976
def _gemini_vision(self, model_id: str, prompt: str, images_b64: List[str]) -> str:
    """Analyze images via Google Gemini; every image is sent inline as JPEG."""
    import google.generativeai as genai
    genai.configure(api_key=self._key("GEMINI_API_KEY"))
    model = genai.GenerativeModel(model_id)
    # Inline each image first, then append the text prompt last.
    parts: List[Any] = [
        {"inline_data": {"mime_type": "image/jpeg", "data": b64}}
        for b64 in images_b64
    ]
    parts.append(prompt)
    reply = model.generate_content(
        parts,
        generation_config={"max_output_tokens": 800, "temperature": 0.7},
    )
    return reply.text.strip()
989
+
990
def _openrouter_text(self, model_id: str, prompt: str) -> str:
    """Run a text chat completion through the OpenRouter HTTP API."""
    import requests
    headers = {
        "Authorization": f"Bearer {self._key('OPENROUTER_API_KEY')}",
        "HTTP-Referer": "https://github.com/superbrain",
        "X-Title": "SuperBrain",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 800,
        "temperature": 0.7,
        "route": "fallback",  # Nexlify uptime-optimizer: auto-fallback on provider failures
    }
    resp = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=60,
    )
    # Surface 429 with a recognisable message so _record_failure applies
    # the long rate-limit cooldown.
    if resp.status_code == 429:
        raise Exception(f"429 rate limit: {resp.text[:200]}")
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"].strip()
1013
+
1014
def _openrouter_vision(self, model_id: str, prompt: str, images_b64: List[str]) -> str:
    """Run a vision chat completion through the OpenRouter HTTP API."""
    import requests
    # OpenRouter multimodal message shape: images first, text prompt last.
    content: List[Dict] = [
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{b64}"},
        }
        for b64 in images_b64
    ]
    content.append({"type": "text", "text": prompt})
    headers = {
        "Authorization": f"Bearer {self._key('OPENROUTER_API_KEY')}",
        "HTTP-Referer": "https://github.com/superbrain",
        "X-Title": "SuperBrain",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model_id,
        "messages": [{"role": "user", "content": content}],
        "max_tokens": 800,
        "temperature": 0.7,
        "route": "fallback",  # Nexlify uptime-optimizer: auto-fallback on provider failures
    }
    resp = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=90,
    )
    # Surface 429 with a recognisable message so _record_failure applies
    # the long rate-limit cooldown.
    if resp.status_code == 429:
        raise Exception(f"429 rate limit: {resp.text[:200]}")
    resp.raise_for_status()
    return resp.json()["choices"][0]["message"]["content"].strip()
1044
+
1045
def _ollama_text(self, model_id: str, prompt: str) -> str:
    """Generate text with a local Ollama model (offline fallback)."""
    import ollama
    opts = {"temperature": 0.7, "num_predict": 800}
    result = ollama.generate(model=model_id, prompt=prompt, options=opts)
    return result.get("response", "").strip()
1053
+
1054
def _ollama_vision(self, model_id: str, prompt: str, images_b64: List[str]) -> str:
    """Analyze images with a local Ollama vision model (offline fallback)."""
    import ollama
    opts = {"temperature": 0.7, "num_predict": 800}
    result = ollama.generate(
        model=model_id,
        prompt=prompt,
        images=images_b64,
        options=opts,
    )
    return result.get("response", "").strip()
1063
+
1064
+ # ── Public API ─────────────────────────────────────────────────────────────
1065
+
1066
def generate_text(self, prompt: str) -> str:
    """
    Generate text using the best available model.
    Walks the ranked list, falling back until one provider succeeds.
    Raises RuntimeError if no model is configured or every candidate fails.
    """
    ranked = self._ranked_models("text")
    if not ranked:
        raise RuntimeError(
            "No text models available. Add API keys to backend/config/.api_keys"
        )

    # Provider name → bound call; replaces the if/elif dispatch chain.
    dispatch = {
        "groq": self._groq_text,
        "gemini": self._gemini_text,
        "openrouter": self._openrouter_text,
        "ollama": self._ollama_text,
    }

    for key in ranked:
        m = MODELS_BY_KEY.get(key) or self._dynamic_models.get(key)
        if not m:
            continue
        print(f" 🤖 [{m['provider'].upper()}] {m['model_id']} ...", flush=True)
        t0 = time.time()
        call = dispatch.get(m["provider"])
        if call is None:
            # Unknown provider — skip without recording a failure.
            continue
        try:
            result = call(m["model_id"], prompt)
            elapsed = time.time() - t0
            self._record_success(key, elapsed)
            print(f" ✓ {elapsed:.1f}s", flush=True)
            return result
        except Exception as e:
            status = 429 if "429" in str(e) else 0
            self._record_failure(key, str(e), status_code=status)
            print(f" ✗ Failed ({type(e).__name__}), trying next …", flush=True)

    raise RuntimeError("All text models failed.")
1108
+
1109
def analyze_images(self, prompt: str, images_b64: List[str]) -> str:
    """
    Analyze one or more images using the best available vision model.
    images_b64: list of base64-encoded JPEG strings.
    Walks the ranked list, falling back until one provider succeeds.
    Raises RuntimeError if no model is configured or every candidate fails.
    """
    ranked = self._ranked_models("vision")
    if not ranked:
        raise RuntimeError(
            "No vision models available. Add API keys to backend/config/.api_keys"
        )

    # Provider name → bound call; replaces the if/elif dispatch chain.
    dispatch = {
        "groq": self._groq_vision,
        "gemini": self._gemini_vision,
        "openrouter": self._openrouter_vision,
        "ollama": self._ollama_vision,
    }

    for key in ranked:
        m = MODELS_BY_KEY.get(key) or self._dynamic_models.get(key)
        if not m:
            continue
        print(f" 🔭 [{m['provider'].upper()}] {m['model_id']} ...", flush=True)
        t0 = time.time()
        call = dispatch.get(m["provider"])
        if call is None:
            # Unknown provider — skip without recording a failure.
            continue
        try:
            result = call(m["model_id"], prompt, images_b64)
            elapsed = time.time() - t0
            self._record_success(key, elapsed)
            print(f" ✓ {elapsed:.1f}s", flush=True)
            return result
        except Exception as e:
            status = 429 if "429" in str(e) else 0
            self._record_failure(key, str(e), status_code=status)
            print(f" ✗ Failed ({type(e).__name__}), trying next …", flush=True)

    raise RuntimeError("All vision models failed.")
1152
+
1153
+ # ── Utilities ──────────────────────────────────────────────────────────────
1154
+
1155
def print_rankings(self):
    """Print a full ranked table of all models with performance stats (static + dynamic).

    Fixes: (a) state entries loaded from an older rankings file may lack
    the avg_response_s key — use .get() like the rest of the class does;
    (b) snapshot _dynamic_models under its lock so the background refresh
    thread cannot mutate the dict while it is being iterated.
    """
    print("\n" + "=" * 80)
    print("🏆 AI MODEL RANKINGS (auto-sorted by speed + reliability)")
    print("=" * 80)
    with self._dynamic_models_lock:
        dynamic_snapshot = dict(self._dynamic_models)
    for task_type in ("text", "vision"):
        print(f"\n{'─' * 80}")
        print(f" {task_type.upper()} MODELS (rank 1 = currently preferred)")
        print(f"{'─' * 80}")
        static_keys = [k for k, m in MODELS_BY_KEY.items() if m["type"] == task_type]
        dyn_keys = [k for k, m in dynamic_snapshot.items() if m["type"] == task_type]
        all_keys = static_keys + [k for k in dyn_keys if k not in MODELS_BY_KEY]
        sorted_keys = sorted(all_keys, key=self._effective_priority)
        for i, key in enumerate(sorted_keys, 1):
            m = MODELS_BY_KEY.get(key) or dynamic_snapshot.get(key)
            if not m:
                continue
            s = self._state.get(key, self._default_model_state_dynamic(key))
            avail = "✓ UP " if self._is_available(key) else "✗ DOWN"
            avg_t = s.get("avg_response_s")  # older saved states may omit this key
            avg_str = f"{avg_t:5.1f}s" if avg_t is not None else " new "
            n_ok = s.get("success_count", 0)
            n_err = s.get("fail_count", 0)
            tag = "[dyn]" if key.startswith("dyn_") else " "
            print(
                f" {i:2}. [{avail}] {tag} "
                f"{m['provider']:<12} "
                f"{m['model_id']:<48} "
                f"avg={avg_str} ok={n_ok} fail={n_err}"
            )
    dyn_count = len(dynamic_snapshot)
    print(f"\n ({dyn_count} additional free models discovered dynamically from OpenRouter)")
    print()
1188
+
1189
def reset_model(self, model_key: str):
    """Manually clear the down-cooldown (and fail counter) for one model key."""
    if model_key not in self._state:
        print(f"Unknown model key: {model_key}")
        return
    with self._lock:
        entry = self._state[model_key]
        entry["down_until"] = None
        entry["fail_count"] = 0
    self._save_state()
    print(f"✓ Reset model: {model_key}")
1199
+
1200
def refresh_models(self):
    """Force-refresh the OpenRouter free model list (ignores cache)."""
    # Drop the stale cache so _refresh_openrouter_models fetches fresh data.
    # missing_ok=True already covers an absent file; only OSError (e.g. a
    # permission problem) is swallowed, so the refresh itself still runs.
    try:
        OPENROUTER_FREE_CACHE_FILE.unlink(missing_ok=True)
    except OSError:
        pass
    self._refresh_openrouter_models()
    print(f"✓ Refreshed: {len(self._dynamic_models)} dynamic models loaded")
1209
+
1210
+ def _print_startup_status(self):
1211
+ parts = []
1212
+ parts.append("Groq ✓" if self._key("GROQ_API_KEY") else "Groq ✗")
1213
+ parts.append("Gemini ✓" if self._key("GEMINI_API_KEY") else "Gemini ✗")
1214
+ parts.append("OpenRouter ✓" if self._key("OPENROUTER_API_KEY") else "OpenRouter ✗")
1215
+ parts.append("Ollama (fallback)")
1216
+ print(f"🌐 Model Router initialised: {' | '.join(parts)}")
1217
+
1218
+
1219
# ─────────────────────────────────────────────────────────────────────────────
# Module-level singleton storage — accessed only through get_router()
# ─────────────────────────────────────────────────────────────────────────────

_router_instance: Optional[ModelRouter] = None
_router_lock = threading.Lock()
1225
+
1226
+
1227
def get_router() -> ModelRouter:
    """Return the process-wide ModelRouter, creating it on first use."""
    global _router_instance
    # Fast path: already initialised — no lock needed.
    if _router_instance is not None:
        return _router_instance
    # Slow path: double-checked locking so only one instance is ever built.
    with _router_lock:
        if _router_instance is None:
            _router_instance = ModelRouter()
        return _router_instance
1235
+
1236
+
1237
+ # ─────────────────────────────────────────────────────────────────────────────
1238
+ # CLI — run this file directly to inspect rankings or reset a model
1239
+ # Usage:
1240
+ # python model_router.py → show rankings
1241
+ # python model_router.py reset <key> → clear cooldown for model key
1242
+ # ─────────────────────────────────────────────────────────────────────────────
1243
+
1244
+ if __name__ == "__main__":
1245
+ import sys as _sys
1246
+ router = get_router()
1247
+ import time as _time; _time.sleep(1) # give background thread a moment
1248
+ if len(_sys.argv) >= 3 and _sys.argv[1] == "reset":
1249
+ router.reset_model(_sys.argv[2])
1250
+ elif len(_sys.argv) >= 2 and _sys.argv[1] == "refresh":
1251
+ router.refresh_models()
1252
+ else:
1253
+ router.print_rankings()