npm - superbrain-server - Versions diffs - 1.0.43 → 1.0.45 - Mend

superbrain-server 1.0.43 → 1.0.45

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (4)

package/package.json +1 -1
package/payload/config/model_rankings.json +35 -35
package/payload/core/model_router.py +13 -8
package/payload/start.py +12 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superbrain-server",
-  "version": "1.0.43",
+  "version": "1.0.45",
   "description": "1-Line Auto-Installer and Server Execution wrapper for SuperBrain",
   "main": "index.js",
   "bin": {

package/payload/config/model_rankings.json CHANGED Viewed

@@ -1,11 +1,11 @@
 {
   "groq_gpt_oss_20b": {
     "key": "groq_gpt_oss_20b",
-    "avg_response_s": 1.5378954618382221,
-    "success_count": 71,
+    "avg_response_s": 1.3635670783103586,
+    "success_count": 77,
     "fail_count": 6,
     "down_until": null,
-    "last_used": "2026-04-09T12:10:09.765726",
+    "last_used": "2026-04-09T17:35:12.529719",
     "last_error": null,
     "base_priority": 0.5
   },
@@ -173,49 +173,49 @@
     "key": "openrouter_llama33_70b",
     "avg_response_s": null,
     "success_count": 0,
-    "fail_count": 0,
-    "down_until": null,
+    "fail_count": 1,
+    "down_until": "2026-04-09T17:58:36.014066",
     "last_used": null,
-    "last_error": null,
+    "last_error": "429 rate limit: {\"error\":{\"message\":\"Provider returned error\",\"code\":429,\"metadata\":{\"raw\":\"meta-llama/llama-3.3-70b-instruct:free is temporarily rate-limited upstream. Please retry shortly, or add yo",
     "base_priority": 7
   },
   "openrouter_deepseek_r1_0528": {
     "key": "openrouter_deepseek_r1_0528",
     "avg_response_s": null,
     "success_count": 0,
-    "fail_count": 0,
-    "down_until": null,
+    "fail_count": 2,
+    "down_until": "2026-04-09T17:42:02.945931",
     "last_used": null,
-    "last_error": null,
+    "last_error": "404 Client Error: Not Found for url: https://openrouter.ai/api/v1/chat/completions",
     "base_priority": 7.5
   },
   "openrouter_qwen3_235b": {
     "key": "openrouter_qwen3_235b",
     "avg_response_s": null,
     "success_count": 0,
-    "fail_count": 0,
-    "down_until": null,
+    "fail_count": 2,
+    "down_until": "2026-04-09T17:42:03.575625",
     "last_used": null,
-    "last_error": null,
+    "last_error": "404 Client Error: Not Found for url: https://openrouter.ai/api/v1/chat/completions",
     "base_priority": 8
   },
   "openrouter_hermes3_405b": {
     "key": "openrouter_hermes3_405b",
     "avg_response_s": null,
     "success_count": 0,
-    "fail_count": 0,
-    "down_until": null,
+    "fail_count": 1,
+    "down_until": "2026-04-09T17:58:37.929800",
     "last_used": null,
-    "last_error": null,
+    "last_error": "429 rate limit: {\"error\":{\"message\":\"Provider returned error\",\"code\":429,\"metadata\":{\"raw\":\"nousresearch/hermes-3-llama-3.1-405b:free is temporarily rate-limited upstream. Please retry shortly, or add",
     "base_priority": 8.5
   },
   "openrouter_gpt_oss_120b": {
     "key": "openrouter_gpt_oss_120b",
-    "avg_response_s": null,
-    "success_count": 0,
+    "avg_response_s": 3.9147901058197023,
+    "success_count": 2,
     "fail_count": 0,
     "down_until": null,
-    "last_used": null,
+    "last_used": "2026-04-09T17:37:11.262276",
     "last_error": null,
     "base_priority": 9
   },
@@ -323,8 +323,8 @@
     "key": "gemini_25_flash_lite_vision",
     "avg_response_s": 6.709401964075168,
     "success_count": 14,
-    "fail_count": 21,
-    "down_until": "2026-04-09T12:14:58.761896",
+    "fail_count": 24,
+    "down_until": "2026-04-09T17:40:03.986631",
     "last_used": "2026-02-24T09:45:09.831171",
     "last_error": "No module named 'google.generativeai'",
     "base_priority": 1.5
@@ -333,8 +333,8 @@
     "key": "gemini_25_pro_vision",
     "avg_response_s": null,
     "success_count": 0,
-    "fail_count": 24,
-    "down_until": "2026-04-09T12:14:58.758850",
+    "fail_count": 27,
+    "down_until": "2026-04-09T17:40:03.984360",
     "last_used": null,
     "last_error": "No module named 'google.generativeai'",
     "base_priority": 2
@@ -353,8 +353,8 @@
     "key": "gemini_3_pro_vision",
     "avg_response_s": null,
     "success_count": 0,
-    "fail_count": 23,
-    "down_until": "2026-04-09T12:14:58.760897",
+    "fail_count": 26,
+    "down_until": "2026-04-09T17:40:03.984360",
     "last_used": null,
     "last_error": "No module named 'google.generativeai'",
     "base_priority": 3
@@ -363,8 +363,8 @@
     "key": "gemini_31_pro_vision",
     "avg_response_s": null,
     "success_count": 0,
-    "fail_count": 22,
-    "down_until": "2026-04-09T12:14:58.762895",
+    "fail_count": 25,
+    "down_until": "2026-04-09T17:40:03.987716",
     "last_used": null,
     "last_error": "No module named 'google.generativeai'",
     "base_priority": 3.5
@@ -373,8 +373,8 @@
     "key": "gemini_20_flash_vision",
     "avg_response_s": null,
     "success_count": 0,
-    "fail_count": 21,
-    "down_until": "2026-04-09T12:14:58.764895",
+    "fail_count": 24,
+    "down_until": "2026-04-09T17:40:03.987716",
     "last_used": null,
     "last_error": "No module named 'google.generativeai'",
     "base_priority": 4
@@ -383,8 +383,8 @@
     "key": "gemini_20_flash_lite_vision",
     "avg_response_s": null,
     "success_count": 0,
-    "fail_count": 21,
-    "down_until": "2026-04-09T12:14:58.765718",
+    "fail_count": 24,
+    "down_until": "2026-04-09T17:40:03.987716",
     "last_used": null,
     "last_error": "No module named 'google.generativeai'",
     "base_priority": 4.5
@@ -393,19 +393,19 @@
     "key": "gemini_15_flash_vision",
     "avg_response_s": null,
     "success_count": 0,
-    "fail_count": 21,
-    "down_until": "2026-04-09T12:14:58.765718",
+    "fail_count": 24,
+    "down_until": "2026-04-09T17:40:03.987716",
     "last_used": null,
     "last_error": "No module named 'google.generativeai'",
     "base_priority": 4.8
   },
   "groq_llama4_scout_vision": {
     "key": "groq_llama4_scout_vision",
-    "avg_response_s": 1.7195895721505887,
-    "success_count": 46,
+    "avg_response_s": 1.5534682586944504,
+    "success_count": 52,
     "fail_count": 5,
     "down_until": null,
-    "last_used": "2026-04-09T12:10:03.943317",
+    "last_used": "2026-04-09T17:35:07.357177",
     "last_error": null,
     "base_priority": 5
   },

package/payload/core/model_router.py CHANGED Viewed

@@ -489,6 +489,9 @@ def _has_image_input(m: Dict) -> bool:
     return "image" in str(mods)
+class RateLimitError(Exception):
+    pass
 # ─────────────────────────────────────────────────────────────────────────────
 #  MODEL ROUTER
 # ─────────────────────────────────────────────────────────────────────────────
@@ -707,10 +710,10 @@ class ModelRouter:
             resp.raise_for_status()
             all_models = resp.json().get("data", [])
         except Exception as e:
-            if "429" in str(e) or "quota" in str(e).lower():
-                raise RateLimitError("Quota limit hit")
-            raise e
-            print(f"⚠️  OpenRouter model discovery failed: {e}")
+            # if "429" in str(e) or "quota" in str(e).lower():
+            #     raise RateLimitError("Quota limit hit")
+            # raise e
+            print(f"⚠️  OpenRouter free model discovery failed: {e}")
             return
         # Filter for free models (pricing.prompt == 0 or :free suffix)
@@ -1103,8 +1106,9 @@ class ModelRouter:
                 return result
             except Exception as e:
-                if "429" in str(e) or "quota" in str(e).lower():
-                    raise RateLimitError("Quota limit hit")
+                # Do not immediately abort on quota, try next model
+                # if "429" in str(e) or "quota" in str(e).lower():
+                #     raise RateLimitError("Quota limit hit")
                 status = 429 if "429" in str(e) else 0
                 self._record_failure(key, str(e), status_code=status)
                 print(f"  ✗ Failed ({type(e).__name__}), trying next …", flush=True)
@@ -1149,8 +1153,9 @@ class ModelRouter:
                 return result
             except Exception as e:
-                if "429" in str(e) or "quota" in str(e).lower():
-                    raise RateLimitError("Quota limit hit")
+                # Do not immediately abort on quota, try next model
+                # if "429" in str(e) or "quota" in str(e).lower():
+                #     raise RateLimitError("Quota limit hit")
                 status = 429 if "429" in str(e) else 0
                 self._record_failure(key, str(e), status_code=status)
                 print(f"  ✗ Failed ({type(e).__name__}), trying next …", flush=True)

package/payload/start.py CHANGED Viewed

@@ -410,6 +410,12 @@ OLLAMA_MODEL = "qwen3-vl:4b"   # vision-language model, fits ~6 GB VRAM / ~8 GB
 def setup_ollama():
     h1("Step 4 of 7 — Offline AI Model (Ollama)")
+    machine = platform.machine().lower()
+    is_arm = any(arch in machine for arch in ["arm", "aarch64"])
+    if is_arm:
+        info(f"ARM device detected ({machine}) — skipping Ollama local installation (heavy resource/unsupported).")
+        return
     keys = _load_saved_api_keys()
     has_cloud_key = any(keys.get(k) for k in ("GEMINI_API_KEY", "GROQ_API_KEY", "OPENROUTER_API_KEY"))
@@ -547,6 +553,12 @@ WHISPER_MODELS = {
 def setup_whisper():
     h1("Step 5 of 7 — Offline Audio Transcription (Whisper)")
+    machine = platform.machine().lower()
+    is_arm = any(arch in machine for arch in ["arm", "aarch64"])
+    if is_arm:
+        info(f"ARM device detected ({machine}) — skipping local Whisper installation to prevent wheel build failures.")
+        return
     keys = _load_saved_api_keys()
     has_groq_key = bool(keys.get("GROQ_API_KEY") or os.getenv("GROQ_API_KEY"))