npm - @tiens.nguyen/gonext-local-worker - Versions diffs - 1.0.81 → 1.0.83 - Mend

@tiens.nguyen/gonext-local-worker 1.0.81 → 1.0.83

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/gonext_agent_chat.py +46 -2
package/package.json +1 -1

package/gonext_agent_chat.py CHANGED Viewed

@@ -64,6 +64,32 @@ def _http_request_impl(method, url, headers=None, body=None, timeout=25):
         return f"Error: {e}"
+def _detect_model_id(base_url, api_key=""):
+    """Ask an OpenAI-compatible server which model it serves.
+    Queries GET {base_url}/models and returns the first reported model id.
+    `base_url` already ends with /v1. Returns "" on any failure so callers can
+    fall back. Used when the user supplies a coding-model URL but no model name.
+    """
+    url = base_url.rstrip("/") + "/models"
+    headers = {"Accept": "application/json"}
+    if api_key and api_key != "local":
+        headers["Authorization"] = f"Bearer {api_key}"
+    req = urllib.request.Request(url, method="GET", headers=headers)
+    try:
+        with urllib.request.urlopen(req, timeout=10, context=_ssl_context()) as resp:
+            payload = json.loads(resp.read().decode("utf-8", errors="replace"))
+    except Exception as e:  # noqa: BLE001
+        _log(f"model detect failed {url}: {e}")
+        return ""
+    data = payload.get("data") if isinstance(payload, dict) else None
+    if isinstance(data, list) and data:
+        first = data[0]
+        if isinstance(first, dict) and isinstance(first.get("id"), str):
+            return first["id"].strip()
+    return ""
 def _summarise_step(step_log):
     """Return a short human-readable description of an agent step."""
     tool_calls = getattr(step_log, "tool_calls", None) or []
@@ -249,8 +275,26 @@ def run_agent_chat(cfg):
     # Optional dedicated coding/reasoning model for the CodeAgent's tool-use loop.
     # Routing, plain replies and summarization stay on the chat model (better at
     # natural language); the code model only drives http_request reasoning.
-    coding_base_url = (cfg.get("codingBaseURL") or "").strip() or agent_base_url
-    coding_model_id = (cfg.get("codingModelId") or "").strip() or agent_model_id
+    raw_coding_base = (cfg.get("codingBaseURL") or "").strip()
+    raw_coding_model = (cfg.get("codingModelId") or "").strip()
+    if raw_coding_base:
+        # A dedicated coding server is configured. If no model name was given,
+        # ask the server which model it serves (mlx_lm.server otherwise tries to
+        # download a mismatched name from HF and 404s).
+        detected = raw_coding_model or _detect_model_id(raw_coding_base, agent_api_key)
+        if detected:
+            coding_base_url = raw_coding_base
+            coding_model_id = detected
+        else:
+            _log(
+                f"coding model id unresolved for {raw_coding_base!r}; "
+                "falling back to chat model"
+            )
+            coding_base_url = agent_base_url
+            coding_model_id = agent_model_id
+    else:
+        coding_base_url = agent_base_url
+        coding_model_id = agent_model_id
     max_steps = int(cfg.get("maxSteps") or 5)
     _log(

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tiens.nguyen/gonext-local-worker",
-  "version": "1.0.81",
+  "version": "1.0.83",
   "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
   "type": "module",
   "license": "MIT",