@tiens.nguyen/gonext-local-worker 1.0.82 → 1.0.84

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -64,6 +64,32 @@ def _http_request_impl(method, url, headers=None, body=None, timeout=25):
64
64
  return f"Error: {e}"
65
65
 
66
66
 
67
def _detect_model_id(base_url, api_key=""):
    """Ask an OpenAI-compatible server which model it serves.

    Queries ``GET {base_url}/models`` and returns the first usable model id
    from the response's ``data`` list. `base_url` already ends with /v1.
    Used when the user supplies a coding-model URL but no model name.

    Args:
        base_url: Server base URL; a trailing slash is tolerated.
        api_key: Optional bearer token. Empty or ``"local"`` means no
            ``Authorization`` header is sent.

    Returns:
        The stripped model id, or ``""`` on any failure (network error,
        non-JSON body, unexpected payload shape, no usable id) so callers
        can fall back.
    """
    url = base_url.rstrip("/") + "/models"
    headers = {"Accept": "application/json"}
    # "local" is treated the same as no key: no Authorization header is sent.
    if api_key and api_key != "local":
        headers["Authorization"] = f"Bearer {api_key}"
    req = urllib.request.Request(url, method="GET", headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=10, context=_ssl_context()) as resp:
            payload = json.loads(resp.read().decode("utf-8", errors="replace"))
    except Exception as e:  # noqa: BLE001
        # Best-effort probe: log and report "unknown" rather than raising.
        _log(f"model detect failed {url}: {e}")
        return ""
    data = payload.get("data") if isinstance(payload, dict) else None
    if not isinstance(data, list):
        return ""
    # Scan for the first entry with a non-blank string id instead of trusting
    # data[0] alone, so one malformed leading entry does not hide valid ones.
    for entry in data:
        if not isinstance(entry, dict):
            continue
        model_id = entry.get("id")
        if isinstance(model_id, str) and model_id.strip():
            return model_id.strip()
    return ""
91
+
92
+
67
93
  def _summarise_step(step_log):
68
94
  """Return a short human-readable description of an agent step."""
69
95
  tool_calls = getattr(step_log, "tool_calls", None) or []
@@ -249,8 +275,26 @@ def run_agent_chat(cfg):
249
275
  # Optional dedicated coding/reasoning model for the CodeAgent's tool-use loop.
250
276
  # Routing, plain replies and summarization stay on the chat model (better at
251
277
  # natural language); the code model only drives http_request reasoning.
252
- coding_base_url = (cfg.get("codingBaseURL") or "").strip() or agent_base_url
253
- coding_model_id = (cfg.get("codingModelId") or "").strip() or agent_model_id
278
+ raw_coding_base = (cfg.get("codingBaseURL") or "").strip()
279
+ raw_coding_model = (cfg.get("codingModelId") or "").strip()
280
+ if raw_coding_base:
281
+ # A dedicated coding server is configured. If no model name was given,
282
+ # ask the server which model it serves (mlx_lm.server otherwise tries to
283
+ # download a mismatched name from HF and 404s).
284
+ detected = raw_coding_model or _detect_model_id(raw_coding_base, agent_api_key)
285
+ if detected:
286
+ coding_base_url = raw_coding_base
287
+ coding_model_id = detected
288
+ else:
289
+ _log(
290
+ f"coding model id unresolved for {raw_coding_base!r}; "
291
+ "falling back to chat model"
292
+ )
293
+ coding_base_url = agent_base_url
294
+ coding_model_id = agent_model_id
295
+ else:
296
+ coding_base_url = agent_base_url
297
+ coding_model_id = agent_model_id
254
298
  max_steps = int(cfg.get("maxSteps") or 5)
255
299
 
256
300
  _log(
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tiens.nguyen/gonext-local-worker",
3
- "version": "1.0.82",
3
+ "version": "1.0.84",
4
4
  "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
5
5
  "type": "module",
6
6
  "license": "MIT",