npm - @tiens.nguyen/gonext-local-worker - Versions diffs - 1.0.90 → 1.0.92 - Mend

@tiens.nguyen/gonext-local-worker 1.0.90 → 1.0.92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/gonext_agent_chat.py +80 -23
package/package.json +1 -1

package/gonext_agent_chat.py CHANGED

@@ -215,8 +215,8 @@ def _summarise_step(step_log):
         else:
             parts.append(f"→ Error: {err[:120]}")
-    label = f"Step {step_num}: " if step_num is not None else ""
-    return label + (" | ".join(parts) if parts else "thinking…")
+    # No numeric "Step N:" prefix — show only the semantic action.
+    return (" | ".join(parts) if parts else "thinking…")
 # Keywords that strongly indicate the user wants to make an HTTP/network request,
@@ -241,9 +241,13 @@ def _route(task_text: str, base_url: str, api_key: str, model_id: str) -> bool:
     Fast-path: if the user explicitly mentions network/request keywords → agent.
     Otherwise: ask the model to classify.
     """
+    # Show the routing stage in the web Thinking panel.
+    _emit({"type": "step", "text": "Routing your request…"})
     # Fast-path: explicit HTTP/network intent overrides the model classifier.
     if _AGENT_KEYWORDS.search(task_text):
         _log(f"router → YES (keyword match)")
+        _emit({"type": "step", "text": "→ Agent mode (needs tools)"})
         return True
     try:
@@ -271,9 +275,12 @@ def _route(task_text: str, base_url: str, api_key: str, model_id: str) -> bool:
         )
         answer = (resp.choices[0].message.content or "").strip().upper()
         _log(f"router → {answer!r} (model)")
-        return answer.startswith("Y")
+        is_agent = answer.startswith("Y")
+        _emit({"type": "step", "text": "→ Agent mode (needs tools)" if is_agent else "→ Chat reply"})
+        return is_agent
     except Exception as e:  # noqa: BLE001
         _log(f"router error: {e} — defaulting to agent")
+        _emit({"type": "step", "text": "→ Agent mode (needs tools)"})
         return True
@@ -339,6 +346,18 @@ def _plain_reply(messages: list, base_url: str, api_key: str, model_id: str) ->
         return f"[Error: {e}]"
+def _strip_tool_tags(text: str) -> str:
+    """Drop internal hint lines from tool output before it is shown to the user.
+
+    A line is removed in full when, ignoring surrounding whitespace, it starts
+    with '[SUCCESS', '[NOTE:' or 'Note: This URL failed'. Every other line is
+    kept unchanged, including its indentation.
+
+    Args:
+        text: Tool output or agent answer. A falsy value (None, '') is treated
+            as an empty string.
+
+    Returns:
+        The remaining lines joined with a single newline, with leading and
+        trailing whitespace of the whole result removed. May be empty when
+        every line was a hint line or the input was empty.
+    """
+    out = []
+    # splitlines() accepts any line-ending style; the result is re-joined with "\n".
+    for ln in (text or "").splitlines():
+        # Match on the stripped copy so indented hint lines are caught too.
+        s = ln.strip()
+        if s.startswith("[SUCCESS") or s.startswith("[NOTE:") or s.startswith("Note: This URL failed"):
+            continue
+        # Keep the original line (not the stripped copy) to preserve indentation.
+        out.append(ln)
+    return "\n".join(out).strip()
 def run_agent_chat(cfg):
     try:
         from smolagents import CodeAgent, OpenAIServerModel, tool
@@ -373,7 +392,11 @@ def run_agent_chat(cfg):
     else:
         coding_base_url = agent_base_url
         coding_model_id = agent_model_id
-    max_steps = int(cfg.get("maxSteps") or 5)
+    # Strict single-shot: exactly ONE agent model call per message. The single code
+    # block must call a tool AND final_answer together — no multi-step ReAct loop.
+    # If the model fails to call final_answer, the max-steps fallback below returns
+    # the last tool observation deterministically (no extra model call).
+    max_steps = 1
     _log(
         f"start model={agent_model_id!r} base={agent_base_url!r} "
@@ -451,6 +474,7 @@ def run_agent_chat(cfg):
     if not needs_agent:
         _log("router: plain chat (no HTTP needed)")
+        _emit({"type": "step", "text": "Composing a reply…"})
         answer = _plain_reply(messages, agent_base_url, agent_api_key, agent_model_id)
         _log(f"plain reply: {len(answer)} chars")
         _emit({"type": "final", "text": answer})
@@ -458,20 +482,23 @@ def run_agent_chat(cfg):
     # Agent path — from here all step events go into <think>.
     _log("router: agent (HTTP tool use needed)")
-    _emit({"type": "step", "text": "Planning HTTP request…"})
+    _emit({"type": "step", "text": "Choosing a tool…"})
     # Prepend explicit tool instructions so small models pick the right tool, never
     # fabricate URLs/responses, and always terminate with final_answer().
     from datetime import datetime as _dt_now
     now_str = _dt_now.now().astimezone().strftime("%A, %d %B %Y, %H:%M %Z")
     tool_hint = (
-        f"Current date/time: {now_str}.\n\n"
+        "YOU HAVE EXACTLY ONE TURN. Read the TASK above. In a single code block, call "
+        "the ONE tool that fits THAT task, then pass its result to final_answer(). "
+        "Do not plan multiple steps.\n\n"
         "You have THREE tools:\n"
         "  1. http_request(method, url, headers='', body='', username='', password='') — "
         "call a SPECIFIC known API/URL.\n"
-        "  2. web_search(query) — look up facts when you do NOT already have a real URL. "
-        "Returns a summary + source.\n"
-        "  3. get_current_datetime(timezone='') — current date/time (no HTTP needed).\n"
+        "  2. web_search(query) — look up facts about a person, place, thing, or topic "
+        "when you do NOT already have a real URL. Returns a summary + source.\n"
+        f"  3. get_current_datetime(timezone='') — current date/time ONLY (now: {now_str}). "
+        "Use this ONLY when the task explicitly asks for the date or time.\n"
         "\n"
         "http_request RETURN FORMAT: 'HTTP 200\\n{body}' — first line is 'HTTP <code>', body follows.\n"
         "\n"
@@ -485,9 +512,10 @@ def run_agent_chat(cfg):
         "  response = http_request('GET', url, headers='{\"Authorization\": \"Bearer TOKEN\"}')\n"
         "  final_answer(response)\n"
         "\n"
-        "CHOOSING A TOOL:\n"
-        "- Date/time question -> get_current_datetime(); do NOT use http_request.\n"
-        "- 'find' / 'look up' / 'what is' / general knowledge -> web_search(query).\n"
+        "CHOOSING A TOOL (match the TASK, not these examples):\n"
+        "- ONLY a date/time question (e.g. 'what is the date today') -> get_current_datetime().\n"
+        "- 'who is' / 'what is' / 'tell me about' / a person / place / topic / general "
+        "knowledge -> web_search(query).\n"
         "- A specific known API/URL was given -> http_request().\n"
         "\n"
         "RULES:\n"
@@ -499,11 +527,23 @@ def run_agent_chat(cfg):
         "- If a tool returns 'Error:' or HTTP 4xx/5xx, try a DIFFERENT approach, not the same URL.\n"
         "- Do NOT put final_answer outside the code block.\n\n"
     )
-    task_with_hint = tool_hint + "Task: " + task_text
+    # Lead with the TASK so the weak model anchors on what's actually being asked —
+    # not on the tool reference below. (Previously the hint led with the date, and the
+    # 3B model treated every message as a date question.)
+    task_with_hint = (
+        "TASK (answer THIS, choose the tool that fits it):\n"
+        f"{task_text}\n\n"
+        "----- TOOL REFERENCE -----\n"
+        + tool_hint
+    )
     # Track URLs that have already failed so we don't retry dead endpoints across steps.
     _failed_urls: set = set()
+    # Remember the last tool output so the single-shot fallback + the deterministic
+    # final formatting can report exactly what a tool returned (no extra model call).
+    _last_obs: dict = {"text": ""}
     @tool
     def http_request(method: str, url: str, headers: str = "", body: str = "",
                      username: str = "", password: str = "") -> str:
@@ -561,6 +601,7 @@ def run_agent_chat(cfg):
             result = result + "\n[SUCCESS — call final_answer(response) now, do not parse or retry]"
         _emit({"type": "step", "text": f"HTTP {method.upper()} {url} → {status_line}"})
         _log(f"http_request {method.upper()} {url} → {result[:80]}")
+        _last_obs["text"] = result
         return result
     @tool
@@ -582,6 +623,7 @@ def run_agent_chat(cfg):
         out = now.strftime("%A, %d %B %Y, %H:%M:%S %Z")
         _emit({"type": "step", "text": f"Current date/time → {out}"})
         _log(f"get_current_datetime({timezone!r}) → {out}")
+        _last_obs["text"] = out
         return out
     @tool
@@ -597,6 +639,7 @@ def run_agent_chat(cfg):
         _emit({"type": "step", "text": f"Searching the web → {query[:80]}"})
         result = _web_search_impl(query)
         _log(f"web_search {query[:60]!r} → {result[:80]}")
+        _last_obs["text"] = result
         return result
     def step_callback(step_log):
@@ -658,13 +701,28 @@ def run_agent_chat(cfg):
                 _log(f"MODEL REQUEST log error: {e}")
             return ck
+    # Single-shot agent: if the one model call doesn't end in final_answer(),
+    # smolagents would normally make an EXTRA model call (provide_final_answer) to
+    # synthesize one. We override that to return the last tool observation
+    # deterministically — keeping the agent to EXACTLY ONE model call, and never
+    # corrupting exact tool output (dates/numbers) the way a weak model would.
+    class _SingleShotAgent(CodeAgent):
+        """CodeAgent whose final-answer fallback never calls the model.
+
+        Overrides provide_final_answer() so that the answer is taken from the
+        enclosing function's `_last_obs` record (the text most recently stored
+        by one of the tools) instead of being synthesized.
+        """
+
+        def provide_final_answer(self, task, *args, **kwargs):
+            """Return the last recorded tool output as the assistant's answer.
+
+            `task` and any extra positional/keyword arguments are accepted for
+            signature compatibility and ignored.
+
+            Returns a ChatMessage with the assistant role whose content is the
+            stripped `_last_obs["text"]`, or a fixed apology message when no
+            tool output was recorded.
+            NOTE(review): the return type the base class expects from this hook
+            may differ between smolagents versions — confirm against the
+            installed version.
+            """
+            # Imported lazily, inside the method, rather than at module import time.
+            from smolagents.models import ChatMessage, MessageRole
+            # `or ""` guards against a missing or None entry before stripping.
+            text = (_last_obs.get("text") or "").strip()
+            if not text:
+                # No tool stored any output: fall back to a fixed user-facing message.
+                text = ("I couldn't complete that in one step. Please rephrase, or give "
+                        "a specific URL/API to call.")
+            # Only the first 80 characters are logged; the full text is returned.
+            _log(f"single-shot fallback (no model call) → {text[:80]}")
+            return ChatMessage(role=MessageRole.ASSISTANT, content=text)
     try:
         model = _LoggingModel(
             model_id=coding_model_id,
             api_base=coding_base_url,
             api_key=agent_api_key,
         )
-        agent = CodeAgent(
+        agent = _SingleShotAgent(
             tools=[http_request, web_search, get_current_datetime],
             model=model,
             max_steps=max_steps,
@@ -674,15 +732,14 @@ def run_agent_chat(cfg):
         )
         with contextlib.redirect_stdout(sys.stderr):
             result = agent.run(task_with_hint)
-        # Summarize with the AGENT (coding) model that ran the tools — not the chat
-        # model — so it faithfully reports exact tool output (dates/numbers) instead
-        # of paraphrasing and corrupting it. Falls back to the chat model when no
-        # dedicated coding server is configured (coding_* default to agent_*).
-        final_text = _summarize_result(
-            task_text, str(result).strip(),
-            coding_base_url, agent_api_key, coding_model_id
-        )
-        _log(f"done: {len(final_text)} chars")
+        # Deterministic final formatting — NO summarizer model call. The agent's
+        # final_answer (or the single-shot fallback above) already holds exact tool
+        # output; we just strip the internal hint tags we appended to tool results so
+        # they don't leak to the user. This permanently fixes the date-corruption a
+        # weak summarizer model used to introduce.
+        _emit({"type": "step", "text": "Composing answer…"})
+        final_text = _strip_tool_tags(str(result).strip()) or "[No result]"
+        _log(f"done (deterministic, no summarizer call): {len(final_text)} chars")
         _emit({"type": "final", "text": final_text})
     except Exception as e:  # noqa: BLE001
         _log(f"agent error: {e}")

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@tiens.nguyen/gonext-local-worker",
-  "version": "1.0.90",
+  "version": "1.0.92",
   "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
   "type": "module",
   "license": "MIT",