npm - @tiens.nguyen/gonext-local-worker - Versions diffs - 1.0.84 → 1.0.86 - Mend

@tiens.nguyen/gonext-local-worker 1.0.84 → 1.0.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/gonext_agent_chat.py +85 -19
package/package.json +1 -1

package/gonext_agent_chat.py CHANGED Viewed

@@ -302,28 +302,64 @@ def run_agent_chat(cfg):
         f"codeModel={coding_model_id!r} codeBase={coding_base_url!r} maxSteps={max_steps}"
     )
-    # Build task from the conversation history.
-    # Include prior USER messages as context so the agent has conversational memory,
-    # but exclude prior ASSISTANT messages (they contain raw HTTP/thinking content
-    # that confuses small models).
-    task_text = ""
-    prior_user_msgs = []
-    for m in messages:
-        role = m.get("role", "")
-        content = m.get("content", "")
-        if role == "user":
-            if task_text:
-                prior_user_msgs.append(task_text)
-            task_text = content
-    if not task_text:
+    # Build the task from the conversation history. We include the FULL conversation
+    # (both user AND assistant turns) so the agent remembers what it already did —
+    # e.g. data it fetched on a previous turn. Assistant turns are condensed (drop
+    # <think> reasoning; clip long raw HTTP dumps), and we keep the most recent turns
+    # within a character budget so we never overflow the model's context window.
+    # ~8000 chars ≈ 2k tokens, tiny against Qwen2.5-Coder-7B's 32k context, leaving
+    # ample room for smolagents' own system prompt + step memory (HTTP observations).
+    HISTORY_CHAR_BUDGET = 8000
+    think_re = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
+    def _condense(role, content):
+        text = (content or "").strip()
+        if role == "assistant":
+            text = think_re.sub("", text).strip()
+            # Raw HTTP dumps add little conversational value — keep only a snippet.
+            if text.startswith("HTTP "):
+                text = text[:500]
+        return text
+    # The latest user message is the current task; everything before it is history.
+    last_user_idx = -1
+    for i, m in enumerate(messages):
+        if m.get("role") == "user":
+            last_user_idx = i
+    if last_user_idx < 0:
         _emit({"type": "final", "text": "[No user message found in history]"})
         return
+    task_text = (messages[last_user_idx].get("content") or "").strip()
-    if prior_user_msgs:
-        context = "\n".join(f"- {t[:300]}" for t in prior_user_msgs[-4:])
-        task_text = f"Conversation context (previous user messages):\n{context}\n\nCurrent task: {task_text}"
+    # Walk prior turns newest-first, keeping condensed lines until the budget is
+    # spent, then restore chronological (oldest→newest) order.
+    history_lines = []
+    used = 0
+    for m in reversed(messages[:last_user_idx]):
+        role = m.get("role", "")
+        if role not in ("user", "assistant"):
+            continue
+        text = _condense(role, m.get("content", ""))
+        if not text:
+            continue
+        line = f"{'User' if role == 'user' else 'Assistant'}: {text}"
+        if used + len(line) > HISTORY_CHAR_BUDGET:
+            break
+        history_lines.append(line)
+        used += len(line)
+    history_lines.reverse()
+    if history_lines:
+        convo = "\n".join(history_lines)
+        task_text = (
+            "Conversation so far (oldest to newest):\n"
+            f"{convo}\n\nCurrent task: {task_text}"
+        )
+    _log(
+        f"history: {len(history_lines)} prior turn(s), {used} chars "
+        f"(budget {HISTORY_CHAR_BUDGET})"
+    )
     _log(f"task={task_text[:120]!r}")
     # Route: ask the model if this task needs HTTP tool use.
@@ -450,8 +486,38 @@ def run_agent_chat(cfg):
         _log(f"step {step_num}: {text[:200]}")
         _emit({"type": "step", "text": text})
+    # Wrap the model so we can see EXACTLY what smolagents posts to the model
+    # server on every step — including its own system prompt, the task we passed,
+    # and any step memory it accumulates. completion_kwargs["messages"] here is the
+    # literal messages array sent to /v1/chat/completions.
+    class _LoggingModel(OpenAIServerModel):
+        def _prepare_completion_kwargs(self, *args, **kwargs):
+            ck = super()._prepare_completion_kwargs(*args, **kwargs)
+            try:
+                msgs = ck.get("messages", []) or []
+                _log(f"=== MODEL REQUEST: {len(msgs)} message(s) sent to the model ===")
+                for i, m in enumerate(msgs):
+                    role = m.get("role") if isinstance(m, dict) else getattr(m, "role", "?")
+                    content = (
+                        m.get("content") if isinstance(m, dict)
+                        else getattr(m, "content", "")
+                    )
+                    if isinstance(content, list):
+                        text = " ".join(
+                            (c.get("text", "") if isinstance(c, dict) else str(c))
+                            for c in content
+                        )
+                    else:
+                        text = str(content)
+                    text = text.replace("\n", " ")
+                    _log(f"  [{i}] {role} ({len(text)} chars): {text[:600]}")
+                _log("=== END MODEL REQUEST ===")
+            except Exception as e:  # noqa: BLE001
+                _log(f"MODEL REQUEST log error: {e}")
+            return ck
     try:
-        model = OpenAIServerModel(
+        model = _LoggingModel(
             model_id=coding_model_id,
             api_base=coding_base_url,
             api_key=agent_api_key,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tiens.nguyen/gonext-local-worker",
-  "version": "1.0.84",
+  "version": "1.0.86",
   "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
   "type": "module",
   "license": "MIT",