@tiens.nguyen/gonext-local-worker 1.0.84 → 1.0.86

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -302,28 +302,64 @@ def run_agent_chat(cfg):
302
302
  f"codeModel={coding_model_id!r} codeBase={coding_base_url!r} maxSteps={max_steps}"
303
303
  )
304
304
 
305
- # Build task from the conversation history.
306
- # Include prior USER messages as context so the agent has conversational memory,
307
- # but exclude prior ASSISTANT messages (they contain raw HTTP/thinking content
308
- # that confuses small models).
309
- task_text = ""
310
- prior_user_msgs = []
311
- for m in messages:
312
- role = m.get("role", "")
313
- content = m.get("content", "")
314
- if role == "user":
315
- if task_text:
316
- prior_user_msgs.append(task_text)
317
- task_text = content
318
-
319
- if not task_text:
305
+ # Build the task from the conversation history. We include the FULL conversation
306
+ # (both user AND assistant turns) so the agent remembers what it already did —
307
+ # e.g. data it fetched on a previous turn. Assistant turns are condensed (drop
308
+ # <think> reasoning; clip long raw HTTP dumps), and we keep the most recent turns
309
+ # within a character budget so we never overflow the model's context window.
310
+ # ~8000 chars ≈ 2k tokens, tiny against Qwen2.5-Coder-7B's 32k context, leaving
311
+ # ample room for smolagents' own system prompt + step memory (HTTP observations).
312
+ HISTORY_CHAR_BUDGET = 8000
313
+ think_re = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
314
+
315
+ def _condense(role, content):
316
+ text = (content or "").strip()
317
+ if role == "assistant":
318
+ text = think_re.sub("", text).strip()
319
+ # Raw HTTP dumps add little conversational value — keep only a snippet.
320
+ if text.startswith("HTTP "):
321
+ text = text[:500]
322
+ return text
323
+
324
+ # The latest user message is the current task; everything before it is history.
325
+ last_user_idx = -1
326
+ for i, m in enumerate(messages):
327
+ if m.get("role") == "user":
328
+ last_user_idx = i
329
+ if last_user_idx < 0:
320
330
  _emit({"type": "final", "text": "[No user message found in history]"})
321
331
  return
332
+ task_text = (messages[last_user_idx].get("content") or "").strip()
322
333
 
323
- if prior_user_msgs:
324
- context = "\n".join(f"- {t[:300]}" for t in prior_user_msgs[-4:])
325
- task_text = f"Conversation context (previous user messages):\n{context}\n\nCurrent task: {task_text}"
334
+ # Walk prior turns newest-first, keeping condensed lines until the budget is
335
+ # spent, then restore chronological (oldest→newest) order.
336
+ history_lines = []
337
+ used = 0
338
+ for m in reversed(messages[:last_user_idx]):
339
+ role = m.get("role", "")
340
+ if role not in ("user", "assistant"):
341
+ continue
342
+ text = _condense(role, m.get("content", ""))
343
+ if not text:
344
+ continue
345
+ line = f"{'User' if role == 'user' else 'Assistant'}: {text}"
346
+ if used + len(line) > HISTORY_CHAR_BUDGET:
347
+ break
348
+ history_lines.append(line)
349
+ used += len(line)
350
+ history_lines.reverse()
351
+
352
+ if history_lines:
353
+ convo = "\n".join(history_lines)
354
+ task_text = (
355
+ "Conversation so far (oldest to newest):\n"
356
+ f"{convo}\n\nCurrent task: {task_text}"
357
+ )
326
358
 
359
+ _log(
360
+ f"history: {len(history_lines)} prior turn(s), {used} chars "
361
+ f"(budget {HISTORY_CHAR_BUDGET})"
362
+ )
327
363
  _log(f"task={task_text[:120]!r}")
328
364
 
329
365
  # Route: ask the model if this task needs HTTP tool use.
@@ -450,8 +486,38 @@ def run_agent_chat(cfg):
450
486
  _log(f"step {step_num}: {text[:200]}")
451
487
  _emit({"type": "step", "text": text})
452
488
 
489
+ # Wrap the model so we can see EXACTLY what smolagents posts to the model
490
+ # server on every step — including its own system prompt, the task we passed,
491
+ # and any step memory it accumulates. completion_kwargs["messages"] here is the
492
+ # literal messages array sent to /v1/chat/completions.
493
+ class _LoggingModel(OpenAIServerModel):
494
+ def _prepare_completion_kwargs(self, *args, **kwargs):
495
+ ck = super()._prepare_completion_kwargs(*args, **kwargs)
496
+ try:
497
+ msgs = ck.get("messages", []) or []
498
+ _log(f"=== MODEL REQUEST: {len(msgs)} message(s) sent to the model ===")
499
+ for i, m in enumerate(msgs):
500
+ role = m.get("role") if isinstance(m, dict) else getattr(m, "role", "?")
501
+ content = (
502
+ m.get("content") if isinstance(m, dict)
503
+ else getattr(m, "content", "")
504
+ )
505
+ if isinstance(content, list):
506
+ text = " ".join(
507
+ (c.get("text", "") if isinstance(c, dict) else str(c))
508
+ for c in content
509
+ )
510
+ else:
511
+ text = str(content)
512
+ text = text.replace("\n", " ")
513
+ _log(f" [{i}] {role} ({len(text)} chars): {text[:600]}")
514
+ _log("=== END MODEL REQUEST ===")
515
+ except Exception as e: # noqa: BLE001
516
+ _log(f"MODEL REQUEST log error: {e}")
517
+ return ck
518
+
453
519
  try:
454
- model = OpenAIServerModel(
520
+ model = _LoggingModel(
455
521
  model_id=coding_model_id,
456
522
  api_base=coding_base_url,
457
523
  api_key=agent_api_key,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tiens.nguyen/gonext-local-worker",
3
- "version": "1.0.84",
3
+ "version": "1.0.86",
4
4
  "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
5
5
  "type": "module",
6
6
  "license": "MIT",