@tiens.nguyen/gonext-local-worker 1.0.84 → 1.0.86
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/gonext_agent_chat.py +85 -19
- package/package.json +1 -1
package/gonext_agent_chat.py
CHANGED
|
@@ -302,28 +302,64 @@ def run_agent_chat(cfg):
|
|
|
302
302
|
f"codeModel={coding_model_id!r} codeBase={coding_base_url!r} maxSteps={max_steps}"
|
|
303
303
|
)
|
|
304
304
|
|
|
305
|
-
# Build task from the conversation history.
|
|
306
|
-
#
|
|
307
|
-
#
|
|
308
|
-
#
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
for
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
305
|
+
# Build the task from the conversation history. We include the FULL conversation
|
|
306
|
+
# (both user AND assistant turns) so the agent remembers what it already did —
|
|
307
|
+
# e.g. data it fetched on a previous turn. Assistant turns are condensed (drop
|
|
308
|
+
# <think> reasoning; clip long raw HTTP dumps), and we keep the most recent turns
|
|
309
|
+
# within a character budget so we never overflow the model's context window.
|
|
310
|
+
# ~8000 chars ≈ 2k tokens, tiny against Qwen2.5-Coder-7B's 32k context, leaving
|
|
311
|
+
# ample room for smolagents' own system prompt + step memory (HTTP observations).
|
|
312
|
+
HISTORY_CHAR_BUDGET = 8000
|
|
313
|
+
think_re = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
|
|
314
|
+
|
|
315
|
+
def _condense(role, content):
|
|
316
|
+
text = (content or "").strip()
|
|
317
|
+
if role == "assistant":
|
|
318
|
+
text = think_re.sub("", text).strip()
|
|
319
|
+
# Raw HTTP dumps add little conversational value — keep only a snippet.
|
|
320
|
+
if text.startswith("HTTP "):
|
|
321
|
+
text = text[:500]
|
|
322
|
+
return text
|
|
323
|
+
|
|
324
|
+
# The latest user message is the current task; everything before it is history.
|
|
325
|
+
last_user_idx = -1
|
|
326
|
+
for i, m in enumerate(messages):
|
|
327
|
+
if m.get("role") == "user":
|
|
328
|
+
last_user_idx = i
|
|
329
|
+
if last_user_idx < 0:
|
|
320
330
|
_emit({"type": "final", "text": "[No user message found in history]"})
|
|
321
331
|
return
|
|
332
|
+
task_text = (messages[last_user_idx].get("content") or "").strip()
|
|
322
333
|
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
334
|
+
# Walk prior turns newest-first, keeping condensed lines until the budget is
|
|
335
|
+
# spent, then restore chronological (oldest→newest) order.
|
|
336
|
+
history_lines = []
|
|
337
|
+
used = 0
|
|
338
|
+
for m in reversed(messages[:last_user_idx]):
|
|
339
|
+
role = m.get("role", "")
|
|
340
|
+
if role not in ("user", "assistant"):
|
|
341
|
+
continue
|
|
342
|
+
text = _condense(role, m.get("content", ""))
|
|
343
|
+
if not text:
|
|
344
|
+
continue
|
|
345
|
+
line = f"{'User' if role == 'user' else 'Assistant'}: {text}"
|
|
346
|
+
if used + len(line) > HISTORY_CHAR_BUDGET:
|
|
347
|
+
break
|
|
348
|
+
history_lines.append(line)
|
|
349
|
+
used += len(line)
|
|
350
|
+
history_lines.reverse()
|
|
351
|
+
|
|
352
|
+
if history_lines:
|
|
353
|
+
convo = "\n".join(history_lines)
|
|
354
|
+
task_text = (
|
|
355
|
+
"Conversation so far (oldest to newest):\n"
|
|
356
|
+
f"{convo}\n\nCurrent task: {task_text}"
|
|
357
|
+
)
|
|
326
358
|
|
|
359
|
+
_log(
|
|
360
|
+
f"history: {len(history_lines)} prior turn(s), {used} chars "
|
|
361
|
+
f"(budget {HISTORY_CHAR_BUDGET})"
|
|
362
|
+
)
|
|
327
363
|
_log(f"task={task_text[:120]!r}")
|
|
328
364
|
|
|
329
365
|
# Route: ask the model if this task needs HTTP tool use.
|
|
@@ -450,8 +486,38 @@ def run_agent_chat(cfg):
|
|
|
450
486
|
_log(f"step {step_num}: {text[:200]}")
|
|
451
487
|
_emit({"type": "step", "text": text})
|
|
452
488
|
|
|
489
|
+
# Wrap the model so we can see EXACTLY what smolagents posts to the model
|
|
490
|
+
# server on every step — including its own system prompt, the task we passed,
|
|
491
|
+
# and any step memory it accumulates. completion_kwargs["messages"] here is the
|
|
492
|
+
# literal messages array sent to /v1/chat/completions.
|
|
493
|
+
class _LoggingModel(OpenAIServerModel):
|
|
494
|
+
def _prepare_completion_kwargs(self, *args, **kwargs):
|
|
495
|
+
ck = super()._prepare_completion_kwargs(*args, **kwargs)
|
|
496
|
+
try:
|
|
497
|
+
msgs = ck.get("messages", []) or []
|
|
498
|
+
_log(f"=== MODEL REQUEST: {len(msgs)} message(s) sent to the model ===")
|
|
499
|
+
for i, m in enumerate(msgs):
|
|
500
|
+
role = m.get("role") if isinstance(m, dict) else getattr(m, "role", "?")
|
|
501
|
+
content = (
|
|
502
|
+
m.get("content") if isinstance(m, dict)
|
|
503
|
+
else getattr(m, "content", "")
|
|
504
|
+
)
|
|
505
|
+
if isinstance(content, list):
|
|
506
|
+
text = " ".join(
|
|
507
|
+
(c.get("text", "") if isinstance(c, dict) else str(c))
|
|
508
|
+
for c in content
|
|
509
|
+
)
|
|
510
|
+
else:
|
|
511
|
+
text = str(content)
|
|
512
|
+
text = text.replace("\n", " ")
|
|
513
|
+
_log(f" [{i}] {role} ({len(text)} chars): {text[:600]}")
|
|
514
|
+
_log("=== END MODEL REQUEST ===")
|
|
515
|
+
except Exception as e: # noqa: BLE001
|
|
516
|
+
_log(f"MODEL REQUEST log error: {e}")
|
|
517
|
+
return ck
|
|
518
|
+
|
|
453
519
|
try:
|
|
454
|
-
model = OpenAIServerModel(
|
|
520
|
+
model = _LoggingModel(
|
|
455
521
|
model_id=coding_model_id,
|
|
456
522
|
api_base=coding_base_url,
|
|
457
523
|
api_key=agent_api_key,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tiens.nguyen/gonext-local-worker",
|
|
3
|
-
"version": "1.0.84",
|
|
3
|
+
"version": "1.0.86",
|
|
4
4
|
"description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|