@tiens.nguyen/gonext-local-worker 1.0.85 → 1.0.87
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/gonext_agent_chat.py +88 -20
- package/package.json +1 -1
package/gonext_agent_chat.py
CHANGED
|
@@ -302,29 +302,67 @@ def run_agent_chat(cfg):
|
|
|
302
302
|
f"codeModel={coding_model_id!r} codeBase={coding_base_url!r} maxSteps={max_steps}"
|
|
303
303
|
)
|
|
304
304
|
|
|
305
|
-
# Build task from the conversation history.
|
|
306
|
-
#
|
|
307
|
-
#
|
|
308
|
-
#
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
for
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
305
|
+
# Build the task from the conversation history. We include the FULL conversation
|
|
306
|
+
# (both user AND assistant turns) so the agent remembers what it already did —
|
|
307
|
+
# e.g. data it fetched on a previous turn. Assistant turns are condensed (drop
|
|
308
|
+
# <think> reasoning; clip long raw HTTP dumps), and we keep the most recent turns
|
|
309
|
+
# within a character budget so we never overflow the model's context window.
|
|
310
|
+
# ~8000 chars ≈ 2k tokens, tiny against Qwen2.5-Coder-7B's 32k context, leaving
|
|
311
|
+
# ample room for smolagents' own system prompt + step memory (HTTP observations).
|
|
312
|
+
HISTORY_CHAR_BUDGET = 8000
|
|
313
|
+
think_re = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
|
|
314
|
+
|
|
315
|
+
def _condense(role, content):
|
|
316
|
+
text = (content or "").strip()
|
|
317
|
+
if role == "assistant":
|
|
318
|
+
text = think_re.sub("", text).strip()
|
|
319
|
+
# Raw HTTP dumps add little conversational value — keep only a snippet.
|
|
320
|
+
if text.startswith("HTTP "):
|
|
321
|
+
text = text[:500]
|
|
322
|
+
return text
|
|
323
|
+
|
|
324
|
+
# The latest user message is the current task; everything before it is history.
|
|
325
|
+
last_user_idx = -1
|
|
326
|
+
for i, m in enumerate(messages):
|
|
327
|
+
if m.get("role") == "user":
|
|
328
|
+
last_user_idx = i
|
|
329
|
+
if last_user_idx < 0:
|
|
320
330
|
_emit({"type": "final", "text": "[No user message found in history]"})
|
|
321
331
|
return
|
|
332
|
+
task_text = (messages[last_user_idx].get("content") or "").strip()
|
|
322
333
|
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
334
|
+
# Walk prior turns newest-first, keeping condensed lines until the budget is
|
|
335
|
+
# spent, then restore chronological (oldest→newest) order.
|
|
336
|
+
history_lines = []
|
|
337
|
+
used = 0
|
|
338
|
+
for m in reversed(messages[:last_user_idx]):
|
|
339
|
+
role = m.get("role", "")
|
|
340
|
+
if role not in ("user", "assistant"):
|
|
341
|
+
continue
|
|
342
|
+
text = _condense(role, m.get("content", ""))
|
|
343
|
+
if not text:
|
|
344
|
+
continue
|
|
345
|
+
line = f"{'User' if role == 'user' else 'Assistant'}: {text}"
|
|
346
|
+
if used + len(line) > HISTORY_CHAR_BUDGET:
|
|
347
|
+
break
|
|
348
|
+
history_lines.append(line)
|
|
349
|
+
used += len(line)
|
|
350
|
+
history_lines.reverse()
|
|
351
|
+
|
|
352
|
+
if history_lines:
|
|
353
|
+
convo = "\n".join(history_lines)
|
|
354
|
+
task_text = (
|
|
355
|
+
"Conversation so far (oldest to newest):\n"
|
|
356
|
+
f"{convo}\n\nCurrent task: {task_text}"
|
|
357
|
+
)
|
|
326
358
|
|
|
327
|
-
_log(
|
|
359
|
+
_log(
|
|
360
|
+
f"history: {len(history_lines)} prior turn(s), {used} chars "
|
|
361
|
+
f"(budget {HISTORY_CHAR_BUDGET}) — exact turns sent to the agent below:"
|
|
362
|
+
)
|
|
363
|
+
for j, ln in enumerate(history_lines):
|
|
364
|
+
_log(f" history[{j}]: {ln[:240]}")
|
|
365
|
+
_log(f"current task (latest user message): {task_text.rsplit('Current task: ', 1)[-1][:240]!r}")
|
|
328
366
|
|
|
329
367
|
# Route: ask the model if this task needs HTTP tool use.
|
|
330
368
|
needs_agent = _route(task_text, agent_base_url, agent_api_key, agent_model_id)
|
|
@@ -450,8 +488,38 @@ def run_agent_chat(cfg):
|
|
|
450
488
|
_log(f"step {step_num}: {text[:200]}")
|
|
451
489
|
_emit({"type": "step", "text": text})
|
|
452
490
|
|
|
491
|
+
# Wrap the model so we can see EXACTLY what smolagents posts to the model
|
|
492
|
+
# server on every step — including its own system prompt, the task we passed,
|
|
493
|
+
# and any step memory it accumulates. completion_kwargs["messages"] here is the
|
|
494
|
+
# literal messages array sent to /v1/chat/completions.
|
|
495
|
+
class _LoggingModel(OpenAIServerModel):
|
|
496
|
+
def _prepare_completion_kwargs(self, *args, **kwargs):
|
|
497
|
+
ck = super()._prepare_completion_kwargs(*args, **kwargs)
|
|
498
|
+
try:
|
|
499
|
+
msgs = ck.get("messages", []) or []
|
|
500
|
+
_log(f"=== MODEL REQUEST: {len(msgs)} message(s) sent to the model ===")
|
|
501
|
+
for i, m in enumerate(msgs):
|
|
502
|
+
role = m.get("role") if isinstance(m, dict) else getattr(m, "role", "?")
|
|
503
|
+
content = (
|
|
504
|
+
m.get("content") if isinstance(m, dict)
|
|
505
|
+
else getattr(m, "content", "")
|
|
506
|
+
)
|
|
507
|
+
if isinstance(content, list):
|
|
508
|
+
text = " ".join(
|
|
509
|
+
(c.get("text", "") if isinstance(c, dict) else str(c))
|
|
510
|
+
for c in content
|
|
511
|
+
)
|
|
512
|
+
else:
|
|
513
|
+
text = str(content)
|
|
514
|
+
text = text.replace("\n", " ")
|
|
515
|
+
_log(f" [{i}] {role} ({len(text)} chars): {text[:600]}")
|
|
516
|
+
_log("=== END MODEL REQUEST ===")
|
|
517
|
+
except Exception as e: # noqa: BLE001
|
|
518
|
+
_log(f"MODEL REQUEST log error: {e}")
|
|
519
|
+
return ck
|
|
520
|
+
|
|
453
521
|
try:
|
|
454
|
-
model = OpenAIServerModel(
|
|
522
|
+
model = _LoggingModel(
|
|
455
523
|
model_id=coding_model_id,
|
|
456
524
|
api_base=coding_base_url,
|
|
457
525
|
api_key=agent_api_key,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tiens.nguyen/gonext-local-worker",
|
|
3
|
-
"version": "1.0.85",
|
|
3
|
+
"version": "1.0.87",
|
|
4
4
|
"description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|