@tiens.nguyen/gonext-local-worker 1.0.90 → 1.0.92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -215,8 +215,8 @@ def _summarise_step(step_log):
215
215
  else:
216
216
  parts.append(f"→ Error: {err[:120]}")
217
217
 
218
- label = f"Step {step_num}: " if step_num is not None else ""
219
- return label + (" | ".join(parts) if parts else "thinking…")
218
+ # No numeric "Step N:" prefix — show only the semantic action.
219
+ return (" | ".join(parts) if parts else "thinking…")
220
220
 
221
221
 
222
222
  # Keywords that strongly indicate the user wants to make an HTTP/network request,
@@ -241,9 +241,13 @@ def _route(task_text: str, base_url: str, api_key: str, model_id: str) -> bool:
241
241
  Fast-path: if the user explicitly mentions network/request keywords → agent.
242
242
  Otherwise: ask the model to classify.
243
243
  """
244
+ # Show the routing stage in the web Thinking panel.
245
+ _emit({"type": "step", "text": "Routing your request…"})
246
+
244
247
  # Fast-path: explicit HTTP/network intent overrides the model classifier.
245
248
  if _AGENT_KEYWORDS.search(task_text):
246
249
  _log(f"router → YES (keyword match)")
250
+ _emit({"type": "step", "text": "→ Agent mode (needs tools)"})
247
251
  return True
248
252
 
249
253
  try:
@@ -271,9 +275,12 @@ def _route(task_text: str, base_url: str, api_key: str, model_id: str) -> bool:
271
275
  )
272
276
  answer = (resp.choices[0].message.content or "").strip().upper()
273
277
  _log(f"router → {answer!r} (model)")
274
- return answer.startswith("Y")
278
+ is_agent = answer.startswith("Y")
279
+ _emit({"type": "step", "text": "→ Agent mode (needs tools)" if is_agent else "→ Chat reply"})
280
+ return is_agent
275
281
  except Exception as e: # noqa: BLE001
276
282
  _log(f"router error: {e} — defaulting to agent")
283
+ _emit({"type": "step", "text": "→ Agent mode (needs tools)"})
277
284
  return True
278
285
 
279
286
 
@@ -339,6 +346,18 @@ def _plain_reply(messages: list, base_url: str, api_key: str, model_id: str) ->
339
346
  return f"[Error: {e}]"
340
347
 
341
348
 
349
+ def _strip_tool_tags(text: str) -> str:
350
+ """Remove the internal hint tags we append to tool output (e.g. '[SUCCESS …]',
351
+ '[NOTE: …]', 'Note: This URL failed …') so they never leak into the user reply."""
352
+ out = []
353
+ for ln in (text or "").splitlines():
354
+ s = ln.strip()
355
+ if s.startswith("[SUCCESS") or s.startswith("[NOTE:") or s.startswith("Note: This URL failed"):
356
+ continue
357
+ out.append(ln)
358
+ return "\n".join(out).strip()
359
+
360
+
342
361
  def run_agent_chat(cfg):
343
362
  try:
344
363
  from smolagents import CodeAgent, OpenAIServerModel, tool
@@ -373,7 +392,11 @@ def run_agent_chat(cfg):
373
392
  else:
374
393
  coding_base_url = agent_base_url
375
394
  coding_model_id = agent_model_id
376
- max_steps = int(cfg.get("maxSteps") or 5)
395
+ # Strict single-shot: exactly ONE agent model call per message. The single code
396
+ # block must call a tool AND final_answer together — no multi-step ReAct loop.
397
+ # If the model fails to call final_answer, the max-steps fallback below returns
398
+ # the last tool observation deterministically (no extra model call).
399
+ max_steps = 1
377
400
 
378
401
  _log(
379
402
  f"start model={agent_model_id!r} base={agent_base_url!r} "
@@ -451,6 +474,7 @@ def run_agent_chat(cfg):
451
474
 
452
475
  if not needs_agent:
453
476
  _log("router: plain chat (no HTTP needed)")
477
+ _emit({"type": "step", "text": "Composing a reply…"})
454
478
  answer = _plain_reply(messages, agent_base_url, agent_api_key, agent_model_id)
455
479
  _log(f"plain reply: {len(answer)} chars")
456
480
  _emit({"type": "final", "text": answer})
@@ -458,20 +482,23 @@ def run_agent_chat(cfg):
458
482
 
459
483
  # Agent path — from here all step events go into <think>.
460
484
  _log("router: agent (HTTP tool use needed)")
461
- _emit({"type": "step", "text": "Planning HTTP request…"})
485
+ _emit({"type": "step", "text": "Choosing a tool…"})
462
486
 
463
487
  # Prepend explicit tool instructions so small models pick the right tool, never
464
488
  # fabricate URLs/responses, and always terminate with final_answer().
465
489
  from datetime import datetime as _dt_now
466
490
  now_str = _dt_now.now().astimezone().strftime("%A, %d %B %Y, %H:%M %Z")
467
491
  tool_hint = (
468
- f"Current date/time: {now_str}.\n\n"
492
+ "YOU HAVE EXACTLY ONE TURN. Read the TASK above. In a single code block, call "
493
+ "the ONE tool that fits THAT task, then pass its result to final_answer(). "
494
+ "Do not plan multiple steps.\n\n"
469
495
  "You have THREE tools:\n"
470
496
  " 1. http_request(method, url, headers='', body='', username='', password='') — "
471
497
  "call a SPECIFIC known API/URL.\n"
472
- " 2. web_search(query) — look up facts when you do NOT already have a real URL. "
473
- "Returns a summary + source.\n"
474
- " 3. get_current_datetime(timezone='') — current date/time (no HTTP needed).\n"
498
+ " 2. web_search(query) — look up facts about a person, place, thing, or topic "
499
+ "when you do NOT already have a real URL. Returns a summary + source.\n"
500
+ f" 3. get_current_datetime(timezone='') — current date/time ONLY (now: {now_str}). "
501
+ "Use this ONLY when the task explicitly asks for the date or time.\n"
475
502
  "\n"
476
503
  "http_request RETURN FORMAT: 'HTTP 200\\n{body}' — first line is 'HTTP <code>', body follows.\n"
477
504
  "\n"
@@ -485,9 +512,10 @@ def run_agent_chat(cfg):
485
512
  " response = http_request('GET', url, headers='{\"Authorization\": \"Bearer TOKEN\"}')\n"
486
513
  " final_answer(response)\n"
487
514
  "\n"
488
- "CHOOSING A TOOL:\n"
489
- "- Date/time question -> get_current_datetime(); do NOT use http_request.\n"
490
- "- 'find' / 'look up' / 'what is' / general knowledge -> web_search(query).\n"
515
+ "CHOOSING A TOOL (match the TASK, not these examples):\n"
516
+ "- ONLY a date/time question (e.g. 'what is the date today') -> get_current_datetime().\n"
517
+ "- 'who is' / 'what is' / 'tell me about' / a person / place / topic / general "
518
+ "knowledge -> web_search(query).\n"
491
519
  "- A specific known API/URL was given -> http_request().\n"
492
520
  "\n"
493
521
  "RULES:\n"
@@ -499,11 +527,23 @@ def run_agent_chat(cfg):
499
527
  "- If a tool returns 'Error:' or HTTP 4xx/5xx, try a DIFFERENT approach, not the same URL.\n"
500
528
  "- Do NOT put final_answer outside the code block.\n\n"
501
529
  )
502
- task_with_hint = tool_hint + "Task: " + task_text
530
+ # Lead with the TASK so the weak model anchors on what's actually being asked —
531
+ # not on the tool reference below. (Previously the hint led with the date, and the
532
+ # 3B model treated every message as a date question.)
533
+ task_with_hint = (
534
+ "TASK (answer THIS, choose the tool that fits it):\n"
535
+ f"{task_text}\n\n"
536
+ "----- TOOL REFERENCE -----\n"
537
+ + tool_hint
538
+ )
503
539
 
504
540
  # Track URLs that have already failed so we don't retry dead endpoints across steps.
505
541
  _failed_urls: set = set()
506
542
 
543
+ # Remember the last tool output so the single-shot fallback + the deterministic
544
+ # final formatting can report exactly what a tool returned (no extra model call).
545
+ _last_obs: dict = {"text": ""}
546
+
507
547
  @tool
508
548
  def http_request(method: str, url: str, headers: str = "", body: str = "",
509
549
  username: str = "", password: str = "") -> str:
@@ -561,6 +601,7 @@ def run_agent_chat(cfg):
561
601
  result = result + "\n[SUCCESS — call final_answer(response) now, do not parse or retry]"
562
602
  _emit({"type": "step", "text": f"HTTP {method.upper()} {url} → {status_line}"})
563
603
  _log(f"http_request {method.upper()} {url} → {result[:80]}")
604
+ _last_obs["text"] = result
564
605
  return result
565
606
 
566
607
  @tool
@@ -582,6 +623,7 @@ def run_agent_chat(cfg):
582
623
  out = now.strftime("%A, %d %B %Y, %H:%M:%S %Z")
583
624
  _emit({"type": "step", "text": f"Current date/time → {out}"})
584
625
  _log(f"get_current_datetime({timezone!r}) → {out}")
626
+ _last_obs["text"] = out
585
627
  return out
586
628
 
587
629
  @tool
@@ -597,6 +639,7 @@ def run_agent_chat(cfg):
597
639
  _emit({"type": "step", "text": f"Searching the web → {query[:80]}"})
598
640
  result = _web_search_impl(query)
599
641
  _log(f"web_search {query[:60]!r} → {result[:80]}")
642
+ _last_obs["text"] = result
600
643
  return result
601
644
 
602
645
  def step_callback(step_log):
@@ -658,13 +701,28 @@ def run_agent_chat(cfg):
658
701
  _log(f"MODEL REQUEST log error: {e}")
659
702
  return ck
660
703
 
704
+ # Single-shot agent: if the one model call doesn't end in final_answer(),
705
+ # smolagents would normally make an EXTRA model call (provide_final_answer) to
706
+ # synthesize one. We override that to return the last tool observation
707
+ # deterministically — keeping the agent to EXACTLY ONE model call, and never
708
+ # corrupting exact tool output (dates/numbers) the way a weak model would.
709
class _SingleShotAgent(CodeAgent):
    """CodeAgent whose final-answer fallback reuses the last tool output."""

    def provide_final_answer(self, task, *args, **kwargs):
        """Build the fallback final answer without calling the model.

        Returns the most recent tool observation stored in ``_last_obs``
        (whitespace-trimmed) wrapped in an assistant ``ChatMessage``. When no
        observation was recorded, a fixed apology asking the user to rephrase
        is returned instead. ``task`` and any extra arguments are accepted for
        signature compatibility and are not used.
        """
        from smolagents.models import ChatMessage, MessageRole

        observation = (_last_obs.get("text") or "").strip()
        # Fall back to a canned message only when no tool produced output.
        reply = observation or (
            "I couldn't complete that in one step. Please rephrase, or give "
            "a specific URL/API to call."
        )
        _log(f"single-shot fallback (no model call) → {reply[:80]}")
        return ChatMessage(role=MessageRole.ASSISTANT, content=reply)
718
+
661
719
  try:
662
720
  model = _LoggingModel(
663
721
  model_id=coding_model_id,
664
722
  api_base=coding_base_url,
665
723
  api_key=agent_api_key,
666
724
  )
667
- agent = CodeAgent(
725
+ agent = _SingleShotAgent(
668
726
  tools=[http_request, web_search, get_current_datetime],
669
727
  model=model,
670
728
  max_steps=max_steps,
@@ -674,15 +732,14 @@ def run_agent_chat(cfg):
674
732
  )
675
733
  with contextlib.redirect_stdout(sys.stderr):
676
734
  result = agent.run(task_with_hint)
677
- # Summarize with the AGENT (coding) model that ran the tools — not the chat
678
- # model — so it faithfully reports exact tool output (dates/numbers) instead
679
- # of paraphrasing and corrupting it. Falls back to the chat model when no
680
- # dedicated coding server is configured (coding_* default to agent_*).
681
- final_text = _summarize_result(
682
- task_text, str(result).strip(),
683
- coding_base_url, agent_api_key, coding_model_id
684
- )
685
- _log(f"done: {len(final_text)} chars")
735
+ # Deterministic final formatting — NO summarizer model call. The agent's
736
+ # final_answer (or the single-shot fallback above) already holds exact tool
737
+ # output; we just strip the internal hint tags we appended to tool results so
738
+ # they don't leak to the user. This permanently fixes the date-corruption a
739
+ # weak summarizer model used to introduce.
740
+ _emit({"type": "step", "text": "Composing answer…"})
741
+ final_text = _strip_tool_tags(str(result).strip()) or "[No result]"
742
+ _log(f"done (deterministic, no summarizer call): {len(final_text)} chars")
686
743
  _emit({"type": "final", "text": final_text})
687
744
  except Exception as e: # noqa: BLE001
688
745
  _log(f"agent error: {e}")
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tiens.nguyen/gonext-local-worker",
3
- "version": "1.0.90",
3
+ "version": "1.0.92",
4
4
  "description": "Polls GoNext cloud API for async local LLM jobs and runs them against Ollama/OpenAI-compatible servers on this Mac",
5
5
  "type": "module",
6
6
  "license": "MIT",