abstractagent 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +1,34 @@
1
- """AbstractRuntime adapter for ReAct-like agents."""
1
+ """AbstractRuntime adapter for canonical ReAct agents.
2
+
3
+ This adapter implements a deterministic ReAct loop:
4
+
5
+ init → reason → parse → (act → observe → reason)* → done
6
+
7
+ Policy (for now):
8
+ - Do NOT truncate ReAct loop context (history/scratchpad).
9
+ - Do NOT cap tool-steps to tiny token budgets.
10
+ - Do NOT require "FINAL:" markers or other termination hacks.
11
+
12
+ The loop continues whenever the model emits tool calls.
13
+ It ends only when the model emits **no tool calls** and provides an answer.
14
+ """
2
15
 
3
16
  from __future__ import annotations
4
17
 
5
18
  import hashlib
6
19
  import json
20
+ import re
7
21
  from typing import Any, Callable, Dict, List, Optional
8
22
 
9
23
  from abstractcore.tools import ToolCall
10
24
  from abstractruntime import Effect, EffectType, RunState, StepPlan, WorkflowSpec
11
25
  from abstractruntime.core.vars import ensure_limits, ensure_namespaces
12
- from abstractruntime.memory.active_context import ActiveContextPolicy
13
26
 
27
+ from .generation_params import runtime_llm_params
28
+ from .media import extract_media_from_context
14
29
  from ..logic.react import ReActLogic
15
30
 
31
+
16
32
  def _new_message(
17
33
  ctx: Any,
18
34
  *,
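
For orientation, the termination policy described in the docstring above reduces to a plain loop: keep reasoning while the model emits tool calls, stop when it answers without them. The sketch below is a minimal illustration, not the adapter's actual node graph; `call_llm` and `run_tool` are hypothetical stand-ins for the runtime's LLM_CALL effect and tool execution.

from typing import Any, Callable

def react_loop(
    task: str,
    call_llm: Callable[[list], tuple],   # hypothetical: returns (content, tool_calls)
    run_tool: Callable[[dict], str],     # hypothetical: executes one call, returns observation
    max_iterations: int = 25,
) -> str:
    messages: list[dict[str, Any]] = [{"role": "user", "content": task}]
    for _ in range(max_iterations):
        content, tool_calls = call_llm(messages)      # reason
        if not tool_calls:                            # no tool calls -> done
            return content                            # the answer itself; no "FINAL:" marker
        for tc in tool_calls:                         # act
            messages.append({"role": "tool", "content": run_tool(tc)})  # observe
    return "(max iterations reached)"
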
@@ -42,12 +58,47 @@ def _new_message(
42
58
  }
43
59
 
44
60
 
45
- def ensure_react_vars(run: RunState) -> tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any], Dict[str, Any], Dict[str, Any]]:
46
- """Ensure namespaced vars exist and migrate legacy flat keys in-place.
61
+ def _new_assistant_message_with_tool_calls(
62
+ ctx: Any,
63
+ *,
64
+ content: str,
65
+ tool_calls: List[ToolCall],
66
+ metadata: Optional[Dict[str, Any]] = None,
67
+ ) -> Dict[str, Any]:
68
+ """Create an assistant message that preserves tool call metadata for OpenAI transcripts."""
69
+
70
+ msg = _new_message(ctx, role="assistant", content=content, metadata=metadata)
71
+
72
+ tc_payload: list[dict[str, Any]] = []
73
+ for i, tc in enumerate(tool_calls):
74
+ if not isinstance(tc, ToolCall):
75
+ continue
76
+ name = str(tc.name or "").strip()
77
+ if not name:
78
+ continue
79
+ call_id = tc.call_id
80
+ call_id_str = str(call_id).strip() if call_id is not None else ""
81
+ if not call_id_str:
82
+ call_id_str = f"call_{i+1}"
83
+ args = tc.arguments if isinstance(tc.arguments, dict) else {}
84
+ tc_payload.append(
85
+ {
86
+ "type": "function",
87
+ "id": call_id_str,
88
+ "function": {"name": name, "arguments": json.dumps(args, ensure_ascii=False)},
89
+ }
90
+ )
91
+
92
+ if tc_payload:
93
+ msg["tool_calls"] = tc_payload
94
+ return msg
95
+
96
+
97
+ def ensure_react_vars(
98
+ run: RunState,
99
+ ) -> tuple[Dict[str, Any], Dict[str, Any], Dict[str, Any], Dict[str, Any], Dict[str, Any]]:
100
+ """Ensure namespaced vars exist and migrate legacy flat keys in-place."""
47
101
 
48
- Returns:
49
- Tuple of (context, scratchpad, runtime_ns, temp, limits) dicts.
50
- """
51
102
  ensure_namespaces(run.vars)
52
103
  limits = ensure_limits(run.vars)
53
104
  context = run.vars["context"]
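
For reference, `_new_assistant_message_with_tool_calls` above emits the OpenAI chat-completions tool-call shape; an illustrative example of the resulting message (values are made up):

# Illustrative shape only; `arguments` is always JSON-encoded as a string,
# and a missing model-provided id falls back to f"call_{i+1}".
example = {
    "role": "assistant",
    "content": "",
    "tool_calls": [
        {
            "type": "function",
            "id": "call_1",
            "function": {"name": "web_search", "arguments": "{\"query\": \"abstractagent\"}"},
        }
    ],
}
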
@@ -75,6 +126,9 @@ def ensure_react_vars(run: RunState) -> tuple[Dict[str, Any], Dict[str, Any], Di
75
126
  if not isinstance(runtime_ns.get("inbox"), list):
76
127
  runtime_ns["inbox"] = []
77
128
 
129
+ if not isinstance(scratchpad.get("cycles"), list):
130
+ scratchpad["cycles"] = []
131
+
78
132
  iteration = scratchpad.get("iteration")
79
133
  if not isinstance(iteration, int):
80
134
  try:
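
For orientation, `ensure_react_vars` returns five namespace dicts; a sketch of the layout implied by the keys used in this diff. The literal key for the scratchpad namespace is an assumption here; `context`, `_runtime`, `_temp`, and `_limits` appear verbatim elsewhere in the diff.

run_vars = {
    "context": {"task": "", "messages": []},                      # durable conversation
    "scratchpad": {"iteration": 0, "max_iterations": 25,          # key name assumed
                   "cycles": [], "used_tools": False},
    "_runtime": {"inbox": [], "tool_specs": [], "toolset_id": ""},
    "_temp": {},                                                  # per-step effect results
    "_limits": {"current_iteration": 0},                          # canonical budgets
}
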
@@ -90,13 +144,9 @@ def ensure_react_vars(run: RunState) -> tuple[Dict[str, Any], Dict[str, Any], Di
90
144
  scratchpad["max_iterations"] = int(max_iterations)
91
145
  except (TypeError, ValueError):
92
146
  scratchpad["max_iterations"] = 25
93
-
94
147
  if scratchpad["max_iterations"] < 1:
95
148
  scratchpad["max_iterations"] = 1
96
149
 
97
- # Track whether any external tools were actually executed during this run.
98
- # This is used to reliably trigger a final "synthesis" pass so the agent
99
- # returns a user-facing answer instead of echoing tool observations.
100
150
  used_tools = scratchpad.get("used_tools")
101
151
  if not isinstance(used_tools, bool):
102
152
  scratchpad["used_tools"] = bool(used_tools) if used_tools is not None else False
@@ -111,6 +161,462 @@ def _compute_toolset_id(tool_specs: List[Dict[str, Any]]) -> str:
111
161
  return f"ts_{digest}"
112
162
 
113
163
 
164
+ def _tool_call_signature(name: str, args: Any) -> str:
165
+ def _abbrev(v: Any, *, max_chars: int = 140) -> str:
166
+ if v is None:
167
+ return ""
168
+ s = str(v)
169
+ if len(s) <= max_chars:
170
+ return s
171
+ return f"{s[: max(0, max_chars - 1)]}…"
172
+
173
+ def _hash_str(s: str) -> str:
174
+ try:
175
+ return hashlib.sha256(s.encode("utf-8")).hexdigest()[:12]
176
+ except Exception:
177
+ return "sha256_err"
178
+
179
+ n = str(name or "").strip() or "tool"
180
+ if not isinstance(args, dict) or not args:
181
+ return f"{n}()"
182
+
183
+ # Special-case common large-argument tools so the system prompt doesn't explode.
184
+ if n == "write_file":
185
+ fp = args.get("file_path") if isinstance(args.get("file_path"), str) else args.get("path")
186
+ mode = args.get("mode") if isinstance(args.get("mode"), str) else "w"
187
+ content = args.get("content")
188
+ if isinstance(content, str):
189
+ tag = f"<str len={len(content)} sha256={_hash_str(content)}>"
190
+ else:
191
+ tag = "<str len=0>"
192
+ return f"write_file(file_path={_abbrev(fp)!r}, mode={_abbrev(mode)!r}, content={tag})"
193
+
194
+ if n == "edit_file":
195
+ fp = args.get("file_path") if isinstance(args.get("file_path"), str) else args.get("path")
196
+ edits = args.get("edits")
197
+ n_edits = len(edits) if isinstance(edits, list) else 0
198
+ return f"edit_file(file_path={_abbrev(fp)!r}, edits={n_edits})"
199
+
200
+ if n == "fetch_url":
201
+ url = args.get("url")
202
+ include_full = args.get("include_full_content")
203
+ return f"fetch_url(url={_abbrev(url)!r}, include_full_content={include_full})"
204
+
205
+ if n == "web_search":
206
+ q = args.get("query")
207
+ num = args.get("num_results")
208
+ return f"web_search(query={_abbrev(q)!r}, num_results={num})"
209
+
210
+ if n == "execute_command":
211
+ cmd = args.get("command")
212
+ return f"execute_command(command={_abbrev(cmd, max_chars=220)!r})"
213
+
214
+ # Generic, but bounded: hash long strings to avoid leaking large blobs into the prompt.
215
+ summarized: Dict[str, Any] = {}
216
+ for k, v in args.items():
217
+ if isinstance(v, str) and len(v) > 160:
218
+ summarized[str(k)] = f"<str len={len(v)} sha256={_hash_str(v)}>"
219
+ else:
220
+ summarized[str(k)] = v
221
+ try:
222
+ arg_str = json.dumps(summarized, ensure_ascii=False, sort_keys=True)
223
+ except Exception:
224
+ arg_str = str(summarized)
225
+ arg_str = _abbrev(arg_str, max_chars=260)
226
+ return f"{n}({arg_str})"
227
+
228
+
229
+ def _tool_call_fingerprint(name: str, args: Any) -> str:
230
+ """Return a stable, bounded fingerprint for tool-call repeat detection.
231
+
232
+ Important: do not embed large string blobs (file contents / web pages) in the fingerprint.
233
+ """
234
+
235
+ def _hash_str(s: str) -> str:
236
+ try:
237
+ return hashlib.sha256(s.encode("utf-8")).hexdigest()
238
+ except Exception:
239
+ return "sha256_err"
240
+
241
+ def _canon(v: Any) -> Any:
242
+ if v is None or isinstance(v, (bool, int, float)):
243
+ return v
244
+ if isinstance(v, str):
245
+ if len(v) <= 200:
246
+ return v
247
+ return {"_type": "str", "len": len(v), "sha256": _hash_str(v)[:16]}
248
+ if isinstance(v, list):
249
+ return [_canon(x) for x in v[:25]]
250
+ if isinstance(v, dict):
251
+ out: Dict[str, Any] = {}
252
+ for k in sorted(v.keys(), key=lambda x: str(x)):
253
+ out[str(k)] = _canon(v.get(k))
254
+ return out
255
+ return {"_type": type(v).__name__}
256
+
257
+ payload = {"name": str(name or "").strip(), "args": _canon(args if isinstance(args, dict) else {})}
258
+ try:
259
+ raw = json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
260
+ except Exception:
261
+ raw = str(payload)
262
+ try:
263
+ return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:16]
264
+ except Exception:
265
+ return "fingerprint_err"
266
+
267
+
268
+ _FINALISH_RE = re.compile(
269
+ r"(?i)\b(final answer|here is|here['’]s|here are|below is|below are|done|completed|in summary|summary|result)\b"
270
+ )
271
+
272
+ _WAITING_RE = re.compile(
273
+ r"(?i)\b("
274
+ r"let me know|your next step|what would you like|tell me|"
275
+ r"i can help|i'm ready|i am ready|"
276
+ r"i'll wait|i will wait|waiting for|"
277
+ r"no tool calls?"
278
+ r")\b"
279
+ )
280
+
281
+ _DEFERRED_ACTION_INTENT_RE = re.compile(
282
+ # Only treat as "missing tool calls" when the model *commits to acting*
283
+ # (first-person intent) rather than providing a final answer.
284
+ r"(?i)\b(i will|i['’]?ll|let me|i am going to|i['’]?m going to|i need to)\b"
285
+ )
286
+
287
+ _DEFERRED_ACTION_VERB_RE = re.compile(
288
+ # Verbs that typically imply external actions (tools/files/web/edits).
289
+ r"(?i)\b(read|open|search|list|skim|inspect|explore|scan|run|execute|edit|fetch|download|creat(?:e|ing))\b"
290
+ )
291
+
292
+ _TOOL_CALL_MARKERS = ("<function_call>", "<tool_call>", "<|tool_call|>", "```tool_code")
293
+
294
+
295
+ def _contains_tool_call_markup(text: str) -> bool:
296
+ s = str(text or "")
297
+ if not s.strip():
298
+ return False
299
+ low = s.lower()
300
+ return any(m in low for m in _TOOL_CALL_MARKERS)
301
+
302
+
303
+ _TOOL_CALL_STRIP_RE = re.compile(
304
+ r"(?is)"
305
+ r"<function_call>\s*.*?\s*</function_call>|"
306
+ r"<tool_call>\s*.*?\s*</tool_call>|"
307
+ r"<\|tool_call\|>.*?<\|/tool_call\|>|"
308
+ r"```tool_code\s*.*?```"
309
+ )
310
+
311
+
312
+ def _strip_tool_call_markup(text: str) -> str:
313
+ raw = str(text or "")
314
+ if not raw.strip():
315
+ return ""
316
+ try:
317
+ return _TOOL_CALL_STRIP_RE.sub("", raw)
318
+ except Exception:
319
+ return raw
320
+
321
+
322
+ def _looks_like_deferred_action(text: str) -> bool:
323
+ """Return True when the model claims it will take actions but emits no tool calls.
324
+
325
+ This is intentionally conservative: false positives waste iterations and can "force"
326
+ unnecessary tool calls. It should only trigger when the assistant message strongly
327
+ suggests it is about to act (not answer).
328
+ """
329
+ s = str(text or "").strip()
330
+ if not s:
331
+ return False
332
+ # If the model is explicitly waiting for user direction, that's a valid final response.
333
+ if _WAITING_RE.search(s):
334
+ return False
335
+ # Common “final answer” framing (incl. typographic apostrophes).
336
+ if _FINALISH_RE.search(s):
337
+ return False
338
+ # If the model already produced a structured answer (headings/sections), don't retry.
339
+ if re.search(r"(?m)^(#{1,6}\s+\S|\*\*\S)", s):
340
+ return False
341
+ # Must contain first-person intent *and* an action-ish verb.
342
+ if not _DEFERRED_ACTION_INTENT_RE.search(s):
343
+ return False
344
+ if not _DEFERRED_ACTION_VERB_RE.search(s):
345
+ return False
346
+ return True
347
+
348
+
349
+ def _push_inbox(runtime_ns: Dict[str, Any], content: str) -> None:
350
+ if not isinstance(runtime_ns, dict):
351
+ return
352
+ inbox = runtime_ns.get("inbox")
353
+ if not isinstance(inbox, list):
354
+ inbox = []
355
+ runtime_ns["inbox"] = inbox
356
+ inbox.append({"role": "system", "content": str(content or "")})
357
+
358
+
359
+ def _drain_inbox(runtime_ns: Dict[str, Any]) -> str:
360
+ inbox = runtime_ns.get("inbox")
361
+ if not isinstance(inbox, list) or not inbox:
362
+ return ""
363
+ parts: list[str] = []
364
+ for m in inbox:
365
+ if not isinstance(m, dict):
366
+ continue
367
+ c = m.get("content")
368
+ if isinstance(c, str) and c.strip():
369
+ parts.append(c.strip())
370
+ runtime_ns["inbox"] = []
371
+ return "\n".join(parts).strip()
372
+
373
+
374
+ def _boolish(value: Any) -> bool:
375
+ """Best-effort coercion for runtime flags (bool/int/str)."""
376
+ if isinstance(value, bool):
377
+ return value
378
+ if isinstance(value, (int, float)) and not isinstance(value, bool):
379
+ return value != 0
380
+ if isinstance(value, str):
381
+ return value.strip().lower() in {"1", "true", "yes", "y", "on", "enabled"}
382
+ return False
383
+
384
+ def _system_prompt_override(runtime_ns: Dict[str, Any]) -> Optional[str]:
385
+ raw = runtime_ns.get("system_prompt") if isinstance(runtime_ns, dict) else None
386
+ if isinstance(raw, str) and raw.strip():
387
+ return raw.strip()
388
+ return None
389
+
390
+
391
+ def _system_prompt_extra(runtime_ns: Dict[str, Any]) -> Optional[str]:
392
+ raw = runtime_ns.get("system_prompt_extra") if isinstance(runtime_ns, dict) else None
393
+ if isinstance(raw, str) and raw.strip():
394
+ return raw.strip()
395
+ return None
396
+
397
+
398
+ def _compose_system_prompt(runtime_ns: Dict[str, Any], *, base: str) -> str:
399
+ override = _system_prompt_override(runtime_ns)
400
+ extra = _system_prompt_extra(runtime_ns)
401
+ sys = override if override is not None else base
402
+ if extra:
403
+ sys = f"{sys.rstrip()}\n\nAdditional system instructions:\n{extra}"
404
+ return sys.strip()
405
+
406
+
407
+ def _max_output_tokens(runtime_ns: Dict[str, Any], limits: Dict[str, Any]) -> Optional[int]:
408
+ # Canonical limit: _limits.max_output_tokens (None = unset).
409
+ raw = None
410
+ if isinstance(limits, dict) and "max_output_tokens" in limits:
411
+ raw = limits.get("max_output_tokens")
412
+ if raw is None and isinstance(runtime_ns, dict):
413
+ raw = runtime_ns.get("max_output_tokens")
414
+ if raw is None:
415
+ return None
416
+ try:
417
+ val = int(raw)
418
+ except Exception:
419
+ return None
420
+ return val if val > 0 else None
421
+
422
+
423
+ def _render_cycles_for_system_prompt(scratchpad: Dict[str, Any]) -> str:
424
+ cycles = scratchpad.get("cycles")
425
+ if not isinstance(cycles, list) or not cycles:
426
+ return ""
427
+
428
+ # Keep the system prompt bounded: tool outputs can be very large (fetch_url/web_search).
429
+ max_cycles = 6
430
+ max_thought_chars = 600
431
+ max_obs_chars = 220
432
+
433
+ view = [c for c in cycles if isinstance(c, dict)]
434
+ if len(view) > max_cycles:
435
+ view = view[-max_cycles:]
436
+
437
+ lines: list[str] = []
438
+ for c in view:
439
+ i = c.get("i")
440
+ thought = str(c.get("thought") or "").strip()
441
+ if len(thought) > max_thought_chars:
442
+ thought = f"{thought[: max(0, max_thought_chars - 1)]}…"
443
+ tcs = c.get("tool_calls")
444
+ obs = c.get("observations")
445
+ if i is None:
446
+ continue
447
+ lines.append(f"[cycle {i}]")
448
+ if thought:
449
+ lines.append(f"thought: {thought}")
450
+ if isinstance(tcs, list) and tcs:
451
+ sigs: list[str] = []
452
+ for tc in tcs:
453
+ if isinstance(tc, dict):
454
+ sigs.append(_tool_call_signature(tc.get("name", ""), tc.get("arguments")))
455
+ if sigs:
456
+ lines.append("actions:")
457
+ for s in sigs:
458
+ lines.append(f"- {s}")
459
+ if isinstance(obs, list) and obs:
460
+ lines.append("observations:")
461
+ for o in obs:
462
+ if not isinstance(o, dict):
463
+ continue
464
+ name = str(o.get("name") or "tool")
465
+ ok = bool(o.get("success"))
466
+ out = o.get("output")
467
+ err = o.get("error")
468
+ if not ok:
469
+ text = str(err or out or "").strip()
470
+ else:
471
+ if isinstance(out, dict):
472
+ # Prefer metadata-ish fields; do not dump full `rendered` bodies into the prompt.
473
+ url = out.get("url") if isinstance(out.get("url"), str) else None
474
+ status = out.get("status_code") if out.get("status_code") is not None else None
475
+ content_type = out.get("content_type") if isinstance(out.get("content_type"), str) else None
476
+ rendered = out.get("rendered") if isinstance(out.get("rendered"), str) else None
477
+ rendered_len = len(rendered) if isinstance(rendered, str) else None
478
+ parts: list[str] = []
479
+ if url:
480
+ parts.append(f"url={url}")
481
+ if status is not None:
482
+ parts.append(f"status={status}")
483
+ if content_type:
484
+ parts.append(f"type={content_type}")
485
+ if rendered_len is not None:
486
+ parts.append(f"rendered_len={rendered_len}")
487
+ text = ", ".join(parts) if parts else f"keys={list(out.keys())[:8]}"
488
+ else:
489
+ text = str(out or "").strip()
490
+ if len(text) > max_obs_chars:
491
+ text = f"{text[: max(0, max_obs_chars - 1)]}…"
492
+ lines.append(f"- [{name}] {'OK' if ok else 'ERR'}: {text}")
493
+ lines.append("")
494
+ return "\n".join(lines).strip()
495
+
496
+
497
+ def _render_cycles_for_conclusion_prompt(scratchpad: Dict[str, Any]) -> str:
498
+ cycles = scratchpad.get("cycles")
499
+ if not isinstance(cycles, list) or not cycles:
500
+ return ""
501
+
502
+ # The conclusion prompt should have access to the full loop trace, but still needs
503
+ # to be bounded (tool outputs may be huge).
504
+ max_cycles = 25
505
+ max_thought_chars = 900
506
+ max_obs_chars = 360
507
+
508
+ view = [c for c in cycles if isinstance(c, dict)]
509
+ total = len(view)
510
+ if total > max_cycles:
511
+ view = view[-max_cycles:]
512
+
513
+ lines: list[str] = []
514
+ if total > len(view):
515
+ lines.append(f"(showing last {len(view)} of {total} cycles)")
516
+ lines.append("")
517
+
518
+ for c in view:
519
+ i = c.get("i")
520
+ if i is None:
521
+ continue
522
+ lines.append(f"[cycle {i}]")
523
+
524
+ thought = str(c.get("thought") or "").strip()
525
+ if len(thought) > max_thought_chars:
526
+ thought = f"{thought[: max(0, max_thought_chars - 1)]}…"
527
+ if thought:
528
+ lines.append(f"thought: {thought}")
529
+
530
+ tcs = c.get("tool_calls")
531
+ if isinstance(tcs, list) and tcs:
532
+ sigs: list[str] = []
533
+ for tc in tcs:
534
+ if isinstance(tc, dict):
535
+ sigs.append(_tool_call_signature(tc.get("name", ""), tc.get("arguments")))
536
+ if sigs:
537
+ lines.append("actions:")
538
+ for s in sigs:
539
+ lines.append(f"- {s}")
540
+
541
+ obs = c.get("observations")
542
+ if isinstance(obs, list) and obs:
543
+ lines.append("observations:")
544
+ for o in obs:
545
+ if not isinstance(o, dict):
546
+ continue
547
+ name = str(o.get("name") or "tool")
548
+ ok = bool(o.get("success"))
549
+ out = o.get("output")
550
+ err = o.get("error")
551
+ if not ok:
552
+ text = str(err or out or "").strip()
553
+ else:
554
+ if isinstance(out, dict):
555
+ url = out.get("url") if isinstance(out.get("url"), str) else None
556
+ status = out.get("status_code") if out.get("status_code") is not None else None
557
+ content_type = out.get("content_type") if isinstance(out.get("content_type"), str) else None
558
+ rendered = out.get("rendered") if isinstance(out.get("rendered"), str) else None
559
+ rendered_len = len(rendered) if isinstance(rendered, str) else None
560
+ parts: list[str] = []
561
+ if url:
562
+ parts.append(f"url={url}")
563
+ if status is not None:
564
+ parts.append(f"status={status}")
565
+ if content_type:
566
+ parts.append(f"type={content_type}")
567
+ if rendered_len is not None:
568
+ parts.append(f"rendered_len={rendered_len}")
569
+ text = ", ".join(parts) if parts else f"keys={list(out.keys())[:8]}"
570
+ else:
571
+ text = str(out or "").strip()
572
+ if len(text) > max_obs_chars:
573
+ text = f"{text[: max(0, max_obs_chars - 1)]}…"
574
+ lines.append(f"- [{name}] {'OK' if ok else 'ERR'}: {text}")
575
+
576
+ lines.append("")
577
+
578
+ return "\n".join(lines).strip()
579
+
580
+
581
+ def _render_final_report(task: str, scratchpad: Dict[str, Any]) -> str:
582
+ cycles = scratchpad.get("cycles")
583
+ if not isinstance(cycles, list):
584
+ cycles = []
585
+ lines: list[str] = []
586
+ lines.append(f"task: {task}")
587
+ lines.append(f"cycles: {len([c for c in cycles if isinstance(c, dict)])}")
588
+ lines.append("")
589
+ for c in cycles:
590
+ if not isinstance(c, dict):
591
+ continue
592
+ i = c.get("i")
593
+ lines.append(f"cycle {i}")
594
+ thought = str(c.get("thought") or "").strip()
595
+ if thought:
596
+ lines.append(f"- thought: {thought}")
597
+ tcs = c.get("tool_calls")
598
+ if isinstance(tcs, list) and tcs:
599
+ lines.append("- actions:")
600
+ for tc in tcs:
601
+ if not isinstance(tc, dict):
602
+ continue
603
+ lines.append(f" - {_tool_call_signature(tc.get('name',''), tc.get('arguments'))}")
604
+ obs = c.get("observations")
605
+ if isinstance(obs, list) and obs:
606
+ lines.append("- observations:")
607
+ for o in obs:
608
+ if not isinstance(o, dict):
609
+ continue
610
+ name = str(o.get("name") or "tool")
611
+ ok = bool(o.get("success"))
612
+ out = o.get("output")
613
+ err = o.get("error")
614
+ text = str(out if ok else (err or out) or "").strip()
615
+ lines.append(f" - [{name}] {'OK' if ok else 'ERR'}: {text}")
616
+ lines.append("")
617
+ return "\n".join(lines).strip()
618
+
619
+
114
620
  def create_react_workflow(
115
621
  *,
116
622
  logic: ReActLogic,
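
The parse-node loop guard added later in this diff compares per-cycle fingerprint lists before executing side-effectful tools; a simplified, self-contained sketch of that comparison (a stand-in for `_tool_call_fingerprint`, not the exact implementation):

import hashlib
import json

def fingerprint(name: str, args: dict) -> str:
    # Simplified stand-in: canonical JSON, then a short sha256 prefix.
    raw = json.dumps({"name": name, "args": args}, sort_keys=True, separators=(",", ":"))
    return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:16]

prev = [fingerprint("write_file", {"file_path": "a.txt", "content": "hi"})]
cur = [fingerprint("write_file", {"file_path": "a.txt", "content": "hi"})]
if prev == cur:
    # parse_node skips execution and nudges the model to answer instead,
    # so a successful write_file is not replayed with identical arguments.
    print("duplicate side-effect cycle detected; skipping execution")
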
@@ -127,7 +633,6 @@ def create_react_workflow(
127
633
  on_step(step, data)
128
634
 
129
635
  def _current_tool_defs() -> list[Any]:
130
- """Return the current tool definitions from the logic (dynamic)."""
131
636
  defs = getattr(logic, "tools", None)
132
637
  if not isinstance(defs, list):
133
638
  try:
@@ -148,7 +653,6 @@ def create_react_workflow(
148
653
  if isinstance(allowed_tools, list):
149
654
  allow = [str(t).strip() for t in allowed_tools if isinstance(t, str) and t.strip()]
150
655
  return allow if allow else []
151
- # Default allowlist: all tools currently known to the logic (deduped, order preserved).
152
656
  out: list[str] = []
153
657
  seen: set[str] = set()
154
658
  for t in _current_tool_defs():
@@ -160,7 +664,6 @@ def create_react_workflow(
160
664
  return out
161
665
 
162
666
  def _normalize_allowlist(raw: Any) -> list[str]:
163
- items: list[Any]
164
667
  if isinstance(raw, list):
165
668
  items = raw
166
669
  elif isinstance(raw, tuple):
@@ -170,26 +673,20 @@ def create_react_workflow(
170
673
  else:
171
674
  items = []
172
675
 
676
+ current = _tool_by_name()
173
677
  out: list[str] = []
174
678
  seen: set[str] = set()
175
- current = _tool_by_name()
176
679
  for t in items:
177
680
  if not isinstance(t, str):
178
681
  continue
179
682
  name = t.strip()
180
- if not name:
181
- continue
182
- if name in seen:
183
- continue
184
- # Only accept tool names known to the workflow's logic (dynamic).
185
- if name not in current:
683
+ if not name or name in seen or name not in current:
186
684
  continue
187
685
  seen.add(name)
188
686
  out.append(name)
189
687
  return out
190
688
 
191
689
  def _effective_allowlist(runtime_ns: Dict[str, Any]) -> list[str]:
192
- # Allow runtime vars to override tool selection (Visual Agent tools pin).
193
690
  if isinstance(runtime_ns, dict) and "allowed_tools" in runtime_ns:
194
691
  normalized = _normalize_allowlist(runtime_ns.get("allowed_tools"))
195
692
  runtime_ns["allowed_tools"] = normalized
@@ -205,344 +702,218 @@ def create_react_workflow(
205
702
  out.append(tool)
206
703
  return out
207
704
 
208
- def _system_prompt(runtime_ns: Dict[str, Any]) -> Optional[str]:
209
- raw = runtime_ns.get("system_prompt") if isinstance(runtime_ns, dict) else None
210
- if isinstance(raw, str) and raw.strip():
705
+ def _tool_prompt_examples_enabled(runtime_ns: Dict[str, Any]) -> bool:
706
+ raw = runtime_ns.get("tool_prompt_examples") if isinstance(runtime_ns, dict) else None
707
+ if raw is None:
708
+ return True
709
+ if isinstance(raw, bool):
211
710
  return raw
212
- return None
711
+ if isinstance(raw, (int, float)):
712
+ return bool(raw)
713
+ if isinstance(raw, str):
714
+ lowered = raw.strip().lower()
715
+ if lowered in {"0", "false", "no", "off", "disabled"}:
716
+ return False
717
+ if lowered in {"1", "true", "yes", "on", "enabled"}:
718
+ return True
719
+ return True
213
720
 
214
- def _sanitize_llm_messages(messages: Any, *, limits: Optional[Dict[str, Any]] = None) -> List[Dict[str, str]]:
215
- """Convert runtime-owned message dicts into OpenAI-style {role, content, ...}.
721
+ def _materialize_tool_specs(defs: list[Any], *, include_examples: bool) -> list[dict[str, Any]]:
722
+ out: list[dict[str, Any]] = []
723
+ for t in defs:
724
+ try:
725
+ d = t.to_dict()
726
+ except Exception:
727
+ continue
728
+ if isinstance(d, dict):
729
+ if not include_examples:
730
+ d = dict(d)
731
+ d.pop("examples", None)
732
+ out.append(d)
733
+ return out
216
734
 
217
- Runtime messages can include extra metadata fields (`timestamp`, `metadata`) that many providers
218
- will reject. Keep only the fields the LLM API expects.
219
- """
735
+ def _sanitize_llm_messages(messages: Any) -> List[Dict[str, Any]]:
220
736
  if not isinstance(messages, list) or not messages:
221
737
  return []
222
- # Keep the LLM-visible context bounded even if the durable history contains large
223
- # tool outputs or code dumps.
224
- def _limit_int(key: str, default: int) -> int:
225
- if not isinstance(limits, dict):
226
- return default
227
- try:
228
- return int(limits.get(key, default))
229
- except Exception:
230
- return default
231
- max_message_chars = _limit_int("max_message_chars", -1)
232
- max_tool_message_chars = _limit_int("max_tool_message_chars", -1)
233
-
234
- def _truncate(text: str, *, max_chars: int) -> str:
235
- if max_chars <= 0:
236
- return text
237
- if len(text) <= max_chars:
238
- return text
239
- suffix = f"\n… (truncated, {len(text):,} chars total)"
240
- keep = max_chars - len(suffix)
241
- if keep < 200:
242
- keep = max_chars
243
- suffix = ""
244
- return text[:keep].rstrip() + suffix
245
-
246
- out: List[Dict[str, str]] = []
738
+ out: List[Dict[str, Any]] = []
739
+
740
+ def _sanitize_tool_calls(raw: Any) -> Optional[list[dict[str, Any]]]:
741
+ if not isinstance(raw, list) or not raw:
742
+ return None
743
+ cleaned: list[dict[str, Any]] = []
744
+ for i, tc in enumerate(raw):
745
+ if not isinstance(tc, dict):
746
+ continue
747
+ tc_type = str(tc.get("type") or "function")
748
+ if tc_type != "function":
749
+ continue
750
+ call_id = tc.get("id")
751
+ call_id_str = str(call_id).strip() if call_id is not None else ""
752
+ if not call_id_str:
753
+ call_id_str = f"call_{i+1}"
754
+ fn = tc.get("function") if isinstance(tc.get("function"), dict) else {}
755
+ name = str(fn.get("name") or "").strip()
756
+ if not name:
757
+ continue
758
+ args = fn.get("arguments")
759
+ if isinstance(args, dict):
760
+ args_str = json.dumps(args, ensure_ascii=False)
761
+ else:
762
+ args_str = "" if args is None else str(args)
763
+ cleaned.append({"type": "function", "id": call_id_str, "function": {"name": name, "arguments": args_str}})
764
+ return cleaned or None
765
+
247
766
  for m in messages:
248
767
  if not isinstance(m, dict):
249
768
  continue
250
769
  role = str(m.get("role") or "").strip()
251
- content = m.get("content")
252
- if not role or content is None:
770
+ if not role:
253
771
  continue
254
- content_str = str(content)
255
- if not content_str.strip():
772
+ content = m.get("content")
773
+ content_str = "" if content is None else str(content)
774
+ tool_calls_raw = m.get("tool_calls")
775
+ tool_calls = _sanitize_tool_calls(tool_calls_raw)
776
+
777
+ # Assistant tool-calls messages may legitimately have empty content, but must still be included.
778
+ if not content_str.strip() and not (role == "assistant" and tool_calls):
256
779
  continue
257
- limit = max_tool_message_chars if role == "tool" else max_message_chars
258
- entry: Dict[str, str] = {"role": role, "content": _truncate(content_str, max_chars=limit)}
780
+
781
+ entry: Dict[str, Any] = {"role": role, "content": content_str}
259
782
  if role == "tool":
260
783
  meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
261
784
  call_id = meta.get("call_id") if isinstance(meta, dict) else None
262
785
  if call_id is not None and str(call_id).strip():
263
- # OpenAI-compatible servers accept `tool_call_id` for tool messages.
264
786
  entry["tool_call_id"] = str(call_id).strip()
787
+ elif role == "assistant" and tool_calls:
788
+ entry["tool_calls"] = tool_calls
265
789
  out.append(entry)
266
790
  return out
267
791
 
268
- def _flag(runtime_ns: Dict[str, Any], key: str, *, default: bool = False) -> bool:
269
- if not isinstance(runtime_ns, dict) or key not in runtime_ns:
270
- return bool(default)
271
- val = runtime_ns.get(key)
272
- if isinstance(val, bool):
273
- return val
274
- if isinstance(val, (int, float)):
275
- return bool(val)
276
- if isinstance(val, str):
277
- lowered = val.strip().lower()
278
- if lowered in ("1", "true", "yes", "on", "enabled"):
279
- return True
280
- if lowered in ("0", "false", "no", "off", "disabled"):
281
- return False
282
- return bool(default)
283
-
284
- def _int(runtime_ns: Dict[str, Any], key: str, *, default: int) -> int:
285
- if not isinstance(runtime_ns, dict) or key not in runtime_ns:
286
- return int(default)
287
- val = runtime_ns.get(key)
288
- try:
289
- return int(val) # type: ignore[arg-type]
290
- except Exception:
291
- return int(default)
292
-
293
- def _extract_plan_update(content: str) -> Optional[str]:
294
- """Extract a plan update block from model content (best-effort).
295
-
296
- Convention (prompted in Plan mode): the model appends a final section:
297
-
298
- Plan Update:
299
- - [ ] ...
300
- - [x] ...
301
- """
302
- if not isinstance(content, str) or not content.strip():
303
- return None
304
- import re
305
-
306
- lines = content.splitlines()
307
- header_idx: Optional[int] = None
308
- for i, line in enumerate(lines):
309
- if re.match(r"(?i)^\s*plan\s*update\s*:\s*$", line.strip()):
310
- header_idx = i
311
- if header_idx is None:
312
- return None
313
- plan_lines = lines[header_idx + 1 :]
314
- while plan_lines and not plan_lines[0].strip():
315
- plan_lines.pop(0)
316
- plan_text = "\n".join(plan_lines).strip()
317
- if not plan_text:
318
- return None
319
- # Require at least one bullet/numbered line to avoid accidental captures.
320
- if not re.search(r"(?m)^\s*(?:[-*]|\d+\.)\s+", plan_text):
321
- return None
322
- return plan_text
792
+ builtin_effect_tools = {
793
+ "ask_user",
794
+ "recall_memory",
795
+ "inspect_vars",
796
+ "remember",
797
+ "remember_note",
798
+ "compact_memory",
799
+ "delegate_agent",
800
+ }
323
801
 
324
802
  def init_node(run: RunState, ctx) -> StepPlan:
325
803
  context, scratchpad, runtime_ns, _, limits = ensure_react_vars(run)
804
+
326
805
  scratchpad["iteration"] = 0
327
806
  limits["current_iteration"] = 0
328
807
 
808
+ # Disable runtime-level input trimming for ReAct loops.
809
+ if isinstance(runtime_ns, dict):
810
+ runtime_ns.setdefault("disable_input_trimming", True)
811
+ # Disable all truncation/capping knobs for ReAct runs (policy: full context for now).
812
+ # These can be re-enabled later once correctness is proven.
813
+ if isinstance(limits, dict):
814
+ limits["max_output_tokens"] = None
815
+ limits["max_input_tokens"] = None
816
+ limits["max_history_messages"] = -1
817
+ limits["max_message_chars"] = -1
818
+ limits["max_tool_message_chars"] = -1
819
+
329
820
  task = str(context.get("task", "") or "")
330
821
  context["task"] = task
331
- messages = context["messages"]
822
+ msgs = context.get("messages")
823
+ if not isinstance(msgs, list):
824
+ msgs = []
825
+ context["messages"] = msgs
332
826
 
333
- if task and (not messages or messages[-1].get("role") != "user" or messages[-1].get("content") != task):
334
- messages.append(_new_message(ctx, role="user", content=task))
827
+ if task and (not msgs or msgs[-1].get("role") != "user" or msgs[-1].get("content") != task):
828
+ msgs.append(_new_message(ctx, role="user", content=task))
335
829
 
336
- # Ensure toolset metadata is present for audit/debug.
337
830
  allow = _effective_allowlist(runtime_ns)
338
831
  allowed_defs = _allowed_tool_defs(allow)
339
- tool_specs = [t.to_dict() for t in allowed_defs]
832
+ include_examples = _tool_prompt_examples_enabled(runtime_ns)
833
+ tool_specs = _materialize_tool_specs(allowed_defs, include_examples=include_examples)
340
834
  runtime_ns["tool_specs"] = tool_specs
341
835
  runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
342
836
  runtime_ns.setdefault("allowed_tools", allow)
343
- runtime_ns.setdefault("inbox", [])
344
837
 
345
- emit("init", {"task": task})
346
- if _flag(runtime_ns, "plan_mode", default=False) and not isinstance(scratchpad.get("plan"), str):
347
- return StepPlan(node_id="init", next_node="plan")
838
+ scratchpad.setdefault("cycles", [])
348
839
  return StepPlan(node_id="init", next_node="reason")
349
840
 
350
- def plan_node(run: RunState, ctx) -> StepPlan:
351
- context, scratchpad, runtime_ns, _, _ = ensure_react_vars(run)
352
- task = str(context.get("task", "") or "")
353
-
354
- allow = _effective_allowlist(runtime_ns)
355
-
356
- prompt = (
357
- "You are preparing a high-level execution plan for the user's request.\n"
358
- "Return a concise TODO list (5–12 steps) that is actionable and verifiable.\n"
359
- "Do not call tools yet. Do not include role prefixes like 'assistant:'.\n\n"
360
- f"User request:\n{task}\n\n"
361
- "Plan (markdown checklist):\n"
362
- "- [ ] ...\n"
363
- )
364
-
365
- emit("plan_request", {"tools": allow})
366
-
367
- payload: Dict[str, Any] = {"prompt": prompt, "params": {"temperature": 0.2}}
368
- sys = _system_prompt(runtime_ns)
369
- if isinstance(sys, str) and sys.strip():
370
- payload["system_prompt"] = sys
371
- eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
372
- eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
373
- if isinstance(eff_provider, str) and eff_provider.strip():
374
- payload["provider"] = eff_provider.strip()
375
- if isinstance(eff_model, str) and eff_model.strip():
376
- payload["model"] = eff_model.strip()
377
-
378
- return StepPlan(
379
- node_id="plan",
380
- effect=Effect(
381
- type=EffectType.LLM_CALL,
382
- payload=payload,
383
- result_key="_temp.plan_llm_response",
384
- ),
385
- next_node="plan_parse",
386
- )
387
-
388
- def plan_parse_node(run: RunState, ctx) -> StepPlan:
389
- context, scratchpad, _, temp, _ = ensure_react_vars(run)
390
- resp = temp.get("plan_llm_response", {})
391
- if not isinstance(resp, dict):
392
- resp = {}
393
- plan_text = resp.get("content")
394
- plan = "" if plan_text is None else str(plan_text).strip()
395
- if not plan and isinstance(resp.get("data"), dict):
396
- plan = json.dumps(resp.get("data"), ensure_ascii=False, indent=2).strip()
397
-
398
- scratchpad["plan"] = plan
399
- temp.pop("plan_llm_response", None)
400
-
401
- if plan:
402
- context["messages"].append(_new_message(ctx, role="assistant", content=plan, metadata={"kind": "plan"}))
403
- emit("plan", {"plan": plan})
404
- return StepPlan(node_id="plan_parse", next_node="reason")
405
-
406
841
  def reason_node(run: RunState, ctx) -> StepPlan:
407
- context, scratchpad, runtime_ns, _, limits = ensure_react_vars(run)
842
+ context, scratchpad, runtime_ns, temp, limits = ensure_react_vars(run)
408
843
 
409
- # Read from _limits (canonical) with fallback to scratchpad (backward compat)
410
- if "current_iteration" in limits:
411
- iteration = int(limits.get("current_iteration", 0) or 0)
412
- max_iterations = int(limits.get("max_iterations", 25) or 25)
413
- else:
414
- # Backward compatibility: use scratchpad
415
- iteration = int(scratchpad.get("iteration", 0) or 0)
416
- max_iterations = int(scratchpad.get("max_iterations") or 25)
844
+ # Durable resume safety:
845
+ # - tool definitions can change across restarts (env/toolset swaps, staged deploy swaps)
846
+ # - allowlists can be edited at runtime by hosts
847
+ # `tool_specs` must match the effective allowlist + current tool defs, otherwise the LLM may
848
+ # see tools it cannot execute ("tool not allowed") or see stale schemas (signature mismatch).
849
+ try:
850
+ if isinstance(runtime_ns, dict):
851
+ allow = _effective_allowlist(runtime_ns)
852
+ allowed_defs = _allowed_tool_defs(allow)
853
+ include_examples = _tool_prompt_examples_enabled(runtime_ns)
854
+ refreshed_specs = _materialize_tool_specs(allowed_defs, include_examples=include_examples)
855
+ refreshed_id = _compute_toolset_id(refreshed_specs)
856
+ prev_id = str(runtime_ns.get("toolset_id") or "")
857
+ prev_specs = runtime_ns.get("tool_specs")
858
+ if refreshed_id != prev_id or not isinstance(prev_specs, list):
859
+ runtime_ns["tool_specs"] = refreshed_specs
860
+ runtime_ns["toolset_id"] = refreshed_id
861
+ runtime_ns.setdefault("allowed_tools", allow)
862
+ except Exception:
863
+ pass
417
864
 
865
+ max_iterations = int(limits.get("max_iterations", 0) or scratchpad.get("max_iterations", 25) or 25)
418
866
  if max_iterations < 1:
419
867
  max_iterations = 1
420
868
 
421
- if iteration >= max_iterations:
869
+ iteration = int(scratchpad.get("iteration", 0) or 0) + 1
870
+ if iteration > max_iterations:
422
871
  return StepPlan(node_id="reason", next_node="max_iterations")
423
872
 
424
- # Update both for transition period
425
- scratchpad["iteration"] = iteration + 1
426
- limits["current_iteration"] = iteration + 1
873
+ scratchpad["iteration"] = iteration
874
+ limits["current_iteration"] = iteration
427
875
 
428
876
  task = str(context.get("task", "") or "")
429
- messages_view = ActiveContextPolicy.select_active_messages_for_llm_from_run(run)
877
+ messages_view = list(context.get("messages") or [])
430
878
 
431
- # Refresh tool metadata BEFORE rendering Active Memory so token fitting stays accurate
432
- # (even though we do not render a "Tools (session)" block into Active Memory prompts).
433
- allow = _effective_allowlist(runtime_ns)
434
- allowed_defs = _allowed_tool_defs(allow)
435
- tool_specs = [t.to_dict() for t in allowed_defs]
436
- include_examples = bool(runtime_ns.get("tool_prompt_examples", True))
437
- if not include_examples:
438
- tool_specs = [{k: v for k, v in spec.items() if k != "examples"} for spec in tool_specs if isinstance(spec, dict)]
439
- runtime_ns["tool_specs"] = tool_specs
440
- runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
441
- runtime_ns.setdefault("allowed_tools", allow)
442
-
443
- inbox = runtime_ns.get("inbox", [])
444
- guidance = ""
445
- if isinstance(inbox, list) and inbox:
446
- inbox_messages = [str(m.get("content", "") or "") for m in inbox if isinstance(m, dict)]
447
- guidance = " | ".join([m for m in inbox_messages if m])
448
- runtime_ns["inbox"] = []
879
+ guidance = _drain_inbox(runtime_ns)
449
880
  req = logic.build_request(
450
881
  task=task,
451
882
  messages=messages_view,
452
883
  guidance=guidance,
453
- iteration=iteration + 1,
884
+ iteration=iteration,
454
885
  max_iterations=max_iterations,
455
- vars=run.vars, # Pass vars for _limits access
886
+ vars=run.vars,
456
887
  )
457
888
 
458
- emit("reason", {"iteration": iteration + 1, "max_iterations": max_iterations, "has_guidance": bool(guidance)})
889
+ emit("reason", {"iteration": iteration, "max_iterations": max_iterations, "has_guidance": bool(guidance)})
459
890
 
460
- # Provide the selected active-context messages as proper chat messages (sanitized).
461
- #
462
- # IMPORTANT: When we send `messages`, do not also send a non-empty `prompt`.
463
- # Some providers/servers will append `prompt` as an extra user message even when the
464
- # current request is already present in `messages`, which duplicates user turns and
465
- # wastes context budget.
466
891
  payload: Dict[str, Any] = {"prompt": ""}
467
- payload["messages"] = _sanitize_llm_messages(messages_view, limits=limits)
468
- tools_payload = list(tool_specs)
469
- if tools_payload:
470
- payload["tools"] = tools_payload
471
- sys = _system_prompt(runtime_ns) or req.system_prompt
472
- if isinstance(sys, str) and sys.strip():
473
- payload["system_prompt"] = sys
474
- # Provider/model can be configured statically (create_react_workflow args)
475
- # or injected dynamically through durable vars in `_runtime` (Visual Agent pins).
476
- eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
477
- eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
478
- if isinstance(eff_provider, str) and eff_provider.strip():
479
- payload["provider"] = eff_provider.strip()
480
- if isinstance(eff_model, str) and eff_model.strip():
481
- payload["model"] = eff_model.strip()
482
- params: Dict[str, Any] = {}
483
- if req.max_tokens is not None:
484
- params["max_tokens"] = req.max_tokens
485
- # Tool calling is formatting-sensitive; bias toward deterministic output when tools are present.
486
- params["temperature"] = 0.2 if tools_payload else 0.7
487
- payload["params"] = params
488
-
489
- return StepPlan(
490
- node_id="reason",
491
- effect=Effect(
492
- type=EffectType.LLM_CALL,
493
- payload=payload,
494
- result_key="_temp.llm_response",
495
- ),
496
- next_node="parse",
497
- )
498
-
499
- def tool_retry_minimal_node(run: RunState, ctx) -> StepPlan:
500
- """Recovery path when the model fabricates `observation[...]` logs instead of calling tools.
501
-
502
- This intentionally sends a minimal prompt (no History/Scratchpad) to reduce
503
- long-context contamination and force either a real tool call or a direct answer.
504
- """
505
- context, scratchpad, runtime_ns, temp, _ = ensure_react_vars(run)
506
- task = str(context.get("task", "") or "")
892
+ sanitized_messages = _sanitize_llm_messages(messages_view)
893
+ if sanitized_messages:
894
+ payload["messages"] = sanitized_messages
895
+ else:
896
+ # Ensure LLM_CALL contract is satisfied even for one-shot runs where callers
897
+ # provide only `context.task` and no `context.messages`.
898
+ task_text = str(task or "").strip()
899
+ if task_text:
900
+ payload["prompt"] = task_text
901
+ media = extract_media_from_context(context)
902
+ if media:
903
+ payload["media"] = media
904
+
905
+ tool_specs = runtime_ns.get("tool_specs") if isinstance(runtime_ns, dict) else None
906
+ if isinstance(tool_specs, list) and tool_specs:
907
+ payload["tools"] = list(tool_specs)
507
908
 
508
- allow = _effective_allowlist(runtime_ns)
509
- allowed_defs = _allowed_tool_defs(allow)
510
- tool_specs = [t.to_dict() for t in allowed_defs]
511
- include_examples = bool(runtime_ns.get("tool_prompt_examples", True))
512
- if not include_examples:
513
- tool_specs = [{k: v for k, v in spec.items() if k != "examples"} for spec in tool_specs if isinstance(spec, dict)]
514
- runtime_ns["tool_specs"] = tool_specs
515
- runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
516
- runtime_ns.setdefault("allowed_tools", allow)
517
- # Reuse the canonical agent rules from ReActLogic (but do not include history in prompt).
518
- sys_req = logic.build_request(task=task, messages=[], guidance="", iteration=0, max_iterations=0, vars=run.vars)
519
-
520
- bad_excerpt = str(temp.get("tool_retry_bad_content") or "").strip()
521
- temp.pop("tool_retry_bad_content", None)
522
- if len(bad_excerpt) > 240:
523
- bad_excerpt = bad_excerpt[:240].rstrip() + "…"
524
-
525
- prompt = (
526
- "Task:\n"
527
- f"{task}\n\n"
528
- "Your previous message was invalid: it contained fabricated `observation[...]` tool logs, but no tool was called.\n\n"
529
- "Now do ONE of the following:\n"
530
- "1) If you need more information to answer correctly, CALL ONE OR MORE TOOLS now using the required tool call format.\n"
531
- "2) If you can answer without tools, answer directly WITHOUT mentioning any tool calls or observations.\n\n"
532
- "Rules:\n"
533
- "- Do NOT write `observation[` anywhere.\n"
534
- "- Do NOT fabricate tool results.\n"
535
- "- If you call tools, output ONLY tool call block(s) (no extra text).\n"
536
- "- You MAY batch multiple tool calls by repeating the tool-call block once per call (prefer independent calls).\n"
537
- )
538
- if bad_excerpt:
539
- prompt += f"\nBad output excerpt (do not copy):\n{bad_excerpt}\n"
540
-
541
- payload: Dict[str, Any] = {"prompt": prompt}
542
- if tool_specs:
543
- payload["tools"] = tool_specs
544
- sys = _system_prompt(runtime_ns) or sys_req.system_prompt
545
- if isinstance(sys, str) and sys.strip():
909
+ sys_base = str(req.system_prompt or "").strip()
910
+ sys = _compose_system_prompt(runtime_ns, base=sys_base)
911
+ # Append scratchpad only when not using a full override prompt.
912
+ if _system_prompt_override(runtime_ns) is None:
913
+ scratch_txt = _render_cycles_for_system_prompt(scratchpad)
914
+ if scratch_txt:
915
+ sys = f"{sys.rstrip()}\n\n## Scratchpad (ReAct cycles so far)\n{scratch_txt}".strip()
916
+ if sys:
546
917
  payload["system_prompt"] = sys
547
918
 
548
919
  eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
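
The rewritten `_sanitize_llm_messages` above keeps assistant tool-call turns even when their content is empty and attaches `tool_call_id` to tool replies; an illustrative before/after (field values are made up):

# Input: runtime-owned history (extra fields like metadata are dropped on the way out).
history = [
    {"role": "assistant", "content": "",
     "tool_calls": [{"type": "function", "id": "call_1",
                     "function": {"name": "web_search", "arguments": {"query": "x"}}}]},
    {"role": "tool", "content": "result text", "metadata": {"call_id": "call_1"}},
    {"role": "assistant", "content": ""},  # empty and no tool calls: dropped
]
# Expected output (arguments re-encoded as a JSON string):
# [{"role": "assistant", "content": "", "tool_calls": [{"type": "function", "id": "call_1",
#   "function": {"name": "web_search", "arguments": "{\"query\": \"x\"}"}}]},
#  {"role": "tool", "content": "result text", "tool_call_id": "call_1"}]
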
@@ -552,303 +923,205 @@ def create_react_workflow(
552
923
  if isinstance(eff_model, str) and eff_model.strip():
553
924
  payload["model"] = eff_model.strip()
554
925
 
555
- payload["params"] = {"temperature": 0.2}
556
-
557
- emit("tool_retry_minimal", {"tools": allow, "has_excerpt": bool(bad_excerpt)})
558
- return StepPlan(
559
- node_id="tool_retry_minimal",
560
- effect=Effect(
561
- type=EffectType.LLM_CALL,
562
- payload=payload,
563
- result_key="_temp.llm_response",
564
- ),
565
- next_node="parse",
566
- )
567
-
568
- def empty_response_retry_node(run: RunState, ctx) -> StepPlan:
569
- """Recovery path when the model returns an empty message (no content, no tool calls).
570
-
571
- This is treated as an invalid agent step. We re-prompt with the original task plus
572
- recent tool evidence and explicitly require either tool calls or a substantive answer.
573
- """
574
- context, scratchpad, runtime_ns, _, _ = ensure_react_vars(run)
575
- task = str(context.get("task", "") or "")
576
-
577
- allow = _effective_allowlist(runtime_ns)
578
- allowed_defs = _allowed_tool_defs(allow)
579
- tool_specs = [t.to_dict() for t in allowed_defs]
580
- include_examples = bool(runtime_ns.get("tool_prompt_examples", True))
581
- if not include_examples:
582
- tool_specs = [{k: v for k, v in spec.items() if k != "examples"} for spec in tool_specs if isinstance(spec, dict)]
583
- runtime_ns["tool_specs"] = tool_specs
584
- runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
585
- runtime_ns.setdefault("allowed_tools", allow)
586
-
587
- # Include recent tool outputs and user messages as evidence (bounded).
588
- messages = list(context.get("messages") or [])
589
- evidence_lines: list[str] = []
590
- tool_count = 0
591
- user_count = 0
592
- for m in reversed(messages):
593
- if not isinstance(m, dict):
594
- continue
595
- role = m.get("role")
596
- content = m.get("content")
597
- if role == "tool" and isinstance(content, str) and content.strip():
598
- evidence_lines.append(content.strip())
599
- tool_count += 1
600
- elif role == "user" and isinstance(content, str) and content.strip():
601
- # Avoid duplicating the original task.
602
- if content.strip() != task.strip():
603
- evidence_lines.append(content.strip())
604
- user_count += 1
605
- if tool_count >= 6 and user_count >= 2:
606
- break
607
- evidence_lines.reverse()
608
- evidence = "\n\n".join(evidence_lines) if evidence_lines else "(no prior evidence captured)"
609
-
610
- # Build a strong corrective prompt. Prefer tools; allow a direct answer if truly possible.
611
- prompt = (
612
- "The previous assistant message was EMPTY (no content and no tool calls). This is invalid.\n"
613
- "Recover by continuing the task using the evidence below.\n\n"
614
- f"Task:\n{task}\n\n"
615
- f"Evidence (recent tool outputs + user messages):\n{evidence}\n\n"
616
- "Now do EXACTLY ONE of the following:\n"
617
- "1) CALL one or more tools to make progress (preferred).\n"
618
- "2) If you already have enough evidence, provide a concise final answer.\n\n"
619
- "Rules:\n"
620
- "- Do not output an empty message.\n"
621
- "- Do not ask the user a question in plain text; use the `ask_user` tool.\n"
622
- "- If you call tools, include the tool call(s) directly (no preamble).\n"
623
- )
624
-
625
- payload: Dict[str, Any] = {"prompt": prompt}
626
- if tool_specs:
627
- payload["tools"] = list(tool_specs)
628
- sys = _system_prompt(runtime_ns)
629
- if isinstance(sys, str) and sys.strip():
630
- payload["system_prompt"] = sys
631
- eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
632
- eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
633
- if isinstance(eff_provider, str) and eff_provider.strip():
634
- payload["provider"] = eff_provider.strip()
635
- if isinstance(eff_model, str) and eff_model.strip():
636
- payload["model"] = eff_model.strip()
637
- payload["params"] = {"temperature": 0.2}
926
+ params: Dict[str, Any] = {}
927
+ max_out = _max_output_tokens(runtime_ns, limits)
928
+ if isinstance(max_out, int) and max_out > 0:
929
+ params["max_tokens"] = max_out
930
+ # Tool calling is formatting-sensitive; bias toward a lower temperature when tools are present,
931
+ # unless the caller explicitly sets `_runtime.temperature`.
932
+ default_temp = 0.2 if isinstance(tool_specs, list) and tool_specs else 0.7
933
+ payload["params"] = runtime_llm_params(runtime_ns, extra=params, default_temperature=default_temp)
638
934
 
639
- emit("empty_response_retry", {"tools": allow, "evidence": bool(evidence_lines)})
640
935
  return StepPlan(
641
- node_id="empty_response_retry",
936
+ node_id="reason",
642
937
  effect=Effect(type=EffectType.LLM_CALL, payload=payload, result_key="_temp.llm_response"),
643
938
  next_node="parse",
644
939
  )
645
940
 
646
941
  def parse_node(run: RunState, ctx) -> StepPlan:
647
- context, scratchpad, runtime_ns, temp, _ = ensure_react_vars(run)
942
+ context, scratchpad, runtime_ns, temp, limits = ensure_react_vars(run)
648
943
  response = temp.get("llm_response", {})
649
- content, tool_calls = logic.parse_response(response)
650
-
651
- def _sanitize_tool_call_content(text: str) -> str:
652
- """Remove tool-transcript markers from assistant content before persisting to history.
653
-
654
- Some OSS models may include internal transcript artifacts (e.g. fabricated
655
- `observation[...]` lines) or embed the tool call itself inside the message
656
- (`Action:` blocks). We keep only the user-facing prose that appears *before*
657
- such markers so the runtime doesn't persist fabricated logs into context.
658
- """
659
- if not isinstance(text, str) or not text.strip():
660
- return ""
661
- out_lines: list[str] = []
662
- for line in text.splitlines():
663
- lowered = line.lstrip().lower()
664
- if lowered.startswith("observation["):
665
- break
666
- if lowered.startswith("action:"):
667
- break
668
- if lowered.startswith("<|tool_call|>") or lowered.startswith("<tool_call>"):
669
- break
670
- if lowered.startswith("```tool_call") or lowered.startswith("```tool_code"):
671
- break
672
- out_lines.append(line)
673
- return "\n".join(out_lines).rstrip()
674
944
 
675
- def _should_retry_for_missing_tool_call(text: str) -> bool:
676
- if not isinstance(text, str) or not text.strip():
677
- return False
678
- # Some models echo our internal History formatting (e.g. `observation[web_search] (success): ...`)
679
- # as transcript lines. Treat only *line-start* occurrences as suspicious (avoid false positives
680
- # in JSON/code blocks), and only use this signal when no tools have actually run yet.
681
- for line in text.splitlines():
682
- if line.lstrip().lower().startswith("observation["):
683
- return True
684
- return False
685
-
686
- def _extract_final_answer(text: str) -> tuple[bool, str]:
687
- """Return (is_explicit_final, stripped_answer)."""
688
- if not isinstance(text, str) or not text.strip():
689
- return False, ""
690
- s = text.lstrip()
691
- if s.upper().startswith("FINAL:"):
692
- return True, s[len("FINAL:") :].lstrip()
693
- return False, text
945
+ content, tool_calls = logic.parse_response(response)
946
+ finish_reason = ""
947
+ if isinstance(response, dict):
948
+ fr = response.get("finish_reason")
949
+ finish_reason = str(fr or "").strip().lower() if fr is not None else ""
694
950
 
951
+ cycle_i = int(scratchpad.get("iteration", 0) or 0)
952
+ max_iterations = int(limits.get("max_iterations", 0) or scratchpad.get("max_iterations", 25) or 25)
953
+ if max_iterations < 1:
954
+ max_iterations = 1
955
+ reasoning_text = ""
956
+ try:
957
+ if isinstance(response, dict):
958
+ rc = response.get("reasoning")
959
+ if rc is None:
960
+ rc = response.get("reasoning_content")
961
+ reasoning_text = str(rc or "")
962
+ except Exception:
963
+ reasoning_text = ""
695
964
  emit(
696
965
  "parse",
697
966
  {
967
+ "iteration": cycle_i,
968
+ "max_iterations": max_iterations,
698
969
  "has_tool_calls": bool(tool_calls),
699
- "content": content,
700
- "tool_calls": [{"name": tc.name, "arguments": tc.arguments, "call_id": tc.call_id} for tc in tool_calls],
970
+ "content": str(content or ""),
971
+ "reasoning": reasoning_text,
701
972
  },
702
973
  )
703
- temp.pop("llm_response", None)
704
-
705
- # Reset retry counter on any successful tool-call detection.
706
- if tool_calls:
707
- scratchpad["tool_retry_count"] = 0
708
- scratchpad["tool_retry_minimal_used"] = False
974
+ cycle: Dict[str, Any] = {"i": cycle_i, "thought": content, "tool_calls": [], "observations": []}
975
+ cycles = scratchpad.get("cycles")
976
+ if isinstance(cycles, list):
977
+ cycles.append(cycle)
978
+ else:
979
+ scratchpad["cycles"] = [cycle]
709
980
 
710
981
  if tool_calls:
711
- clean = _sanitize_tool_call_content(content)
712
- if clean.strip():
713
- context["messages"].append(_new_message(ctx, role="assistant", content=clean))
714
- if _flag(runtime_ns, "plan_mode", default=False):
715
- updated = _extract_plan_update(clean)
716
- if isinstance(updated, str) and updated.strip():
717
- scratchpad["plan"] = updated.strip()
718
- temp["pending_tool_calls"] = [tc.__dict__ for tc in tool_calls]
719
- return StepPlan(node_id="parse", next_node="act")
982
+ cycle["tool_calls"] = [tc.__dict__ for tc in tool_calls]
720
983
 
721
- # Empty response is an invalid step: recover with a bounded retry that carries evidence.
722
- if not isinstance(content, str) or not content.strip():
984
+ # Loop guard: some models may repeat the exact same tool calls (including side effects)
985
+ # even after receiving successful observations. Skip executing duplicates to avoid
986
+ # repeatedly overwriting files or re-running commands.
723
987
  try:
724
- empty_retries = int(scratchpad.get("empty_response_retry_count") or 0)
988
+ side_effect_tools = {
989
+ "write_file",
990
+ "edit_file",
991
+ "execute_command",
992
+ # Comms tools (side-effectful; avoid duplicate sends).
993
+ "send_email",
994
+ "send_whatsapp_message",
995
+ "send_telegram_message",
996
+ "send_telegram_artifact",
997
+ }
998
+ has_side_effect = any(
999
+ isinstance(getattr(tc, "name", None), str) and str(getattr(tc, "name") or "").strip() in side_effect_tools
1000
+ for tc in tool_calls
1001
+ )
1002
+
1003
+ if has_side_effect:
1004
+ cycles_list = scratchpad.get("cycles")
1005
+ prev_cycle: Optional[Dict[str, Any]] = None
1006
+ if isinstance(cycles_list, list) and len(cycles_list) >= 2:
1007
+ for c in reversed(cycles_list[:-1]):
1008
+ if not isinstance(c, dict):
1009
+ continue
1010
+ prev_tcs = c.get("tool_calls")
1011
+ if isinstance(prev_tcs, list) and prev_tcs:
1012
+ prev_cycle = c
1013
+ break
1014
+
1015
+ def _cycle_fps(c: Dict[str, Any]) -> list[str]:
1016
+ tcs2 = c.get("tool_calls")
1017
+ if not isinstance(tcs2, list) or not tcs2:
1018
+ return []
1019
+ fps: list[str] = []
1020
+ for tc in tcs2:
1021
+ if not isinstance(tc, dict):
1022
+ continue
1023
+ fps.append(_tool_call_fingerprint(tc.get("name", ""), tc.get("arguments")))
1024
+ return fps
1025
+
1026
+ def _cycle_obs_all_ok(c: Dict[str, Any]) -> bool:
1027
+ obs2 = c.get("observations")
1028
+ if not isinstance(obs2, list) or not obs2:
1029
+ return False
1030
+ for o in obs2:
1031
+ if not isinstance(o, dict):
1032
+ return False
1033
+ if o.get("success") is not True:
1034
+ return False
1035
+ return True
1036
+
1037
+ if prev_cycle is not None and _cycle_obs_all_ok(prev_cycle):
1038
+ prev_fps = _cycle_fps(prev_cycle)
1039
+ cur_fps = [_tool_call_fingerprint(tc.name, tc.arguments) for tc in tool_calls]
1040
+ if prev_fps and prev_fps == cur_fps:
1041
+ _push_inbox(
1042
+ runtime_ns,
1043
+ "You are repeating the exact same tool calls as the previous cycle, and they already succeeded.\n"
1044
+ "Do NOT execute them again (to avoid duplicate side effects).\n"
1045
+ "Instead, use the existing tool outputs and provide the final answer with NO tool calls.",
1046
+ )
1047
+ emit("parse_repeat_tool_calls", {"cycle": cycle_i, "count": len(tool_calls)})
1048
+ temp["pending_tool_calls"] = []
1049
+ return StepPlan(node_id="parse", next_node="reason")
725
1050
  except Exception:
726
- empty_retries = 0
1051
+ pass
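The guard above compares `_tool_call_fingerprint` values; that helper is referenced here but defined elsewhere in the module. A minimal sketch of how such a fingerprint could be computed, assuming canonical-JSON hashing (the real implementation may differ):

    import hashlib
    import json
    from typing import Any

    def _tool_call_fingerprint(name: Any, arguments: Any) -> str:
        # Hypothetical sketch: the real helper lives elsewhere in this module
        # and may canonicalize differently.
        args = arguments if isinstance(arguments, dict) else {}
        canonical = json.dumps(
            {"name": str(name or "").strip(), "arguments": args},
            sort_keys=True,  # dict ordering must not change the fingerprint
            ensure_ascii=False,
            default=str,  # tolerate non-JSON-serializable argument values
        )
        return hashlib.sha256(canonical.encode("utf-8")).hexdigest()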

- if empty_retries < 2:
- scratchpad["empty_response_retry_count"] = empty_retries + 1
- emit("parse_retry_empty_response", {"retries": empty_retries + 1})
- return StepPlan(node_id="parse", next_node="empty_response_retry")
+ # Keep tool transcript in context for OpenAI-compatible tool calling.
+ context["messages"].append(
+ _new_assistant_message_with_tool_calls(
+ ctx,
+ content="", # thought is stored in scratchpad (not user-visible history)
+ tool_calls=tool_calls,
+ metadata={"kind": "tool_calls", "cycle": cycle_i},
+ )
+ )
+ temp["pending_tool_calls"] = [tc.__dict__ for tc in tool_calls]
+ emit("parse_tool_calls", {"count": len(tool_calls)})
+ return StepPlan(node_id="parse", next_node="act")
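For background on why the assistant message above is kept in the transcript: OpenAI-compatible chat APIs expect every `role: "tool"` result to follow an assistant message whose `tool_calls` entry carries a matching id. A hedged illustration of that pairing (the tool name, id, and `tool_call_id` field illustrate the generic wire format, not this adapter's internal message schema):

    # Illustrative transcript shape for an OpenAI-compatible provider.
    transcript = [
        {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "type": "function",
                    "id": "call_1",
                    "function": {"name": "read_file", "arguments": "{\"path\": \"README.md\"}"},
                }
            ],
        },
        # Each tool result must echo the id of the call it answers.
        {"role": "tool", "tool_call_id": "call_1", "content": "# README ..."},
    ]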

- safe = (
- "I can't proceed: the model repeatedly returned empty outputs (no content, no tool calls).\n"
- "Please retry, reduce context, or switch models."
+ # If the model hit an output limit, treat the step as incomplete and continue.
+ if finish_reason in {"length", "max_tokens"}:
+ _push_inbox(
+ runtime_ns,
+ "Your previous response hit an output token limit before producing a complete tool call.\n"
+ "Retry now: emit ONLY the next tool call(s) needed to make progress.\n"
+ "Keep tool call arguments small (avoid large file contents / giant JSON blobs) to prevent tool-call truncation.\n"
+ "For large files, create a small skeleton first, then refine via multiple smaller edits/tool calls.\n"
+ "Do not write a long plan before tool calls.",
  )
- context["messages"].append(_new_message(ctx, role="assistant", content=safe, metadata={"kind": "error"}))
- temp["final_answer"] = safe
- temp["pending_tool_calls"] = []
- scratchpad["empty_response_retry_count"] = 0
- return StepPlan(node_id="parse", next_node="maybe_review")
+ emit("parse_retry_truncated", {"cycle": cycle_i})
+ return StepPlan(node_id="parse", next_node="reason")

- # If the model appears to have produced a fake "observation[tool]" transcript instead of
- # calling tools, give it one corrective retry before treating the message as final.
- if not bool(scratchpad.get("used_tools")) and _should_retry_for_missing_tool_call(content):
- try:
- retries = int(scratchpad.get("tool_retry_count") or 0)
- except Exception:
- retries = 0
- if retries < 2:
- scratchpad["tool_retry_count"] = retries + 1
- inbox = runtime_ns.get("inbox")
- if not isinstance(inbox, list):
- inbox = []
- runtime_ns["inbox"] = inbox
- inbox.append(
- {
- "role": "system",
- "content": (
- "You wrote an `observation[...]` line, but no tool was actually called.\n"
- "Do NOT fabricate tool outputs.\n"
- "If you need to search/fetch/read/write, CALL a tool now using the required tool call format.\n"
- "Never output `observation[...]` markers; those are context-only."
- ),
- }
- )
- emit("parse_retry_missing_tool_call", {"retries": retries + 1})
- return StepPlan(node_id="parse", next_node="reason")
-
- # If the model still fails after retries, attempt a single minimal-context recovery call
- # instead of accepting a fabricated transcript as the final answer.
- if not bool(scratchpad.get("tool_retry_minimal_used")):
- scratchpad["tool_retry_minimal_used"] = True
- scratchpad["tool_retry_count"] = 0
- temp["tool_retry_bad_content"] = content
- emit("parse_retry_minimal_context", {"retries": retries})
- return StepPlan(node_id="parse", next_node="tool_retry_minimal")
-
- safe = (
- "I can't proceed safely: the model repeatedly produced fabricated `observation[...]` tool logs instead of calling tools.\n"
- "Please retry, reduce context, or switch models."
+ if not isinstance(content, str) or not content.strip():
+ _push_inbox(runtime_ns, "Your previous response was empty. Continue the task.")
+ emit("parse_retry_empty", {"cycle": cycle_i})
+ return StepPlan(node_id="parse", next_node="reason")
+
+ # Follow-through heuristic: retry when the model claims it will take actions but emits no tool calls.
+ # Default ON (disable with `_runtime.check_plan=false`).
+ raw_check_plan = runtime_ns.get("check_plan") if isinstance(runtime_ns, dict) else None
+ check_plan = True if raw_check_plan is None else _boolish(raw_check_plan)
+ if check_plan and cycle_i < max_iterations and _looks_like_deferred_action(content):
+ _push_inbox(
+ runtime_ns,
+ "You said you would take an action, but you did not call any tools.\n"
+ "If you need to act, call the next tool now (emit ONLY the next tool call(s)).\n"
+ "If you are already done, provide the final answer with NO tool calls.",
  )
- context["messages"].append(_new_message(ctx, role="assistant", content=safe, metadata={"kind": "error"}))
- temp["final_answer"] = safe
- scratchpad["tool_retry_count"] = 0
- return StepPlan(node_id="parse", next_node="maybe_review")
-
- final_raw = _sanitize_tool_call_content(content)
- if not final_raw.strip():
- final_raw = str(content or "").strip()
-
- is_final, final_text = _extract_final_answer(final_raw)
- if is_final:
- if final_text:
- context["messages"].append(_new_message(ctx, role="assistant", content=final_text))
- if _flag(runtime_ns, "plan_mode", default=False):
- updated = _extract_plan_update(final_text)
- if isinstance(updated, str) and updated.strip():
- scratchpad["plan"] = updated.strip()
- temp["final_answer"] = final_text or "No answer provided"
- temp["pending_tool_calls"] = []
- scratchpad["tool_retry_count"] = 0
- return StepPlan(node_id="parse", next_node="maybe_review")
-
- # Default: treat as a normal final answer even if it lacks an explicit FINAL marker.
- final = final_raw
- if final:
- context["messages"].append(_new_message(ctx, role="assistant", content=final))
- if _flag(runtime_ns, "plan_mode", default=False):
- updated = _extract_plan_update(final)
- if isinstance(updated, str) and updated.strip():
- scratchpad["plan"] = updated.strip()
-
- temp["final_answer"] = final or "No answer provided"
- temp["pending_tool_calls"] = []
- scratchpad["tool_retry_count"] = 0
- scratchpad["empty_response_retry_count"] = 0
- return StepPlan(node_id="parse", next_node="maybe_review")
+ emit("parse_retry_plan_only", {"cycle": cycle_i})
+ return StepPlan(node_id="parse", next_node="reason")
+
+ # Final answer: stop the loop.
+ answer = str(content).strip()
+ temp["final_answer"] = answer
+ emit("parse_final", {"cycle": cycle_i})
+ return StepPlan(node_id="parse", next_node="done")
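`_looks_like_deferred_action` is referenced above but not shown in this hunk. A hypothetical sketch of such a heuristic, assuming simple pattern matching on first-person future-tense phrasing (the real detector may differ):

    import re

    # Hypothetical sketch; the real _looks_like_deferred_action lives
    # elsewhere in this module and its patterns may differ.
    _DEFERRED_RE = re.compile(
        r"\b(?:I(?:'ll| will)|let me|next,? I(?:'ll| will))\s+"
        r"(?:now\s+)?(?:run|call|create|write|edit|fetch|search|check)\b",
        re.IGNORECASE,
    )

    def _looks_like_deferred_action(text: str) -> bool:
        # Matches phrases like "I'll now run ..." or "let me check ..."
        return bool(_DEFERRED_RE.search(str(text or "")))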

  def act_node(run: RunState, ctx) -> StepPlan:
- # Treat `_temp.pending_tool_calls` as a durable queue.
- # This avoids dropping calls when schema-only tools (ask_user/memory/etc.) are interleaved
- # with normal tools, and avoids re-asking the same question due to missing context.
- context, scratchpad, runtime_ns, temp, _ = ensure_react_vars(run)
- raw_queue = temp.get("pending_tool_calls", [])
- if not isinstance(raw_queue, list) or not raw_queue:
- temp["pending_tool_calls"] = []
- return StepPlan(node_id="act", next_node="reason")
+ context, scratchpad, runtime_ns, temp, limits = ensure_react_vars(run)

- allow = _effective_allowlist(runtime_ns)
- builtin_effect_tools = {
- "ask_user",
- "recall_memory",
- "inspect_vars",
- "remember",
- "remember_note",
- "compact_memory",
- }
-
- # Normalize queue items and assign stable call_ids once so splitting into batches does not
- # introduce duplicate ids.
- tool_queue: List[Dict[str, Any]] = []
- for idx, item in enumerate(raw_queue, start=1):
- if isinstance(item, ToolCall):
- d: Dict[str, Any] = {"name": item.name, "arguments": item.arguments, "call_id": item.call_id}
- elif isinstance(item, dict):
- d = dict(item)
+ pending = temp.get("pending_tool_calls", [])
+ if not isinstance(pending, list):
+ pending = []
+
+ cycle_i = int(scratchpad.get("iteration", 0) or 0)
+ max_iterations = int(limits.get("max_iterations", 0) or scratchpad.get("max_iterations", 25) or 25)
+ if max_iterations < 1:
+ max_iterations = 1
+
+ tool_queue: list[Dict[str, Any]] = []
+ for idx, tc in enumerate(pending):
+ if isinstance(tc, ToolCall):
+ d = tc.__dict__
+ elif isinstance(tc, dict):
+ d = dict(tc)
  else:
  continue
- call_id = str(d.get("call_id") or "").strip()
- if not call_id:
+ if "call_id" not in d or not d.get("call_id"):
  d["call_id"] = str(idx)
  tool_queue.append(d)
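To make the normalization above concrete, here is an illustrative before/after (tool names invented for the example); note that `enumerate` starts at 0, so the first missing id becomes "0":

    # Illustrative only: a mixed queue normalized to dicts, with positional
    # call_ids filled in where missing so later re-batching never mints a
    # second id for the same call.
    pending = [
        {"name": "web_search", "arguments": {"query": "abstractruntime"}},
        {"name": "fetch_url", "arguments": {"url": "https://example.com"}, "call_id": "abc"},
    ]
    # After the loop, tool_queue == [
    #     {"name": "web_search", "arguments": {"query": "abstractruntime"}, "call_id": "0"},
    #     {"name": "fetch_url", "arguments": {"url": "https://example.com"}, "call_id": "abc"},
    # ]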

@@ -856,12 +1129,12 @@ def create_react_workflow(
  temp["pending_tool_calls"] = []
  return StepPlan(node_id="act", next_node="reason")

+ allow = _effective_allowlist(runtime_ns)
+
  def _is_builtin(tc: Dict[str, Any]) -> bool:
  name = tc.get("name")
  return isinstance(name, str) and name in builtin_effect_tools

- # Execute one schema-only builtin (if it is next), otherwise execute the longest contiguous
- # prefix of normal tools. Leave the remainder queued for subsequent act/observe cycles.
  if _is_builtin(tool_queue[0]):
  tc = tool_queue[0]
  name = str(tc.get("name") or "").strip()
@@ -869,7 +1142,6 @@ def create_react_workflow(
  if not isinstance(args, dict):
  args = {}

- # Pop the builtin from the queue.
  temp["pending_tool_calls"] = list(tool_queue[1:])

  if name and name not in allow:
@@ -892,18 +1164,11 @@ def create_react_workflow(
  choices = args.get("choices")
  choices = list(choices) if isinstance(choices, list) else None

- # Persist the asked question in the durable message history so both the main model
- # and the reviewer can see what was asked (and avoid re-asking).
  msgs = context.get("messages")
  if isinstance(msgs, list):
- content = f"[Agent question]: {question}"
- last = msgs[-1] if msgs else None
- last_role = last.get("role") if isinstance(last, dict) else None
- last_meta = last.get("metadata") if isinstance(last, dict) else None
- last_kind = last_meta.get("kind") if isinstance(last_meta, dict) else None
- last_content = last.get("content") if isinstance(last, dict) else None
- if not (last_role == "assistant" and last_kind == "ask_user_prompt" and str(last_content or "") == content):
- msgs.append(_new_message(ctx, role="assistant", content=content, metadata={"kind": "ask_user_prompt"}))
+ msgs.append(
+ _new_message(ctx, role="assistant", content=f"[Agent question]: {question}", metadata={"kind": "ask_user_prompt"})
+ )

  emit("ask_user", {"question": question, "choices": choices or []})
  return StepPlan(
@@ -964,26 +1229,78 @@ def create_react_workflow(
  payload = dict(args)
  payload.setdefault("tool_name", "compact_memory")
  payload.setdefault("call_id", tc.get("call_id") or "compact")
- emit(
- "memory_compact",
- {
- "preserve_recent": payload.get("preserve_recent"),
- "mode": payload.get("compression_mode"),
- "focus": payload.get("focus"),
- },
- )
+ emit("memory_compact", {"preserve_recent": payload.get("preserve_recent"), "mode": payload.get("compression_mode")})
  return StepPlan(
  node_id="act",
  effect=Effect(type=EffectType.MEMORY_COMPACT, payload=payload, result_key="_temp.tool_results"),
  next_node="observe",
  )

- # Unknown builtin: continue with the queue (best-effort).
- if temp.get("pending_tool_calls"):
- return StepPlan(node_id="act", next_node="act")
- return StepPlan(node_id="act", next_node="reason")
+ if name == "delegate_agent":
+ delegated_task = str(args.get("task") or "").strip()
+ delegated_context = str(args.get("context") or "").strip()
+
+ tools_raw = args.get("tools")
+ if tools_raw is None:
+ # Inherit the current allowlist, but avoid recursive delegation and avoid waiting on ask_user
+ # unless explicitly enabled.
+ child_allow = [t for t in allow if t not in {"delegate_agent", "ask_user"}]
+ else:
+ child_allow = _normalize_allowlist(tools_raw)
+
+ if not delegated_task:
+ temp["tool_results"] = {
+ "results": [
+ {
+ "call_id": str(tc.get("call_id") or ""),
+ "name": "delegate_agent",
+ "success": False,
+ "output": None,
+ "error": "delegate_agent requires a non-empty task",
+ }
+ ]
+ }
+ return StepPlan(node_id="act", next_node="observe")
+
+ combined_task = delegated_task
+ if delegated_context:
+ combined_task = f"{delegated_task}\n\nContext:\n{delegated_context}"
+
+ sub_vars: Dict[str, Any] = {
+ "context": {"task": combined_task, "messages": []},
+ "_runtime": {
+ "allowed_tools": list(child_allow),
+ "system_prompt_extra": (
+ "You are a delegated sub-agent.\n"
+ "- Focus ONLY on the delegated task.\n"
+ "- Use ONLY the allowed tools when needed.\n"
+ "- Do not ask the user questions; if blocked, state assumptions and proceed.\n"
+ "- Return a concise result suitable for the parent agent to act on.\n"
+ ),
+ },
+ "_limits": {"max_iterations": 10},
+ }
+
+ payload = {
+ "workflow_id": str(getattr(run, "workflow_id", "") or "react_agent"),
+ "vars": sub_vars,
+ "async": False,
+ "include_traces": False,
+ # Tool-mode wrapper so the parent receives a normal tool observation (no run failure on child failure).
+ "wrap_as_tool_result": True,
+ "tool_name": "delegate_agent",
+ "call_id": str(tc.get("call_id") or ""),
+ }
+ emit("delegate_agent", {"tools": list(child_allow), "call_id": payload.get("call_id")})
+ return StepPlan(
+ node_id="act",
+ effect=Effect(type=EffectType.START_SUBWORKFLOW, payload=payload, result_key="_temp.tool_results"),
+ next_node="observe",
+ )
+
+ # Unknown builtin: continue.
+ return StepPlan(node_id="act", next_node="act" if temp.get("pending_tool_calls") else "reason")
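A hedged usage example for the delegate_agent branch above, showing the arguments a parent model might emit and what the handler derives from them (task text invented for the example):

    # Illustrative delegate_agent call as a parent model might emit it.
    args = {
        "task": "Summarize the open TODOs under src/",
        "context": "Ignore vendored code and generated files.",
        # "tools" omitted: the child inherits the parent allowlist minus
        # delegate_agent and ask_user (no recursion, no blocking questions).
    }
    # The handler wraps the sub-run as a tool result, so a child failure
    # surfaces to the parent as a failed observation, not a failed run.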

- # Normal tools: execute contiguous prefix until the next builtin.
  batch: List[Dict[str, Any]] = []
  for tc in tool_queue:
  if _is_builtin(tc):
@@ -993,23 +1310,25 @@ def create_react_workflow(
  remaining = tool_queue[len(batch) :]
  temp["pending_tool_calls"] = list(remaining)

- # Emit observability events for the batch.
- for tc in batch:
- emit("act", {"tool": tc.get("name", ""), "args": tc.get("arguments", {}), "call_id": str(tc.get("call_id") or "")})
-
  formatted_calls: List[Dict[str, Any]] = []
  for tc in batch:
+ emit(
+ "act",
+ {
+ "iteration": cycle_i,
+ "max_iterations": max_iterations,
+ "tool": tc.get("name", ""),
+ "args": tc.get("arguments", {}),
+ "call_id": str(tc.get("call_id") or ""),
+ },
+ )
  formatted_calls.append(
  {"name": tc.get("name", ""), "arguments": tc.get("arguments", {}), "call_id": str(tc.get("call_id") or "")}
  )

  return StepPlan(
  node_id="act",
- effect=Effect(
- type=EffectType.TOOL_CALLS,
- payload={"tool_calls": formatted_calls, "allowed_tools": list(allow)},
- result_key="_temp.tool_results",
- ),
+ effect=Effect(type=EffectType.TOOL_CALLS, payload={"tool_calls": formatted_calls, "allowed_tools": list(allow)}, result_key="_temp.tool_results"),
  next_node="observe",
  )

@@ -1022,10 +1341,19 @@ def create_react_workflow(
  results = tool_results.get("results", [])
  if not isinstance(results, list):
  results = []
+
  if results:
  scratchpad["used_tools"] = True

- # Prefer a tool-supplied human/LLM-friendly rendering when present.
+ # Attach observations to the most recent cycle.
+ cycles = scratchpad.get("cycles")
+ last_cycle: Optional[Dict[str, Any]] = None
+ if isinstance(cycles, list):
+ for c in reversed(cycles):
+ if isinstance(c, dict) and int(c.get("i") or -1) == int(scratchpad.get("iteration") or -1):
+ last_cycle = c
+ break
+
  def _display(v: Any) -> str:
  if isinstance(v, dict):
  rendered = v.get("rendered")
@@ -1033,6 +1361,7 @@ def create_react_workflow(
  return rendered.strip()
  return "" if v is None else str(v)

+ obs_list: list[dict[str, Any]] = []
  for r in results:
  if not isinstance(r, dict):
  continue
@@ -1042,13 +1371,8 @@ def create_react_workflow(
  error = r.get("error", "")
  display = _display(output)
  if not success:
- # Preserve structured outputs for provenance, but show a clean string to the LLM/UI.
  display = _display(output) if isinstance(output, dict) else str(error or output)
- rendered = logic.format_observation(
- name=name,
- output=display,
- success=success,
- )
+ rendered = logic.format_observation(name=name, output=display, success=success)
  emit("observe", {"tool": name, "success": success, "result": rendered})

  context["messages"].append(
@@ -1056,300 +1380,31 @@ def create_react_workflow(
  ctx,
  role="tool",
  content=rendered,
- metadata={
- "name": name,
- "call_id": r.get("call_id"),
- "success": success,
- },
+ metadata={"name": name, "call_id": r.get("call_id"), "success": success},
  )
  )

+ obs_list.append(
+ {
+ "call_id": r.get("call_id"),
+ "name": name,
+ "success": success,
+ "output": output,
+ "error": error,
+ "rendered": rendered,
+ }
+ )
+
+ if last_cycle is not None:
+ last_cycle["observations"] = obs_list
+
  temp.pop("tool_results", None)
- # Reset verifier/review rounds after executing tools. This enables repeated
- # verify→act→observe cycles without immediately hitting review_max_rounds.
- scratchpad["review_count"] = 0
  pending = temp.get("pending_tool_calls", [])
  if isinstance(pending, list) and pending:
  return StepPlan(node_id="observe", next_node="act")
  temp["pending_tool_calls"] = []
  return StepPlan(node_id="observe", next_node="reason")
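Taken together, parse_node records the thought and tool calls for a cycle, and observe_node above attaches the matching observations. A hedged illustration of one completed `scratchpad["cycles"]` entry after a full reason, parse, act, observe round (all values invented):

    cycle = {
        "i": 3,
        "thought": "The file exists; read it before editing.",
        "tool_calls": [
            {"name": "read_file", "arguments": {"path": "notes.md"}, "call_id": "0"},
        ],
        "observations": [
            {
                "call_id": "0",
                "name": "read_file",
                "success": True,
                "output": "...",
                "error": "",
                "rendered": "...",  # the logic.format_observation(...) string
            },
        ],
    }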

- def maybe_review_node(run: RunState, ctx) -> StepPlan:
- _, scratchpad, runtime_ns, _, _ = ensure_react_vars(run)
-
- if not _flag(runtime_ns, "review_mode", default=False):
- return StepPlan(node_id="maybe_review", next_node="done")
-
- max_rounds = _int(runtime_ns, "review_max_rounds", default=1)
- if max_rounds < 0:
- max_rounds = 0
- count = scratchpad.get("review_count")
- try:
- count_int = int(count or 0)
- except Exception:
- count_int = 0
-
- if count_int >= max_rounds:
- return StepPlan(node_id="maybe_review", next_node="done")
-
- scratchpad["review_count"] = count_int + 1
- return StepPlan(node_id="maybe_review", next_node="review")
-
- def review_node(run: RunState, ctx) -> StepPlan:
- context, scratchpad, runtime_ns, _, limits = ensure_react_vars(run)
-
- task = str(context.get("task", "") or "")
- plan = scratchpad.get("plan")
- plan_text = str(plan).strip() if isinstance(plan, str) and plan.strip() else "(no plan)"
-
- allow = _effective_allowlist(runtime_ns)
-
- def _truncate_block(text: str, *, max_chars: int) -> str:
- s = str(text or "")
- if max_chars <= 0:
- return s
- if len(s) <= max_chars:
- return s
- suffix = f"\n… (truncated, {len(s):,} chars total)"
- keep = max_chars - len(suffix)
- if keep < 200:
- keep = max_chars
- suffix = ""
- return s[:keep].rstrip() + suffix
-
- def _format_allowed_tools() -> str:
- # Prefer the already-computed tool_specs (created in reason_node) to avoid
- # re-materializing tool definitions and to keep formatting stable.
- specs = runtime_ns.get("tool_specs")
- if not isinstance(specs, list) or not specs:
- defs = _allowed_tool_defs(allow)
- specs = [t.to_dict() for t in defs]
- lines: list[str] = []
- for spec in specs:
- if not isinstance(spec, dict):
- continue
- name = str(spec.get("name") or "").strip()
- if not name:
- continue
- params = spec.get("parameters")
- props = params.get("properties", {}) if isinstance(params, dict) else {}
- keys = sorted([k for k in props.keys() if isinstance(k, str)])
- if keys:
- lines.append(f"- {name}({', '.join(keys)})")
- else:
- lines.append(f"- {name}()")
- return "\n".join(lines) if lines else "(no tools available)"
-
- # Include recent tool outputs for evidence-based review.
- messages = list(context.get("messages") or [])
- tool_msgs: list[str] = []
- try:
- tool_limit = int(limits.get("review_max_tool_output_chars", -1))
- except Exception:
- tool_limit = -1
- try:
- answer_limit = int(limits.get("review_max_answer_chars", -1))
- except Exception:
- answer_limit = -1
-
- for m in reversed(messages):
- if not isinstance(m, dict) or m.get("role") != "tool":
- continue
- content = m.get("content")
- if isinstance(content, str) and content.strip():
- tool_msgs.append(_truncate_block(content.strip(), max_chars=tool_limit))
- if len(tool_msgs) >= 8:
- break
- tool_msgs.reverse()
- observations = "\n\n".join(tool_msgs) if tool_msgs else "(no tool outputs)"
-
- # Include recent user messages (especially ask_user responses) so the reviewer can
- # avoid re-asking questions the user already answered.
- try:
- user_limit = int(limits.get("review_max_user_message_chars", -1))
- except Exception:
- user_limit = -1
-
- user_msgs: list[str] = []
- ask_prompts: list[str] = []
- for m in reversed(messages):
- if not isinstance(m, dict):
- continue
- role = m.get("role")
- content = m.get("content")
- if role == "user" and isinstance(content, str) and content.strip():
- if content.strip() != task.strip():
- user_msgs.append(_truncate_block(content.strip(), max_chars=user_limit))
- if len(user_msgs) >= 4:
- break
- for m in reversed(messages):
- if not isinstance(m, dict):
- continue
- if m.get("role") != "assistant":
- continue
- meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
- if not isinstance(meta, dict) or meta.get("kind") != "ask_user_prompt":
- continue
- content = m.get("content")
- if isinstance(content, str) and content.strip():
- ask_prompts.append(_truncate_block(content.strip(), max_chars=user_limit))
- if len(ask_prompts) >= 4:
- break
-
- user_msgs.reverse()
- ask_prompts.reverse()
- user_context = "\n\n".join(user_msgs) if user_msgs else "(no additional user messages)"
- asked_context = "\n\n".join(ask_prompts) if ask_prompts else "(no ask_user prompts recorded)"
-
- # The verifier should primarily judge based on tool outputs. Only include an answer
- # excerpt when we have no tool evidence (pure Q&A runs).
- answer_raw = str(run.vars.get("_temp", {}).get("final_answer") or "")
- answer_excerpt = ""
- if not tool_msgs and answer_raw.strip():
- answer_excerpt = _truncate_block(answer_raw.strip(), max_chars=answer_limit)
-
- prompt = (
- "You are a verifier. Review whether the user's request has been fully satisfied.\n"
- "Be strict: only count actions that are supported by the tool outputs.\n"
- "If anything is missing, propose the NEXT ACTIONS.\n"
- "Prefer returning `next_tool_calls` over `next_prompt`.\n"
- "Return JSON ONLY.\n\n"
- f"User request:\n{task}\n\n"
- f"Plan:\n{plan_text}\n\n"
- f"Recent ask_user prompts:\n{asked_context}\n\n"
- f"Recent user messages:\n{user_context}\n\n"
- + (f"Current answer (excerpt):\n{answer_excerpt}\n\n" if answer_excerpt else "")
- + f"Tool outputs:\n{observations}\n\n"
- f"Allowed tools:\n{_format_allowed_tools()}\n\n"
- )
-
- schema = {
- "type": "object",
- "properties": {
- "complete": {"type": "boolean"},
- "missing": {"type": "array", "items": {"type": "string"}},
- "next_prompt": {"type": "string"},
- "next_tool_calls": {
- "type": "array",
- "items": {
- "type": "object",
- "properties": {
- "name": {"type": "string"},
- "arguments": {"type": "object"},
- },
- "required": ["name", "arguments"],
- "additionalProperties": False,
- },
- },
- },
- "required": ["complete", "missing", "next_prompt", "next_tool_calls"],
- "additionalProperties": False,
- }
-
- emit("review_request", {"tool_messages": len(tool_msgs)})
-
- payload: Dict[str, Any] = {
- "prompt": prompt,
- "response_schema": schema,
- "response_schema_name": "ReActVerifier",
- "params": {"temperature": 0.2},
- }
- sys = _system_prompt(runtime_ns)
- if sys is not None:
- payload["system_prompt"] = sys
- eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
- eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
- if isinstance(eff_provider, str) and eff_provider.strip():
- payload["provider"] = eff_provider.strip()
- if isinstance(eff_model, str) and eff_model.strip():
- payload["model"] = eff_model.strip()
-
- return StepPlan(
- node_id="review",
- effect=Effect(
- type=EffectType.LLM_CALL,
- payload=payload,
- result_key="_temp.review_llm_response",
- ),
- next_node="review_parse",
- )
-
- def review_parse_node(run: RunState, ctx) -> StepPlan:
- _, _, runtime_ns, temp, _ = ensure_react_vars(run)
- resp = temp.get("review_llm_response", {})
- if not isinstance(resp, dict):
- resp = {}
-
- data = resp.get("data")
- if data is None and isinstance(resp.get("content"), str):
- try:
- data = json.loads(resp["content"])
- except Exception:
- data = None
- if not isinstance(data, dict):
- data = {}
-
- complete = bool(data.get("complete"))
- missing = data.get("missing") if isinstance(data.get("missing"), list) else []
- next_prompt = data.get("next_prompt")
- next_prompt_text = str(next_prompt or "").strip()
- next_tool_calls_raw = data.get("next_tool_calls")
- next_tool_calls: list[dict[str, Any]] = []
- if isinstance(next_tool_calls_raw, list):
- for item in next_tool_calls_raw:
- if not isinstance(item, dict):
- continue
- name = str(item.get("name") or "").strip()
- args = item.get("arguments")
- if not isinstance(args, dict):
- args = {}
- if name:
- next_tool_calls.append({"name": name, "arguments": args})
-
- emit("review", {"complete": complete, "missing": missing})
- temp.pop("review_llm_response", None)
-
- if complete:
- return StepPlan(node_id="review_parse", next_node="done")
-
- if next_tool_calls:
- temp["pending_tool_calls"] = next_tool_calls
- emit("review_tool_calls", {"count": len(next_tool_calls)})
- return StepPlan(node_id="review_parse", next_node="act")
-
- # Behavioral validation: if incomplete but no tool calls, re-ask reviewer once with stricter rules.
- if not complete and not next_tool_calls:
- try:
- retry_count = int(runtime_ns.get("review_retry_count") or 0)
- except Exception:
- retry_count = 0
- if retry_count < 1:
- runtime_ns["review_retry_count"] = retry_count + 1
- inbox = runtime_ns.get("inbox")
- if not isinstance(inbox, list):
- inbox = []
- runtime_ns["inbox"] = inbox
- inbox.append(
- {
- "content": (
- "[Review] Your last review output was not actionable. "
- "If incomplete, you MUST return at least one `next_tool_call` "
- "(use `ask_user` if you need clarification). Return JSON only."
- )
- }
- )
- emit("review_retry_unactionable", {"retry": retry_count + 1})
- return StepPlan(node_id="review_parse", next_node="review")
-
- runtime_ns["review_retry_count"] = 0
- if next_prompt_text:
- inbox = runtime_ns.get("inbox")
- if not isinstance(inbox, list):
- inbox = []
- runtime_ns["inbox"] = inbox
- inbox.append({"content": f"[Review] {next_prompt_text}"})
- return StepPlan(node_id="review_parse", next_node="reason")
-
  def handle_user_response_node(run: RunState, ctx) -> StepPlan:
  context, _, _, temp, _ = ensure_react_vars(run)
  user_response = temp.get("user_response", {})
@@ -1358,9 +1413,7 @@ def create_react_workflow(
  response_text = str(user_response.get("response", "") or "")
  emit("user_response", {"response": response_text})

- context["messages"].append(
- _new_message(ctx, role="user", content=f"[User response]: {response_text}")
- )
+ context["messages"].append(_new_message(ctx, role="user", content=f"[User response]: {response_text}"))
  temp.pop("user_response", None)

  if temp.get("pending_tool_calls"):
@@ -1369,14 +1422,11 @@ def create_react_workflow(

  def done_node(run: RunState, ctx) -> StepPlan:
  context, scratchpad, _, temp, limits = ensure_react_vars(run)
+ task = str(context.get("task", "") or "")
  answer = str(temp.get("final_answer") or "No answer provided")
- emit("done", {"answer": answer})

- # Prefer _limits.current_iteration, fall back to scratchpad
- iterations = int(limits.get("current_iteration", 0) or scratchpad.get("iteration", 0) or 0)
+ emit("done", {"answer": answer})

- # Persist the final user-facing answer into the conversation history so it shows up
- # in /history and becomes part of the next run's seed context.
  messages = context.get("messages")
  if isinstance(messages, list):
  last = messages[-1] if messages else None
@@ -1385,32 +1435,164 @@ def create_react_workflow(
  if last_role != "assistant" or str(last_content or "") != answer:
  messages.append(_new_message(ctx, role="assistant", content=answer, metadata={"kind": "final_answer"}))

+ iterations = int(limits.get("current_iteration", 0) or scratchpad.get("iteration", 0) or 0)
+ report = _render_final_report(task, scratchpad)
+
  return StepPlan(
  node_id="done",
  complete_output={
  "answer": answer,
+ "report": report,
  "iterations": iterations,
  "messages": list(context.get("messages") or []),
+ "scratchpad": dict(scratchpad),
  },
  )
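`_render_final_report` is called above but not shown in this diff. A hypothetical sketch, assuming it flattens the cycles ledger into a plain-text summary (the real formatting may differ):

    from typing import Any, Dict

    def _render_final_report(task: str, scratchpad: Dict[str, Any]) -> str:
        # Hypothetical sketch: the real helper is defined elsewhere in this
        # module and may format the report differently.
        lines = [f"Task: {task}".strip()]
        cycles = scratchpad.get("cycles")
        for c in cycles if isinstance(cycles, list) else []:
            if not isinstance(c, dict):
                continue
            lines.append(f"Cycle {c.get('i')}:")
            for tc in c.get("tool_calls") or []:
                if isinstance(tc, dict):
                    lines.append(f"  called {tc.get('name')}")
            for o in c.get("observations") or []:
                if isinstance(o, dict):
                    lines.append(f"    -> {o.get('name')}: success={o.get('success')}")
        return "\n".join(lines)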

  def max_iterations_node(run: RunState, ctx) -> StepPlan:
- context, scratchpad, _, _, limits = ensure_react_vars(run)
-
- # Prefer _limits, fall back to scratchpad
+ context, scratchpad, runtime_ns, temp, limits = ensure_react_vars(run)
  max_iterations = int(limits.get("max_iterations", 0) or scratchpad.get("max_iterations", 25) or 25)
  if max_iterations < 1:
  max_iterations = 1
  emit("max_iterations", {"iterations": max_iterations})

- messages = list(context.get("messages") or [])
- last_content = messages[-1]["content"] if messages else "Max iterations reached"
+ # Deterministic conclusion: when we hit the iteration cap, run one tool-free LLM call
+ # to synthesize a final report + next steps while the scratchpad is still in context.
+ resp = temp.get("max_iterations_llm_response")
+ if not isinstance(resp, dict):
+ drained_guidance = _drain_inbox(runtime_ns)
+ conclude_directive = (
+ "You have reached the maximum allowed ReAct iterations.\n"
+ "You MUST stop using tools now and provide a best-effort conclusion.\n\n"
+ "In your response, include:\n"
+ "1) A concise progress report (what you did + key observations).\n"
+ "2) The best current answer you can give based on evidence.\n"
+ "3) Remaining uncertainties / missing info.\n"
+ "4) Next steps: exact actions to finish (files to inspect/edit, commands/tools to run, what to look for).\n\n"
+ "Rules:\n"
+ "- Do NOT call tools.\n"
+ "- Do NOT output tool-call markup (e.g. <tool_call>...</tool_call>).\n"
+ "- Do NOT mention internal scratchpads; just present the report.\n"
+ "- Prefer bullet points and concrete next steps."
+ )
+
+ task = str(context.get("task", "") or "")
+ messages_view = list(context.get("messages") or [])
+
+ req = logic.build_request(
+ task=task,
+ messages=messages_view,
+ guidance="",
+ iteration=max_iterations,
+ max_iterations=max_iterations,
+ vars=run.vars,
+ )
+
+ payload: Dict[str, Any] = {"prompt": ""}
+ sanitized_messages = _sanitize_llm_messages(messages_view)
+ if sanitized_messages:
+ payload["messages"] = sanitized_messages
+ else:
+ task_text = str(task or "").strip()
+ if task_text:
+ payload["prompt"] = task_text
+
+ media = extract_media_from_context(context)
+ if media:
+ payload["media"] = media
+
+ sys_base = str(req.system_prompt or "").strip()
+ sys = _compose_system_prompt(runtime_ns, base=sys_base)
+ block_parts: list[str] = []
+ if drained_guidance:
+ block_parts.append(f"Host guidance:\n{drained_guidance}")
+ block_parts.append(conclude_directive)
+ sys = (f"{sys.rstrip()}\n\n## Max iterations reached\n" + "\n\n".join(block_parts)).strip()
+ scratch_txt = _render_cycles_for_conclusion_prompt(scratchpad)
+ if scratch_txt:
+ sys = f"{sys.rstrip()}\n\n## Scratchpad (ReAct cycles so far)\n{scratch_txt}".strip()
+ if sys:
+ payload["system_prompt"] = sys
+
+ eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
+ eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
+ if isinstance(eff_provider, str) and eff_provider.strip():
+ payload["provider"] = eff_provider.strip()
+ if isinstance(eff_model, str) and eff_model.strip():
+ payload["model"] = eff_model.strip()
+
+ params: Dict[str, Any] = {}
+ max_out = _max_output_tokens(runtime_ns, limits)
+ if isinstance(max_out, int) and max_out > 0:
+ params["max_tokens"] = max_out
+ payload["params"] = runtime_llm_params(runtime_ns, extra=params, default_temperature=0.2)
+
+ return StepPlan(
+ node_id="max_iterations",
+ effect=Effect(type=EffectType.LLM_CALL, payload=payload, result_key="_temp.max_iterations_llm_response"),
+ next_node="max_iterations",
+ )
+
+ # We have a conclusion LLM response. Parse it and complete the run.
+ content, tool_calls = logic.parse_response(resp)
+ answer = str(content or "").strip()
+ temp.pop("max_iterations_llm_response", None)
+
+ # If the model still emitted tool calls, or if it leaked tool-call markup as plain text,
+ # retry once with a stricter instruction.
+ tool_tags = _contains_tool_call_markup(answer)
+ if tool_calls or tool_tags:
+ retries = int(temp.get("max_iterations_conclude_retries", 0) or 0)
+ if retries < 1:
+ temp["max_iterations_conclude_retries"] = retries + 1
+ _push_inbox(
+ runtime_ns,
+ "You are out of iterations and tool use is disabled.\n"
+ "Return ONLY the final report and next steps as plain text.\n"
+ "Do NOT include any tool calls or tool-call markup (e.g. <tool_call>...</tool_call>).",
+ )
+ return StepPlan(node_id="max_iterations", next_node="max_iterations")
+ # Last resort: strip any leaked tool markup so we don't persist it as the final answer.
+ answer = _strip_tool_call_markup(answer).strip()
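`_contains_tool_call_markup` and `_strip_tool_call_markup` are used above but defined elsewhere. A minimal hypothetical sketch, assuming they target the `<tool_call>...</tool_call>` tags named in the directive (the real helpers may recognize more markup variants):

    import re

    # Hypothetical sketches; the real helpers live elsewhere in this module.
    _TOOL_CALL_TAG_RE = re.compile(r"<tool_call>.*?</tool_call>", re.DOTALL)

    def _contains_tool_call_markup(text: str) -> bool:
        return bool(_TOOL_CALL_TAG_RE.search(str(text or "")))

    def _strip_tool_call_markup(text: str) -> str:
        return _TOOL_CALL_TAG_RE.sub("", str(text or ""))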
+
+ if not answer:
+ # Fallback: avoid returning the last tool observation as the "answer".
+ # Provide a deterministic report so users don't lose scratchpad context.
+ scratch_view = _render_cycles_for_conclusion_prompt(scratchpad)
+ parts = [
+ "Max iterations reached.",
+ "I could not produce a final assistant response in time.",
+ ]
+ if scratch_view:
+ parts.append("## Progress (from scratchpad)\n" + scratch_view)
+ parts.append(
+ "## Next steps\n"
+ "- Increase `max_iterations` and rerun, or use `/conclude` earlier to force a wrap-up.\n"
+ "- If you need me to continue, re-run with a higher iteration budget and I will pick up from the report above."
+ )
+ answer = "\n\n".join(parts).strip()
+
+ # Persist final answer into the conversation history (so it shows up in /history and seeds next runs).
+ messages = context.get("messages")
+ if isinstance(messages, list):
+ last = messages[-1] if messages else None
+ last_role = last.get("role") if isinstance(last, dict) else None
+ last_content = last.get("content") if isinstance(last, dict) else None
+ if last_role != "assistant" or str(last_content or "") != answer:
+ messages.append(_new_message(ctx, role="assistant", content=answer, metadata={"kind": "final_answer"}))
+
+ temp["final_answer"] = answer
+ report = _render_final_report(str(context.get("task") or ""), scratchpad)
+
+ iterations = int(limits.get("current_iteration", 0) or scratchpad.get("iteration", 0) or max_iterations)
  return StepPlan(
  node_id="max_iterations",
  complete_output={
- "answer": last_content,
- "iterations": max_iterations,
- "messages": messages,
+ "answer": answer,
+ "report": report,
+ "iterations": iterations,
+ "messages": list(context.get("messages") or []),
+ "scratchpad": dict(scratchpad),
  },
  )

@@ -1419,18 +1601,11 @@ def create_react_workflow(
  entry_node="init",
  nodes={
  "init": init_node,
- "plan": plan_node,
- "plan_parse": plan_parse_node,
  "reason": reason_node,
- "tool_retry_minimal": tool_retry_minimal_node,
- "empty_response_retry": empty_response_retry_node,
  "parse": parse_node,
  "act": act_node,
  "observe": observe_node,
  "handle_user_response": handle_user_response_node,
- "maybe_review": maybe_review_node,
- "review": review_node,
- "review_parse": review_parse_node,
  "done": done_node,
  "max_iterations": max_iterations_node,
  },