abstractagent 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,10 +9,10 @@ from typing import Any, Callable, Dict, List, Optional
 from abstractcore.tools import ToolCall
 from abstractruntime import Effect, EffectType, RunState, StepPlan, WorkflowSpec
 from abstractruntime.core.vars import ensure_limits, ensure_namespaces
+from abstractruntime.memory.active_context import ActiveContextPolicy
 
 from ..logic.react import ReActLogic
 
-
 def _new_message(
     ctx: Any,
     *,
@@ -29,11 +29,16 @@ def _new_message(
 
     timestamp = datetime.now(timezone.utc).isoformat()
 
+    import uuid
+
+    meta = dict(metadata or {})
+    meta.setdefault("message_id", f"msg_{uuid.uuid4().hex}")
+
     return {
         "role": role,
         "content": content,
         "timestamp": timestamp,
-        "metadata": metadata or {},
+        "metadata": meta,
     }
 
 
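Every message minted by `_new_message` now carries a stable `message_id` in its metadata. A minimal standalone sketch of the resulting shape (mirroring the helper above rather than importing it):

    import uuid
    from datetime import datetime, timezone

    def new_message(role, content, metadata=None):
        # Mirrors _new_message: caller metadata is preserved; a message_id
        # is stamped only when the caller did not already supply one.
        meta = dict(metadata or {})
        meta.setdefault("message_id", f"msg_{uuid.uuid4().hex}")
        return {
            "role": role,
            "content": content,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "metadata": meta,
        }

    msg = new_message("user", "hi", metadata={"kind": "greeting"})
    assert msg["metadata"]["kind"] == "greeting"
    assert msg["metadata"]["message_id"].startswith("msg_")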
@@ -89,6 +94,13 @@ def ensure_react_vars(run: RunState) -> tuple[Dict[str, Any], Dict[str, Any], Di
     if scratchpad["max_iterations"] < 1:
         scratchpad["max_iterations"] = 1
 
+    # Track whether any external tools were actually executed during this run.
+    # This is used to reliably trigger a final "synthesis" pass so the agent
+    # returns a user-facing answer instead of echoing tool observations.
+    used_tools = scratchpad.get("used_tools")
+    if not isinstance(used_tools, bool):
+        scratchpad["used_tools"] = bool(used_tools) if used_tools is not None else False
+
     return context, scratchpad, runtime_ns, temp, limits
 
 
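The coercion above keeps `scratchpad["used_tools"]` a strict bool across resumed runs; the rule it applies, as a standalone sketch:

    def normalize_used_tools(value):
        # Mirrors ensure_react_vars: non-bool values are coerced to bool,
        # and a missing value (None) defaults to False.
        if isinstance(value, bool):
            return value
        return bool(value) if value is not None else False

    assert normalize_used_tools(None) is False
    assert normalize_used_tools(1) is True
    assert normalize_used_tools("") is False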
@@ -103,6 +115,10 @@ def create_react_workflow(
     *,
     logic: ReActLogic,
     on_step: Optional[Callable[[str, Dict[str, Any]], None]] = None,
+    workflow_id: str = "react_agent",
+    provider: Optional[str] = None,
+    model: Optional[str] = None,
+    allowed_tools: Optional[List[str]] = None,
 ) -> WorkflowSpec:
     """Adapt ReActLogic to an AbstractRuntime workflow."""
 
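A hedged usage sketch of the widened signature; the `ReActLogic` construction and the tool names below are illustrative assumptions, not part of this diff:

    from abstractagent.logic.react import ReActLogic  # import path inferred from `..logic.react`

    logic = ReActLogic(tools=my_tools)  # constructor arguments assumed
    spec = create_react_workflow(
        logic=logic,
        workflow_id="support_agent",               # defaults to "react_agent"
        provider="openai",                         # optional static provider pin
        model="gpt-4o-mini",                       # optional static model pin
        allowed_tools=["web_search", "ask_user"],  # optional tool allowlist
    )

All four new keyword arguments are optional, so 0.2.0 call sites keep working unchanged.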
@@ -110,9 +126,200 @@ def create_react_workflow(
         if on_step:
             on_step(step, data)
 
-    tool_defs = logic.tools
-    tool_specs = [t.to_dict() for t in tool_defs]
-    toolset_id = _compute_toolset_id(tool_specs)
+    def _current_tool_defs() -> list[Any]:
+        """Return the current tool definitions from the logic (dynamic)."""
+        defs = getattr(logic, "tools", None)
+        if not isinstance(defs, list):
+            try:
+                defs = list(defs)  # type: ignore[arg-type]
+            except Exception:
+                defs = []
+        return [t for t in defs if getattr(t, "name", None)]
+
+    def _tool_by_name() -> dict[str, Any]:
+        out: dict[str, Any] = {}
+        for t in _current_tool_defs():
+            name = getattr(t, "name", None)
+            if isinstance(name, str) and name.strip():
+                out[name] = t
+        return out
+
+    def _default_allowlist() -> list[str]:
+        if isinstance(allowed_tools, list):
+            allow = [str(t).strip() for t in allowed_tools if isinstance(t, str) and t.strip()]
+            return allow if allow else []
+        # Default allowlist: all tools currently known to the logic (deduped, order preserved).
+        out: list[str] = []
+        seen: set[str] = set()
+        for t in _current_tool_defs():
+            name = getattr(t, "name", None)
+            if not isinstance(name, str) or not name.strip() or name in seen:
+                continue
+            seen.add(name)
+            out.append(name)
+        return out
+
+    def _normalize_allowlist(raw: Any) -> list[str]:
+        items: list[Any]
+        if isinstance(raw, list):
+            items = raw
+        elif isinstance(raw, tuple):
+            items = list(raw)
+        elif isinstance(raw, str):
+            items = [raw]
+        else:
+            items = []
+
+        out: list[str] = []
+        seen: set[str] = set()
+        current = _tool_by_name()
+        for t in items:
+            if not isinstance(t, str):
+                continue
+            name = t.strip()
+            if not name:
+                continue
+            if name in seen:
+                continue
+            # Only accept tool names known to the workflow's logic (dynamic).
+            if name not in current:
+                continue
+            seen.add(name)
+            out.append(name)
+        return out
+
+    def _effective_allowlist(runtime_ns: Dict[str, Any]) -> list[str]:
+        # Allow runtime vars to override tool selection (Visual Agent tools pin).
+        if isinstance(runtime_ns, dict) and "allowed_tools" in runtime_ns:
+            normalized = _normalize_allowlist(runtime_ns.get("allowed_tools"))
+            runtime_ns["allowed_tools"] = normalized
+            return normalized
+        return _normalize_allowlist(list(_default_allowlist()))
+
+    def _allowed_tool_defs(allow: list[str]) -> list[Any]:
+        out: list[Any] = []
+        current = _tool_by_name()
+        for name in allow:
+            tool = current.get(name)
+            if tool is not None:
+                out.append(tool)
+        return out
+
+    def _system_prompt(runtime_ns: Dict[str, Any]) -> Optional[str]:
+        raw = runtime_ns.get("system_prompt") if isinstance(runtime_ns, dict) else None
+        if isinstance(raw, str) and raw.strip():
+            return raw
+        return None
+
+    def _sanitize_llm_messages(messages: Any, *, limits: Optional[Dict[str, Any]] = None) -> List[Dict[str, str]]:
+        """Convert runtime-owned message dicts into OpenAI-style {role, content, ...}.
+
+        Runtime messages can include extra metadata fields (`timestamp`, `metadata`) that many providers
+        will reject. Keep only the fields the LLM API expects.
+        """
+        if not isinstance(messages, list) or not messages:
+            return []
+        # Keep the LLM-visible context bounded even if the durable history contains large
+        # tool outputs or code dumps.
+        def _limit_int(key: str, default: int) -> int:
+            if not isinstance(limits, dict):
+                return default
+            try:
+                return int(limits.get(key, default))
+            except Exception:
+                return default
+        max_message_chars = _limit_int("max_message_chars", -1)
+        max_tool_message_chars = _limit_int("max_tool_message_chars", -1)
+
+        def _truncate(text: str, *, max_chars: int) -> str:
+            if max_chars <= 0:
+                return text
+            if len(text) <= max_chars:
+                return text
+            suffix = f"\n… (truncated, {len(text):,} chars total)"
+            keep = max_chars - len(suffix)
+            if keep < 200:
+                keep = max_chars
+                suffix = ""
+            return text[:keep].rstrip() + suffix
+
+        out: List[Dict[str, str]] = []
+        for m in messages:
+            if not isinstance(m, dict):
+                continue
+            role = str(m.get("role") or "").strip()
+            content = m.get("content")
+            if not role or content is None:
+                continue
+            content_str = str(content)
+            if not content_str.strip():
+                continue
+            limit = max_tool_message_chars if role == "tool" else max_message_chars
+            entry: Dict[str, str] = {"role": role, "content": _truncate(content_str, max_chars=limit)}
+            if role == "tool":
+                meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
+                call_id = meta.get("call_id") if isinstance(meta, dict) else None
+                if call_id is not None and str(call_id).strip():
+                    # OpenAI-compatible servers accept `tool_call_id` for tool messages.
+                    entry["tool_call_id"] = str(call_id).strip()
+            out.append(entry)
+        return out
+
+    def _flag(runtime_ns: Dict[str, Any], key: str, *, default: bool = False) -> bool:
+        if not isinstance(runtime_ns, dict) or key not in runtime_ns:
+            return bool(default)
+        val = runtime_ns.get(key)
+        if isinstance(val, bool):
+            return val
+        if isinstance(val, (int, float)):
+            return bool(val)
+        if isinstance(val, str):
+            lowered = val.strip().lower()
+            if lowered in ("1", "true", "yes", "on", "enabled"):
+                return True
+            if lowered in ("0", "false", "no", "off", "disabled"):
+                return False
+        return bool(default)
+
+    def _int(runtime_ns: Dict[str, Any], key: str, *, default: int) -> int:
+        if not isinstance(runtime_ns, dict) or key not in runtime_ns:
+            return int(default)
+        val = runtime_ns.get(key)
+        try:
+            return int(val)  # type: ignore[arg-type]
+        except Exception:
+            return int(default)
+
+    def _extract_plan_update(content: str) -> Optional[str]:
+        """Extract a plan update block from model content (best-effort).
+
+        Convention (prompted in Plan mode): the model appends a final section:
+
+            Plan Update:
+            - [ ] ...
+            - [x] ...
+        """
+        if not isinstance(content, str) or not content.strip():
+            return None
+        import re
+
+        lines = content.splitlines()
+        header_idx: Optional[int] = None
+        for i, line in enumerate(lines):
+            if re.match(r"(?i)^\s*plan\s*update\s*:\s*$", line.strip()):
+                header_idx = i
+        if header_idx is None:
+            return None
+        plan_lines = lines[header_idx + 1 :]
+        while plan_lines and not plan_lines[0].strip():
+            plan_lines.pop(0)
+        plan_text = "\n".join(plan_lines).strip()
+        if not plan_text:
+            return None
+        # Require at least one bullet/numbered line to avoid accidental captures.
+        if not re.search(r"(?m)^\s*(?:[-*]|\d+\.)\s+", plan_text):
+            return None
+        return plan_text
 
     def init_node(run: RunState, ctx) -> StepPlan:
         context, scratchpad, runtime_ns, _, limits = ensure_react_vars(run)
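To see what `_sanitize_llm_messages` yields, here is a standalone sketch of the same transformation with the truncation limits omitted (names are local to the example):

    def sanitize(messages):
        # Mirrors _sanitize_llm_messages minus truncation: keep role/content,
        # drop runtime-only fields, surface a tool call_id as tool_call_id.
        out = []
        for m in messages:
            if not isinstance(m, dict):
                continue
            role = str(m.get("role") or "").strip()
            content = m.get("content")
            if not role or content is None or not str(content).strip():
                continue
            entry = {"role": role, "content": str(content)}
            if role == "tool":
                meta = m.get("metadata") or {}
                call_id = meta.get("call_id") if isinstance(meta, dict) else None
                if call_id:
                    entry["tool_call_id"] = str(call_id)
            out.append(entry)
        return out

    history = [
        {"role": "user", "content": "hi", "timestamp": "2024-01-01T00:00:00Z", "metadata": {"message_id": "msg_1"}},
        {"role": "tool", "content": "42", "metadata": {"call_id": "call_7"}},
    ]
    assert sanitize(history) == [
        {"role": "user", "content": "hi"},
        {"role": "tool", "content": "42", "tool_call_id": "call_7"},
    ]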
@@ -127,13 +334,75 @@ def create_react_workflow(
         messages.append(_new_message(ctx, role="user", content=task))
 
         # Ensure toolset metadata is present for audit/debug.
-        runtime_ns.setdefault("tool_specs", tool_specs)
-        runtime_ns.setdefault("toolset_id", toolset_id)
+        allow = _effective_allowlist(runtime_ns)
+        allowed_defs = _allowed_tool_defs(allow)
+        tool_specs = [t.to_dict() for t in allowed_defs]
+        runtime_ns["tool_specs"] = tool_specs
+        runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
+        runtime_ns.setdefault("allowed_tools", allow)
         runtime_ns.setdefault("inbox", [])
 
         emit("init", {"task": task})
+        if _flag(runtime_ns, "plan_mode", default=False) and not isinstance(scratchpad.get("plan"), str):
+            return StepPlan(node_id="init", next_node="plan")
         return StepPlan(node_id="init", next_node="reason")
 
+    def plan_node(run: RunState, ctx) -> StepPlan:
+        context, scratchpad, runtime_ns, _, _ = ensure_react_vars(run)
+        task = str(context.get("task", "") or "")
+
+        allow = _effective_allowlist(runtime_ns)
+
+        prompt = (
+            "You are preparing a high-level execution plan for the user's request.\n"
+            "Return a concise TODO list (5–12 steps) that is actionable and verifiable.\n"
+            "Do not call tools yet. Do not include role prefixes like 'assistant:'.\n\n"
+            f"User request:\n{task}\n\n"
+            "Plan (markdown checklist):\n"
+            "- [ ] ...\n"
+        )
+
+        emit("plan_request", {"tools": allow})
+
+        payload: Dict[str, Any] = {"prompt": prompt, "params": {"temperature": 0.2}}
+        sys = _system_prompt(runtime_ns)
+        if isinstance(sys, str) and sys.strip():
+            payload["system_prompt"] = sys
+        eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
+        eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
+        if isinstance(eff_provider, str) and eff_provider.strip():
+            payload["provider"] = eff_provider.strip()
+        if isinstance(eff_model, str) and eff_model.strip():
+            payload["model"] = eff_model.strip()
+
+        return StepPlan(
+            node_id="plan",
+            effect=Effect(
+                type=EffectType.LLM_CALL,
+                payload=payload,
+                result_key="_temp.plan_llm_response",
+            ),
+            next_node="plan_parse",
+        )
+
+    def plan_parse_node(run: RunState, ctx) -> StepPlan:
+        context, scratchpad, _, temp, _ = ensure_react_vars(run)
+        resp = temp.get("plan_llm_response", {})
+        if not isinstance(resp, dict):
+            resp = {}
+        plan_text = resp.get("content")
+        plan = "" if plan_text is None else str(plan_text).strip()
+        if not plan and isinstance(resp.get("data"), dict):
+            plan = json.dumps(resp.get("data"), ensure_ascii=False, indent=2).strip()
+
+        scratchpad["plan"] = plan
+        temp.pop("plan_llm_response", None)
+
+        if plan:
+            context["messages"].append(_new_message(ctx, role="assistant", content=plan, metadata={"kind": "plan"}))
+        emit("plan", {"plan": plan})
+        return StepPlan(node_id="plan_parse", next_node="reason")
+
     def reason_node(run: RunState, ctx) -> StepPlan:
         context, scratchpad, runtime_ns, _, limits = ensure_react_vars(run)
 
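Plan mode (and the review loop below) are opt-in through the durable `_runtime` namespace. A hypothetical seeding of those vars before a run, inferred from `_flag`/`_int` and the keys read above; the exact mechanism for injecting vars into a RunState is outside this diff:

    run_vars = {
        "_runtime": {
            "plan_mode": "true",     # _flag accepts bools, numbers, or "1"/"true"/"yes"/"on"
            "review_mode": True,     # enables the maybe_review/review nodes added below
            "review_max_rounds": 2,  # parsed via _int; defaults to 1
            "allowed_tools": ["web_search", "ask_user"],  # normalized against the logic's tools
        }
    }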
@@ -157,7 +426,19 @@ def create_react_workflow(
         limits["current_iteration"] = iteration + 1
 
         task = str(context.get("task", "") or "")
-        messages = context["messages"]
+        messages_view = ActiveContextPolicy.select_active_messages_for_llm_from_run(run)
+
+        # Refresh tool metadata BEFORE rendering Active Memory so token fitting stays accurate
+        # (even though we do not render a "Tools (session)" block into Active Memory prompts).
+        allow = _effective_allowlist(runtime_ns)
+        allowed_defs = _allowed_tool_defs(allow)
+        tool_specs = [t.to_dict() for t in allowed_defs]
+        include_examples = bool(runtime_ns.get("tool_prompt_examples", True))
+        if not include_examples:
+            tool_specs = [{k: v for k, v in spec.items() if k != "examples"} for spec in tool_specs if isinstance(spec, dict)]
+        runtime_ns["tool_specs"] = tool_specs
+        runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
+        runtime_ns.setdefault("allowed_tools", allow)
 
         inbox = runtime_ns.get("inbox", [])
         guidance = ""
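The LLM_CALL payload assembled a few hunks below ends up roughly like this (values illustrative); note that the per-tool `examples` key just above is stripped when `tool_prompt_examples` is false:

    payload = {
        "prompt": "",  # deliberate: the chat history travels in "messages" instead
        "messages": [{"role": "user", "content": "Summarize the latest report."}],
        "tools": [{"name": "web_search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}}}],
        "system_prompt": "You are a helpful agent.",
        "provider": "openai",  # only present when pinned statically or via _runtime
        "model": "gpt-4o-mini",
        "params": {"max_tokens": 1024, "temperature": 0.2},  # 0.2 with tools, 0.7 without
    }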
@@ -165,10 +446,9 @@ def create_react_workflow(
             inbox_messages = [str(m.get("content", "") or "") for m in inbox if isinstance(m, dict)]
             guidance = " | ".join([m for m in inbox_messages if m])
             runtime_ns["inbox"] = []
-
         req = logic.build_request(
             task=task,
-            messages=messages,
+            messages=messages_view,
             guidance=guidance,
             iteration=iteration + 1,
             max_iterations=max_iterations,
@@ -177,9 +457,34 @@ def create_react_workflow(
 
         emit("reason", {"iteration": iteration + 1, "max_iterations": max_iterations, "has_guidance": bool(guidance)})
 
-        payload = {"prompt": req.prompt, "tools": [t.to_dict() for t in req.tools]}
+        # Provide the selected active-context messages as proper chat messages (sanitized).
+        #
+        # IMPORTANT: When we send `messages`, do not also send a non-empty `prompt`.
+        # Some providers/servers will append `prompt` as an extra user message even when the
+        # current request is already present in `messages`, which duplicates user turns and
+        # wastes context budget.
+        payload: Dict[str, Any] = {"prompt": ""}
+        payload["messages"] = _sanitize_llm_messages(messages_view, limits=limits)
+        tools_payload = list(tool_specs)
+        if tools_payload:
+            payload["tools"] = tools_payload
+        sys = _system_prompt(runtime_ns) or req.system_prompt
+        if isinstance(sys, str) and sys.strip():
+            payload["system_prompt"] = sys
+        # Provider/model can be configured statically (create_react_workflow args)
+        # or injected dynamically through durable vars in `_runtime` (Visual Agent pins).
+        eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
+        eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
+        if isinstance(eff_provider, str) and eff_provider.strip():
+            payload["provider"] = eff_provider.strip()
+        if isinstance(eff_model, str) and eff_model.strip():
+            payload["model"] = eff_model.strip()
+        params: Dict[str, Any] = {}
         if req.max_tokens is not None:
-            payload["params"] = {"max_tokens": req.max_tokens}
+            params["max_tokens"] = req.max_tokens
+        # Tool calling is formatting-sensitive; bias toward deterministic output when tools are present.
+        params["temperature"] = 0.2 if tools_payload else 0.7
+        payload["params"] = params
 
         return StepPlan(
             node_id="reason",
@@ -191,96 +496,525 @@ def create_react_workflow(
             next_node="parse",
         )
 
+    def tool_retry_minimal_node(run: RunState, ctx) -> StepPlan:
+        """Recovery path when the model fabricates `observation[...]` logs instead of calling tools.
+
+        This intentionally sends a minimal prompt (no History/Scratchpad) to reduce
+        long-context contamination and force either a real tool call or a direct answer.
+        """
+        context, scratchpad, runtime_ns, temp, _ = ensure_react_vars(run)
+        task = str(context.get("task", "") or "")
+
+        allow = _effective_allowlist(runtime_ns)
+        allowed_defs = _allowed_tool_defs(allow)
+        tool_specs = [t.to_dict() for t in allowed_defs]
+        include_examples = bool(runtime_ns.get("tool_prompt_examples", True))
+        if not include_examples:
+            tool_specs = [{k: v for k, v in spec.items() if k != "examples"} for spec in tool_specs if isinstance(spec, dict)]
+        runtime_ns["tool_specs"] = tool_specs
+        runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
+        runtime_ns.setdefault("allowed_tools", allow)
+        # Reuse the canonical agent rules from ReActLogic (but do not include history in prompt).
+        sys_req = logic.build_request(task=task, messages=[], guidance="", iteration=0, max_iterations=0, vars=run.vars)
+
+        bad_excerpt = str(temp.get("tool_retry_bad_content") or "").strip()
+        temp.pop("tool_retry_bad_content", None)
+        if len(bad_excerpt) > 240:
+            bad_excerpt = bad_excerpt[:240].rstrip() + "…"
+
+        prompt = (
+            "Task:\n"
+            f"{task}\n\n"
+            "Your previous message was invalid: it contained fabricated `observation[...]` tool logs, but no tool was called.\n\n"
+            "Now do ONE of the following:\n"
+            "1) If you need more information to answer correctly, CALL ONE OR MORE TOOLS now using the required tool call format.\n"
+            "2) If you can answer without tools, answer directly WITHOUT mentioning any tool calls or observations.\n\n"
+            "Rules:\n"
+            "- Do NOT write `observation[` anywhere.\n"
+            "- Do NOT fabricate tool results.\n"
+            "- If you call tools, output ONLY tool call block(s) (no extra text).\n"
+            "- You MAY batch multiple tool calls by repeating the tool-call block once per call (prefer independent calls).\n"
+        )
+        if bad_excerpt:
+            prompt += f"\nBad output excerpt (do not copy):\n{bad_excerpt}\n"
+
+        payload: Dict[str, Any] = {"prompt": prompt}
+        if tool_specs:
+            payload["tools"] = tool_specs
+        sys = _system_prompt(runtime_ns) or sys_req.system_prompt
+        if isinstance(sys, str) and sys.strip():
+            payload["system_prompt"] = sys
+
+        eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
+        eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
+        if isinstance(eff_provider, str) and eff_provider.strip():
+            payload["provider"] = eff_provider.strip()
+        if isinstance(eff_model, str) and eff_model.strip():
+            payload["model"] = eff_model.strip()
+
+        payload["params"] = {"temperature": 0.2}
+
+        emit("tool_retry_minimal", {"tools": allow, "has_excerpt": bool(bad_excerpt)})
+        return StepPlan(
+            node_id="tool_retry_minimal",
+            effect=Effect(
+                type=EffectType.LLM_CALL,
+                payload=payload,
+                result_key="_temp.llm_response",
+            ),
+            next_node="parse",
+        )
+
+    def empty_response_retry_node(run: RunState, ctx) -> StepPlan:
+        """Recovery path when the model returns an empty message (no content, no tool calls).
+
+        This is treated as an invalid agent step. We re-prompt with the original task plus
+        recent tool evidence and explicitly require either tool calls or a substantive answer.
+        """
+        context, scratchpad, runtime_ns, _, _ = ensure_react_vars(run)
+        task = str(context.get("task", "") or "")
+
+        allow = _effective_allowlist(runtime_ns)
+        allowed_defs = _allowed_tool_defs(allow)
+        tool_specs = [t.to_dict() for t in allowed_defs]
+        include_examples = bool(runtime_ns.get("tool_prompt_examples", True))
+        if not include_examples:
+            tool_specs = [{k: v for k, v in spec.items() if k != "examples"} for spec in tool_specs if isinstance(spec, dict)]
+        runtime_ns["tool_specs"] = tool_specs
+        runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
+        runtime_ns.setdefault("allowed_tools", allow)
+
+        # Include recent tool outputs and user messages as evidence (bounded).
+        messages = list(context.get("messages") or [])
+        evidence_lines: list[str] = []
+        tool_count = 0
+        user_count = 0
+        for m in reversed(messages):
+            if not isinstance(m, dict):
+                continue
+            role = m.get("role")
+            content = m.get("content")
+            if role == "tool" and isinstance(content, str) and content.strip():
+                evidence_lines.append(content.strip())
+                tool_count += 1
+            elif role == "user" and isinstance(content, str) and content.strip():
+                # Avoid duplicating the original task.
+                if content.strip() != task.strip():
+                    evidence_lines.append(content.strip())
+                    user_count += 1
+            if tool_count >= 6 and user_count >= 2:
+                break
+        evidence_lines.reverse()
+        evidence = "\n\n".join(evidence_lines) if evidence_lines else "(no prior evidence captured)"
+
+        # Build a strong corrective prompt. Prefer tools; allow a direct answer if truly possible.
+        prompt = (
+            "The previous assistant message was EMPTY (no content and no tool calls). This is invalid.\n"
+            "Recover by continuing the task using the evidence below.\n\n"
+            f"Task:\n{task}\n\n"
+            f"Evidence (recent tool outputs + user messages):\n{evidence}\n\n"
+            "Now do EXACTLY ONE of the following:\n"
+            "1) CALL one or more tools to make progress (preferred).\n"
+            "2) If you already have enough evidence, provide a concise final answer.\n\n"
+            "Rules:\n"
+            "- Do not output an empty message.\n"
+            "- Do not ask the user a question in plain text; use the `ask_user` tool.\n"
+            "- If you call tools, include the tool call(s) directly (no preamble).\n"
+        )
+
+        payload: Dict[str, Any] = {"prompt": prompt}
+        if tool_specs:
+            payload["tools"] = list(tool_specs)
+        sys = _system_prompt(runtime_ns)
+        if isinstance(sys, str) and sys.strip():
+            payload["system_prompt"] = sys
+        eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
+        eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
+        if isinstance(eff_provider, str) and eff_provider.strip():
+            payload["provider"] = eff_provider.strip()
+        if isinstance(eff_model, str) and eff_model.strip():
+            payload["model"] = eff_model.strip()
+        payload["params"] = {"temperature": 0.2}
+
+        emit("empty_response_retry", {"tools": allow, "evidence": bool(evidence_lines)})
+        return StepPlan(
+            node_id="empty_response_retry",
+            effect=Effect(type=EffectType.LLM_CALL, payload=payload, result_key="_temp.llm_response"),
+            next_node="parse",
+        )
+
     def parse_node(run: RunState, ctx) -> StepPlan:
-        context, _, _, temp, _ = ensure_react_vars(run)
+        context, scratchpad, runtime_ns, temp, _ = ensure_react_vars(run)
         response = temp.get("llm_response", {})
         content, tool_calls = logic.parse_response(response)
 
-        context["messages"].append(_new_message(ctx, role="assistant", content=content))
+        def _sanitize_tool_call_content(text: str) -> str:
+            """Remove tool-transcript markers from assistant content before persisting to history.
+
+            Some OSS models may include internal transcript artifacts (e.g. fabricated
+            `observation[...]` lines) or embed the tool call itself inside the message
+            (`Action:` blocks). We keep only the user-facing prose that appears *before*
+            such markers so the runtime doesn't persist fabricated logs into context.
+            """
+            if not isinstance(text, str) or not text.strip():
+                return ""
+            out_lines: list[str] = []
+            for line in text.splitlines():
+                lowered = line.lstrip().lower()
+                if lowered.startswith("observation["):
+                    break
+                if lowered.startswith("action:"):
+                    break
+                if lowered.startswith("<|tool_call|>") or lowered.startswith("<tool_call>"):
+                    break
+                if lowered.startswith("```tool_call") or lowered.startswith("```tool_code"):
+                    break
+                out_lines.append(line)
+            return "\n".join(out_lines).rstrip()
+
+        def _should_retry_for_missing_tool_call(text: str) -> bool:
+            if not isinstance(text, str) or not text.strip():
+                return False
+            # Some models echo our internal History formatting (e.g. `observation[web_search] (success): ...`)
+            # as transcript lines. Treat only *line-start* occurrences as suspicious (avoid false positives
+            # in JSON/code blocks), and only use this signal when no tools have actually run yet.
+            for line in text.splitlines():
+                if line.lstrip().lower().startswith("observation["):
+                    return True
+            return False
+
+        def _extract_final_answer(text: str) -> tuple[bool, str]:
+            """Return (is_explicit_final, stripped_answer)."""
+            if not isinstance(text, str) or not text.strip():
+                return False, ""
+            s = text.lstrip()
+            if s.upper().startswith("FINAL:"):
+                return True, s[len("FINAL:") :].lstrip()
+            return False, text
 
         emit(
             "parse",
             {
                 "has_tool_calls": bool(tool_calls),
-                "content_preview": content[:100] if content else "(no content)",
+                "content": content,
+                "tool_calls": [{"name": tc.name, "arguments": tc.arguments, "call_id": tc.call_id} for tc in tool_calls],
             },
         )
         temp.pop("llm_response", None)
 
+        # Reset retry counter on any successful tool-call detection.
         if tool_calls:
+            scratchpad["tool_retry_count"] = 0
+            scratchpad["tool_retry_minimal_used"] = False
+
+        if tool_calls:
+            clean = _sanitize_tool_call_content(content)
+            if clean.strip():
+                context["messages"].append(_new_message(ctx, role="assistant", content=clean))
+                if _flag(runtime_ns, "plan_mode", default=False):
+                    updated = _extract_plan_update(clean)
+                    if isinstance(updated, str) and updated.strip():
+                        scratchpad["plan"] = updated.strip()
             temp["pending_tool_calls"] = [tc.__dict__ for tc in tool_calls]
             return StepPlan(node_id="parse", next_node="act")
 
-        temp["final_answer"] = content
-        return StepPlan(node_id="parse", next_node="done")
+        # Empty response is an invalid step: recover with a bounded retry that carries evidence.
+        if not isinstance(content, str) or not content.strip():
+            try:
+                empty_retries = int(scratchpad.get("empty_response_retry_count") or 0)
+            except Exception:
+                empty_retries = 0
+
+            if empty_retries < 2:
+                scratchpad["empty_response_retry_count"] = empty_retries + 1
+                emit("parse_retry_empty_response", {"retries": empty_retries + 1})
+                return StepPlan(node_id="parse", next_node="empty_response_retry")
+
+            safe = (
+                "I can't proceed: the model repeatedly returned empty outputs (no content, no tool calls).\n"
+                "Please retry, reduce context, or switch models."
+            )
+            context["messages"].append(_new_message(ctx, role="assistant", content=safe, metadata={"kind": "error"}))
+            temp["final_answer"] = safe
+            temp["pending_tool_calls"] = []
+            scratchpad["empty_response_retry_count"] = 0
+            return StepPlan(node_id="parse", next_node="maybe_review")
+
+        # If the model appears to have produced a fake "observation[tool]" transcript instead of
+        # calling tools, give it one corrective retry before treating the message as final.
+        if not bool(scratchpad.get("used_tools")) and _should_retry_for_missing_tool_call(content):
+            try:
+                retries = int(scratchpad.get("tool_retry_count") or 0)
+            except Exception:
+                retries = 0
+            if retries < 2:
+                scratchpad["tool_retry_count"] = retries + 1
+                inbox = runtime_ns.get("inbox")
+                if not isinstance(inbox, list):
+                    inbox = []
+                    runtime_ns["inbox"] = inbox
+                inbox.append(
+                    {
+                        "role": "system",
+                        "content": (
+                            "You wrote an `observation[...]` line, but no tool was actually called.\n"
+                            "Do NOT fabricate tool outputs.\n"
+                            "If you need to search/fetch/read/write, CALL a tool now using the required tool call format.\n"
+                            "Never output `observation[...]` markers; those are context-only."
+                        ),
+                    }
+                )
+                emit("parse_retry_missing_tool_call", {"retries": retries + 1})
+                return StepPlan(node_id="parse", next_node="reason")
+
+            # If the model still fails after retries, attempt a single minimal-context recovery call
+            # instead of accepting a fabricated transcript as the final answer.
+            if not bool(scratchpad.get("tool_retry_minimal_used")):
+                scratchpad["tool_retry_minimal_used"] = True
+                scratchpad["tool_retry_count"] = 0
+                temp["tool_retry_bad_content"] = content
+                emit("parse_retry_minimal_context", {"retries": retries})
+                return StepPlan(node_id="parse", next_node="tool_retry_minimal")
+
+            safe = (
+                "I can't proceed safely: the model repeatedly produced fabricated `observation[...]` tool logs instead of calling tools.\n"
+                "Please retry, reduce context, or switch models."
+            )
+            context["messages"].append(_new_message(ctx, role="assistant", content=safe, metadata={"kind": "error"}))
+            temp["final_answer"] = safe
+            scratchpad["tool_retry_count"] = 0
+            return StepPlan(node_id="parse", next_node="maybe_review")
+
+        final_raw = _sanitize_tool_call_content(content)
+        if not final_raw.strip():
+            final_raw = str(content or "").strip()
+
+        is_final, final_text = _extract_final_answer(final_raw)
+        if is_final:
+            if final_text:
+                context["messages"].append(_new_message(ctx, role="assistant", content=final_text))
+                if _flag(runtime_ns, "plan_mode", default=False):
+                    updated = _extract_plan_update(final_text)
+                    if isinstance(updated, str) and updated.strip():
+                        scratchpad["plan"] = updated.strip()
+            temp["final_answer"] = final_text or "No answer provided"
+            temp["pending_tool_calls"] = []
+            scratchpad["tool_retry_count"] = 0
+            return StepPlan(node_id="parse", next_node="maybe_review")
+
+        # Default: treat as a normal final answer even if it lacks an explicit FINAL marker.
+        final = final_raw
+        if final:
+            context["messages"].append(_new_message(ctx, role="assistant", content=final))
+            if _flag(runtime_ns, "plan_mode", default=False):
+                updated = _extract_plan_update(final)
+                if isinstance(updated, str) and updated.strip():
+                    scratchpad["plan"] = updated.strip()
+
+        temp["final_answer"] = final or "No answer provided"
+        temp["pending_tool_calls"] = []
+        scratchpad["tool_retry_count"] = 0
+        scratchpad["empty_response_retry_count"] = 0
+        return StepPlan(node_id="parse", next_node="maybe_review")
 
     def act_node(run: RunState, ctx) -> StepPlan:
-        _, _, _, temp, _ = ensure_react_vars(run)
-        tool_calls = temp.get("pending_tool_calls", [])
-        if not isinstance(tool_calls, list):
-            tool_calls = []
-
-        if not tool_calls:
+        # Treat `_temp.pending_tool_calls` as a durable queue.
+        # This avoids dropping calls when schema-only tools (ask_user/memory/etc.) are interleaved
+        # with normal tools, and avoids re-asking the same question due to missing context.
+        context, scratchpad, runtime_ns, temp, _ = ensure_react_vars(run)
+        raw_queue = temp.get("pending_tool_calls", [])
+        if not isinstance(raw_queue, list) or not raw_queue:
+            temp["pending_tool_calls"] = []
             return StepPlan(node_id="act", next_node="reason")
 
-        # Handle ask_user specially with ASK_USER effect.
-        for i, tc in enumerate(tool_calls):
-            if not isinstance(tc, dict):
-                continue
-            if tc.get("name") != "ask_user":
+        allow = _effective_allowlist(runtime_ns)
+        builtin_effect_tools = {
+            "ask_user",
+            "recall_memory",
+            "inspect_vars",
+            "remember",
+            "remember_note",
+            "compact_memory",
+        }
+
+        # Normalize queue items and assign stable call_ids once so splitting into batches does not
+        # introduce duplicate ids.
+        tool_queue: List[Dict[str, Any]] = []
+        for idx, item in enumerate(raw_queue, start=1):
+            if isinstance(item, ToolCall):
+                d: Dict[str, Any] = {"name": item.name, "arguments": item.arguments, "call_id": item.call_id}
+            elif isinstance(item, dict):
+                d = dict(item)
+            else:
                 continue
+            call_id = str(d.get("call_id") or "").strip()
+            if not call_id:
+                d["call_id"] = str(idx)
+            tool_queue.append(d)
+
+        if not tool_queue:
+            temp["pending_tool_calls"] = []
+            return StepPlan(node_id="act", next_node="reason")
+
+        def _is_builtin(tc: Dict[str, Any]) -> bool:
+            name = tc.get("name")
+            return isinstance(name, str) and name in builtin_effect_tools
+
+        # Execute one schema-only builtin (if it is next), otherwise execute the longest contiguous
+        # prefix of normal tools. Leave the remainder queued for subsequent act/observe cycles.
+        if _is_builtin(tool_queue[0]):
+            tc = tool_queue[0]
+            name = str(tc.get("name") or "").strip()
             args = tc.get("arguments") or {}
-            question = str(args.get("question") or "Please provide input:")
-            choices = args.get("choices")
-            choices = list(choices) if isinstance(choices, list) else None
-
-            temp["pending_tool_calls"] = tool_calls[i + 1 :]
-            emit("ask_user", {"question": question, "choices": choices or []})
-            return StepPlan(
-                node_id="act",
-                effect=Effect(
-                    type=EffectType.ASK_USER,
-                    payload={"prompt": question, "choices": choices, "allow_free_text": True},
-                    result_key="_temp.user_response",
-                ),
-                next_node="handle_user_response",
-            )
+            if not isinstance(args, dict):
+                args = {}
+
+            # Pop the builtin from the queue.
+            temp["pending_tool_calls"] = list(tool_queue[1:])
+
+            if name and name not in allow:
+                temp["tool_results"] = {
+                    "results": [
+                        {
+                            "call_id": str(tc.get("call_id") or ""),
+                            "name": name,
+                            "success": False,
+                            "output": None,
+                            "error": f"Tool '{name}' is not allowed for this agent",
+                        }
+                    ]
+                }
+                emit("act_blocked", {"tool": name})
+                return StepPlan(node_id="act", next_node="observe")
+
+            if name == "ask_user":
+                question = str(args.get("question") or "Please provide input:")
+                choices = args.get("choices")
+                choices = list(choices) if isinstance(choices, list) else None
+
+                # Persist the asked question in the durable message history so both the main model
+                # and the reviewer can see what was asked (and avoid re-asking).
+                msgs = context.get("messages")
+                if isinstance(msgs, list):
+                    content = f"[Agent question]: {question}"
+                    last = msgs[-1] if msgs else None
+                    last_role = last.get("role") if isinstance(last, dict) else None
+                    last_meta = last.get("metadata") if isinstance(last, dict) else None
+                    last_kind = last_meta.get("kind") if isinstance(last_meta, dict) else None
+                    last_content = last.get("content") if isinstance(last, dict) else None
+                    if not (last_role == "assistant" and last_kind == "ask_user_prompt" and str(last_content or "") == content):
+                        msgs.append(_new_message(ctx, role="assistant", content=content, metadata={"kind": "ask_user_prompt"}))
+
+                emit("ask_user", {"question": question, "choices": choices or []})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(
+                        type=EffectType.ASK_USER,
+                        payload={"prompt": question, "choices": choices, "allow_free_text": True},
+                        result_key="_temp.user_response",
+                    ),
+                    next_node="handle_user_response",
+                )
 
-        for tc in tool_calls:
-            if isinstance(tc, dict):
-                emit("act", {"tool": tc.get("name", ""), "args": tc.get("arguments", {})})
+            if name == "recall_memory":
+                payload = dict(args)
+                payload.setdefault("tool_name", "recall_memory")
+                payload.setdefault("call_id", tc.get("call_id") or "memory")
+                emit("memory_query", {"query": payload.get("query"), "span_id": payload.get("span_id")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_QUERY, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
+                )
 
-        formatted_calls: List[Dict[str, Any]] = []
-        for tc in tool_calls:
-            if isinstance(tc, dict):
-                formatted_calls.append(
-                    {
-                        "name": tc.get("name", ""),
-                        "arguments": tc.get("arguments", {}),
-                        "call_id": tc.get("call_id", "1"),
-                    }
+            if name == "inspect_vars":
+                payload = dict(args)
+                payload.setdefault("tool_name", "inspect_vars")
+                payload.setdefault("call_id", tc.get("call_id") or "vars")
+                emit("vars_query", {"path": payload.get("path")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.VARS_QUERY, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
+                )
+
+            if name == "remember":
+                payload = dict(args)
+                payload.setdefault("tool_name", "remember")
+                payload.setdefault("call_id", tc.get("call_id") or "memory")
+                emit("memory_tag", {"span_id": payload.get("span_id"), "tags": payload.get("tags")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_TAG, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
                 )
-            elif isinstance(tc, ToolCall):
-                formatted_calls.append(
+
+            if name == "remember_note":
+                payload = dict(args)
+                payload.setdefault("tool_name", "remember_note")
+                payload.setdefault("call_id", tc.get("call_id") or "memory")
+                emit("memory_note", {"note": payload.get("note"), "tags": payload.get("tags")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_NOTE, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
+                )
+
+            if name == "compact_memory":
+                payload = dict(args)
+                payload.setdefault("tool_name", "compact_memory")
+                payload.setdefault("call_id", tc.get("call_id") or "compact")
+                emit(
+                    "memory_compact",
                     {
-                        "name": tc.name,
-                        "arguments": tc.arguments,
-                        "call_id": tc.call_id or "1",
-                    }
+                        "preserve_recent": payload.get("preserve_recent"),
+                        "mode": payload.get("compression_mode"),
+                        "focus": payload.get("focus"),
+                    },
+                )
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_COMPACT, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
                 )
 
+            # Unknown builtin: continue with the queue (best-effort).
+            if temp.get("pending_tool_calls"):
+                return StepPlan(node_id="act", next_node="act")
+            return StepPlan(node_id="act", next_node="reason")
+
+        # Normal tools: execute contiguous prefix until the next builtin.
+        batch: List[Dict[str, Any]] = []
+        for tc in tool_queue:
+            if _is_builtin(tc):
+                break
+            batch.append(tc)
+
+        remaining = tool_queue[len(batch) :]
+        temp["pending_tool_calls"] = list(remaining)
+
+        # Emit observability events for the batch.
+        for tc in batch:
+            emit("act", {"tool": tc.get("name", ""), "args": tc.get("arguments", {}), "call_id": str(tc.get("call_id") or "")})
+
+        formatted_calls: List[Dict[str, Any]] = []
+        for tc in batch:
+            formatted_calls.append(
+                {"name": tc.get("name", ""), "arguments": tc.get("arguments", {}), "call_id": str(tc.get("call_id") or "")}
+            )
+
         return StepPlan(
             node_id="act",
             effect=Effect(
                 type=EffectType.TOOL_CALLS,
-                payload={"tool_calls": formatted_calls},
+                payload={"tool_calls": formatted_calls, "allowed_tools": list(allow)},
                 result_key="_temp.tool_results",
             ),
             next_node="observe",
         )
 
     def observe_node(run: RunState, ctx) -> StepPlan:
-        context, _, _, temp, _ = ensure_react_vars(run)
+        context, scratchpad, _, temp, _ = ensure_react_vars(run)
         tool_results = temp.get("tool_results", {})
         if not isinstance(tool_results, dict):
            tool_results = {}
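The batching rule in `act_node` above: if the head of the queue is a schema-only builtin, execute just that one call; otherwise run the longest contiguous prefix of normal tools and leave the rest queued. A standalone sketch:

    BUILTINS = {"ask_user", "recall_memory", "inspect_vars", "remember", "remember_note", "compact_memory"}

    def split_queue(queue):
        # Mirrors act_node's queue handling: one builtin at a time, or the
        # contiguous prefix of normal tools up to the next builtin.
        if not queue:
            return [], []
        if queue[0]["name"] in BUILTINS:
            return [queue[0]], queue[1:]
        batch = []
        for tc in queue:
            if tc["name"] in BUILTINS:
                break
            batch.append(tc)
        return batch, queue[len(batch):]

    q = [{"name": "web_search"}, {"name": "fetch_url"}, {"name": "ask_user"}, {"name": "web_search"}]
    batch, rest = split_queue(q)
    assert [t["name"] for t in batch] == ["web_search", "fetch_url"]
    assert [t["name"] for t in rest] == ["ask_user", "web_search"]

(`fetch_url` here is a made-up tool name for illustration.)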
@@ -288,6 +1022,16 @@ def create_react_workflow(
         results = tool_results.get("results", [])
         if not isinstance(results, list):
             results = []
+        if results:
+            scratchpad["used_tools"] = True
+
+        # Prefer a tool-supplied human/LLM-friendly rendering when present.
+        def _display(v: Any) -> str:
+            if isinstance(v, dict):
+                rendered = v.get("rendered")
+                if isinstance(rendered, str) and rendered.strip():
+                    return rendered.strip()
+            return "" if v is None else str(v)
 
         for r in results:
             if not isinstance(r, dict):
@@ -296,12 +1040,17 @@ def create_react_workflow(
             success = bool(r.get("success"))
             output = r.get("output", "")
             error = r.get("error", "")
+            display = _display(output)
+            if not success:
+                # Preserve structured outputs for provenance, but show a clean string to the LLM/UI.
+                display = _display(output) if isinstance(output, dict) else str(error or output)
             rendered = logic.format_observation(
                 name=name,
-                output=str(output if success else (error or output)),
+                output=display,
                 success=success,
             )
-            emit("observe", {"tool": name, "result": rendered[:150]})
+            emit("observe", {"tool": name, "success": success, "result": rendered})
+
             context["messages"].append(
                 _new_message(
                     ctx,
@@ -316,9 +1065,291 @@ def create_react_workflow(
             )
 
         temp.pop("tool_results", None)
+        # Reset verifier/review rounds after executing tools. This enables repeated
+        # verify→act→observe cycles without immediately hitting review_max_rounds.
+        scratchpad["review_count"] = 0
+        pending = temp.get("pending_tool_calls", [])
+        if isinstance(pending, list) and pending:
+            return StepPlan(node_id="observe", next_node="act")
         temp["pending_tool_calls"] = []
         return StepPlan(node_id="observe", next_node="reason")
 
+    def maybe_review_node(run: RunState, ctx) -> StepPlan:
+        _, scratchpad, runtime_ns, _, _ = ensure_react_vars(run)
+
+        if not _flag(runtime_ns, "review_mode", default=False):
+            return StepPlan(node_id="maybe_review", next_node="done")
+
+        max_rounds = _int(runtime_ns, "review_max_rounds", default=1)
+        if max_rounds < 0:
+            max_rounds = 0
+        count = scratchpad.get("review_count")
+        try:
+            count_int = int(count or 0)
+        except Exception:
+            count_int = 0
+
+        if count_int >= max_rounds:
+            return StepPlan(node_id="maybe_review", next_node="done")
+
+        scratchpad["review_count"] = count_int + 1
+        return StepPlan(node_id="maybe_review", next_node="review")
+
+    def review_node(run: RunState, ctx) -> StepPlan:
+        context, scratchpad, runtime_ns, _, limits = ensure_react_vars(run)
+
+        task = str(context.get("task", "") or "")
+        plan = scratchpad.get("plan")
+        plan_text = str(plan).strip() if isinstance(plan, str) and plan.strip() else "(no plan)"
+
+        allow = _effective_allowlist(runtime_ns)
+
+        def _truncate_block(text: str, *, max_chars: int) -> str:
+            s = str(text or "")
+            if max_chars <= 0:
+                return s
+            if len(s) <= max_chars:
+                return s
+            suffix = f"\n… (truncated, {len(s):,} chars total)"
+            keep = max_chars - len(suffix)
+            if keep < 200:
+                keep = max_chars
+                suffix = ""
+            return s[:keep].rstrip() + suffix
+
+        def _format_allowed_tools() -> str:
+            # Prefer the already-computed tool_specs (created in reason_node) to avoid
+            # re-materializing tool definitions and to keep formatting stable.
+            specs = runtime_ns.get("tool_specs")
+            if not isinstance(specs, list) or not specs:
+                defs = _allowed_tool_defs(allow)
+                specs = [t.to_dict() for t in defs]
+            lines: list[str] = []
+            for spec in specs:
+                if not isinstance(spec, dict):
+                    continue
+                name = str(spec.get("name") or "").strip()
+                if not name:
+                    continue
+                params = spec.get("parameters")
+                props = params.get("properties", {}) if isinstance(params, dict) else {}
+                keys = sorted([k for k in props.keys() if isinstance(k, str)])
+                if keys:
+                    lines.append(f"- {name}({', '.join(keys)})")
+                else:
+                    lines.append(f"- {name}()")
+            return "\n".join(lines) if lines else "(no tools available)"
+
+        # Include recent tool outputs for evidence-based review.
+        messages = list(context.get("messages") or [])
+        tool_msgs: list[str] = []
+        try:
+            tool_limit = int(limits.get("review_max_tool_output_chars", -1))
+        except Exception:
+            tool_limit = -1
+        try:
+            answer_limit = int(limits.get("review_max_answer_chars", -1))
+        except Exception:
+            answer_limit = -1
+
+        for m in reversed(messages):
+            if not isinstance(m, dict) or m.get("role") != "tool":
+                continue
+            content = m.get("content")
+            if isinstance(content, str) and content.strip():
+                tool_msgs.append(_truncate_block(content.strip(), max_chars=tool_limit))
+            if len(tool_msgs) >= 8:
+                break
+        tool_msgs.reverse()
+        observations = "\n\n".join(tool_msgs) if tool_msgs else "(no tool outputs)"
+
+        # Include recent user messages (especially ask_user responses) so the reviewer can
+        # avoid re-asking questions the user already answered.
+        try:
+            user_limit = int(limits.get("review_max_user_message_chars", -1))
+        except Exception:
+            user_limit = -1
+
+        user_msgs: list[str] = []
+        ask_prompts: list[str] = []
+        for m in reversed(messages):
+            if not isinstance(m, dict):
+                continue
+            role = m.get("role")
+            content = m.get("content")
+            if role == "user" and isinstance(content, str) and content.strip():
+                if content.strip() != task.strip():
+                    user_msgs.append(_truncate_block(content.strip(), max_chars=user_limit))
+            if len(user_msgs) >= 4:
+                break
+        for m in reversed(messages):
+            if not isinstance(m, dict):
+                continue
+            if m.get("role") != "assistant":
+                continue
+            meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
+            if not isinstance(meta, dict) or meta.get("kind") != "ask_user_prompt":
+                continue
+            content = m.get("content")
+            if isinstance(content, str) and content.strip():
+                ask_prompts.append(_truncate_block(content.strip(), max_chars=user_limit))
+            if len(ask_prompts) >= 4:
+                break
+
+        user_msgs.reverse()
+        ask_prompts.reverse()
+        user_context = "\n\n".join(user_msgs) if user_msgs else "(no additional user messages)"
+        asked_context = "\n\n".join(ask_prompts) if ask_prompts else "(no ask_user prompts recorded)"
+
+        # The verifier should primarily judge based on tool outputs. Only include an answer
+        # excerpt when we have no tool evidence (pure Q&A runs).
+        answer_raw = str(run.vars.get("_temp", {}).get("final_answer") or "")
+        answer_excerpt = ""
+        if not tool_msgs and answer_raw.strip():
+            answer_excerpt = _truncate_block(answer_raw.strip(), max_chars=answer_limit)
+
+        prompt = (
+            "You are a verifier. Review whether the user's request has been fully satisfied.\n"
+            "Be strict: only count actions that are supported by the tool outputs.\n"
+            "If anything is missing, propose the NEXT ACTIONS.\n"
+            "Prefer returning `next_tool_calls` over `next_prompt`.\n"
+            "Return JSON ONLY.\n\n"
+            f"User request:\n{task}\n\n"
+            f"Plan:\n{plan_text}\n\n"
+            f"Recent ask_user prompts:\n{asked_context}\n\n"
+            f"Recent user messages:\n{user_context}\n\n"
+            + (f"Current answer (excerpt):\n{answer_excerpt}\n\n" if answer_excerpt else "")
+            + f"Tool outputs:\n{observations}\n\n"
+            f"Allowed tools:\n{_format_allowed_tools()}\n\n"
+        )
+
+        schema = {
+            "type": "object",
+            "properties": {
+                "complete": {"type": "boolean"},
+                "missing": {"type": "array", "items": {"type": "string"}},
+                "next_prompt": {"type": "string"},
+                "next_tool_calls": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "name": {"type": "string"},
+                            "arguments": {"type": "object"},
+                        },
+                        "required": ["name", "arguments"],
+                        "additionalProperties": False,
+                    },
+                },
+            },
+            "required": ["complete", "missing", "next_prompt", "next_tool_calls"],
+            "additionalProperties": False,
+        }
+
+        emit("review_request", {"tool_messages": len(tool_msgs)})
+
+        payload: Dict[str, Any] = {
+            "prompt": prompt,
+            "response_schema": schema,
+            "response_schema_name": "ReActVerifier",
+            "params": {"temperature": 0.2},
+        }
+        sys = _system_prompt(runtime_ns)
+        if sys is not None:
+            payload["system_prompt"] = sys
+        eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
+        eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
+        if isinstance(eff_provider, str) and eff_provider.strip():
+            payload["provider"] = eff_provider.strip()
+        if isinstance(eff_model, str) and eff_model.strip():
+            payload["model"] = eff_model.strip()
+
+        return StepPlan(
+            node_id="review",
+            effect=Effect(
+                type=EffectType.LLM_CALL,
+                payload=payload,
+                result_key="_temp.review_llm_response",
+            ),
+            next_node="review_parse",
+        )
+
+    def review_parse_node(run: RunState, ctx) -> StepPlan:
+        _, _, runtime_ns, temp, _ = ensure_react_vars(run)
+        resp = temp.get("review_llm_response", {})
+        if not isinstance(resp, dict):
+            resp = {}
+
+        data = resp.get("data")
+        if data is None and isinstance(resp.get("content"), str):
+            try:
+                data = json.loads(resp["content"])
+            except Exception:
+                data = None
+        if not isinstance(data, dict):
+            data = {}
+
+        complete = bool(data.get("complete"))
+        missing = data.get("missing") if isinstance(data.get("missing"), list) else []
+        next_prompt = data.get("next_prompt")
+        next_prompt_text = str(next_prompt or "").strip()
+        next_tool_calls_raw = data.get("next_tool_calls")
+        next_tool_calls: list[dict[str, Any]] = []
+        if isinstance(next_tool_calls_raw, list):
+            for item in next_tool_calls_raw:
+                if not isinstance(item, dict):
+                    continue
+                name = str(item.get("name") or "").strip()
+                args = item.get("arguments")
+                if not isinstance(args, dict):
+                    args = {}
+                if name:
+                    next_tool_calls.append({"name": name, "arguments": args})
+
+        emit("review", {"complete": complete, "missing": missing})
+        temp.pop("review_llm_response", None)
+
+        if complete:
+            return StepPlan(node_id="review_parse", next_node="done")
+
+        if next_tool_calls:
+            temp["pending_tool_calls"] = next_tool_calls
+            emit("review_tool_calls", {"count": len(next_tool_calls)})
+            return StepPlan(node_id="review_parse", next_node="act")
+
+        # Behavioral validation: if incomplete but no tool calls, re-ask reviewer once with stricter rules.
+        if not complete and not next_tool_calls:
+            try:
+                retry_count = int(runtime_ns.get("review_retry_count") or 0)
+            except Exception:
+                retry_count = 0
+            if retry_count < 1:
+                runtime_ns["review_retry_count"] = retry_count + 1
+                inbox = runtime_ns.get("inbox")
+                if not isinstance(inbox, list):
+                    inbox = []
+                    runtime_ns["inbox"] = inbox
+                inbox.append(
+                    {
+                        "content": (
+                            "[Review] Your last review output was not actionable. "
+                            "If incomplete, you MUST return at least one `next_tool_call` "
+                            "(use `ask_user` if you need clarification). Return JSON only."
+                        )
+                    }
+                )
+                emit("review_retry_unactionable", {"retry": retry_count + 1})
+                return StepPlan(node_id="review_parse", next_node="review")
+
+        runtime_ns["review_retry_count"] = 0
+        if next_prompt_text:
+            inbox = runtime_ns.get("inbox")
+            if not isinstance(inbox, list):
+                inbox = []
+                runtime_ns["inbox"] = inbox
+            inbox.append({"content": f"[Review] {next_prompt_text}"})
+        return StepPlan(node_id="review_parse", next_node="reason")
+
     def handle_user_response_node(run: RunState, ctx) -> StepPlan:
         context, _, _, temp, _ = ensure_react_vars(run)
         user_response = temp.get("user_response", {})
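A reviewer response that satisfies the `ReActVerifier` schema above would look like the following; the tool name and arguments are invented for illustration:

    review = {
        "complete": False,
        "missing": ["The summary was never written to a file"],
        "next_prompt": "",
        "next_tool_calls": [
            {"name": "write_file", "arguments": {"path": "summary.md", "content": "..."}}
        ],
    }

When `complete` is true the run proceeds to `done`; otherwise any `next_tool_calls` are queued straight into `act`, and a bare `next_prompt` is delivered back to `reason` via the inbox.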
@@ -344,6 +1375,16 @@ def create_react_workflow(
         # Prefer _limits.current_iteration, fall back to scratchpad
         iterations = int(limits.get("current_iteration", 0) or scratchpad.get("iteration", 0) or 0)
 
+        # Persist the final user-facing answer into the conversation history so it shows up
+        # in /history and becomes part of the next run's seed context.
+        messages = context.get("messages")
+        if isinstance(messages, list):
+            last = messages[-1] if messages else None
+            last_role = last.get("role") if isinstance(last, dict) else None
+            last_content = last.get("content") if isinstance(last, dict) else None
+            if last_role != "assistant" or str(last_content or "") != answer:
+                messages.append(_new_message(ctx, role="assistant", content=answer, metadata={"kind": "final_answer"}))
+
         return StepPlan(
             node_id="done",
             complete_output={
@@ -374,17 +1415,23 @@ def create_react_workflow(
         )
 
     return WorkflowSpec(
-        workflow_id="react_agent",
+        workflow_id=str(workflow_id or "react_agent"),
         entry_node="init",
         nodes={
             "init": init_node,
+            "plan": plan_node,
+            "plan_parse": plan_parse_node,
             "reason": reason_node,
+            "tool_retry_minimal": tool_retry_minimal_node,
+            "empty_response_retry": empty_response_retry_node,
             "parse": parse_node,
             "act": act_node,
             "observe": observe_node,
             "handle_user_response": handle_user_response_node,
+            "maybe_review": maybe_review_node,
+            "review": review_node,
+            "review_parse": review_parse_node,
             "done": done_node,
             "max_iterations": max_iterations_node,
         },
     )
-
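Taken together, the 0.3.0 node graph routes roughly as follows (a sketch assembled from the `next_node` values above; `act` fans out per effect type, and `handle_user_response`/`done`/`max_iterations` are unchanged from 0.2.0 and omitted):

    transitions = {
        "init": ["plan", "reason"],  # "plan" only when plan_mode is set and no plan exists
        "plan": ["plan_parse"],
        "plan_parse": ["reason"],
        "reason": ["parse"],
        "parse": ["act", "empty_response_retry", "tool_retry_minimal", "reason", "maybe_review"],
        "tool_retry_minimal": ["parse"],
        "empty_response_retry": ["parse"],
        "act": ["observe", "handle_user_response", "act", "reason"],
        "observe": ["act", "reason"],
        "maybe_review": ["review", "done"],
        "review": ["review_parse"],
        "review_parse": ["done", "act", "review", "reason"],
    }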