abstractagent 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,10 +6,13 @@ import hashlib
  import json
  from typing import Any, Callable, Dict, List, Optional
 
- from abstractcore.tools import ToolCall
+ from abstractcore.tools import ToolCall, ToolDefinition
  from abstractruntime import Effect, EffectType, RunState, StepPlan, WorkflowSpec
  from abstractruntime.core.vars import ensure_limits, ensure_namespaces
+ from abstractruntime.memory.active_context import ActiveContextPolicy
 
+ from .generation_params import runtime_llm_params
+ from .media import extract_media_from_context
  from ..logic.codeact import CodeActLogic
 
 
@@ -29,11 +32,16 @@ def _new_message(
 
      timestamp = datetime.now(timezone.utc).isoformat()
 
+     import uuid
+
+     meta = dict(metadata or {})
+     meta.setdefault("message_id", f"msg_{uuid.uuid4().hex}")
+
      return {
          "role": role,
          "content": content,
          "timestamp": timestamp,
-         "metadata": metadata or {},
+         "metadata": meta,
      }
 
 
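
Note: `_new_message` now copies the caller's `metadata` dict before stamping a `message_id`, so shared dicts are never mutated. A minimal standalone sketch of the resulting behavior (the `_new_message` signature is simplified here for illustration):

```python
# Sketch only: mirrors the diffed _new_message logic; signature simplified.
import uuid
from datetime import datetime, timezone

def new_message(role, content, metadata=None):
    meta = dict(metadata or {})  # copy, so the caller's dict is not mutated
    meta.setdefault("message_id", f"msg_{uuid.uuid4().hex}")
    return {
        "role": role,
        "content": content,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "metadata": meta,
    }

shared = {"kind": "plan"}
m = new_message("assistant", "hi", shared)
assert "message_id" in m["metadata"] and "message_id" not in shared
```
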
@@ -108,9 +116,187 @@ def create_codeact_workflow(
          if on_step:
              on_step(step, data)
 
-     tool_defs = logic.tools
-     tool_specs = [t.to_dict() for t in tool_defs]
-     toolset_id = _compute_toolset_id(tool_specs)
+     def _current_tool_defs() -> list[ToolDefinition]:
+         defs = getattr(logic, "tools", None)
+         if not isinstance(defs, list):
+             try:
+                 defs = list(defs)  # type: ignore[arg-type]
+             except Exception:
+                 defs = []
+         return [t for t in defs if getattr(t, "name", None)]
+
+     def _tool_by_name() -> dict[str, ToolDefinition]:
+         out: dict[str, ToolDefinition] = {}
+         for t in _current_tool_defs():
+             name = getattr(t, "name", None)
+             if isinstance(name, str) and name.strip():
+                 out[name] = t
+         return out
+
+     def _default_allowlist() -> list[str]:
+         out: list[str] = []
+         seen: set[str] = set()
+         for t in _current_tool_defs():
+             name = getattr(t, "name", None)
+             if not isinstance(name, str) or not name.strip() or name in seen:
+                 continue
+             seen.add(name)
+             out.append(name)
+         return out
+
+     def _normalize_allowlist(raw: Any) -> list[str]:
+         if raw is None:
+             return []
+         if isinstance(raw, str):
+             val = raw.strip()
+             return [val] if val else []
+         if isinstance(raw, list):
+             out: list[str] = []
+             seen: set[str] = set()
+             for item in raw:
+                 if not isinstance(item, str):
+                     continue
+                 name = item.strip()
+                 if not name or name in seen:
+                     continue
+                 seen.add(name)
+                 out.append(name)
+             return out
+         return []
+
+     def _effective_allowlist(runtime_ns: Dict[str, Any]) -> list[str]:
+         if isinstance(runtime_ns, dict) and "allowed_tools" in runtime_ns:
+             normalized = _normalize_allowlist(runtime_ns.get("allowed_tools"))
+             # Filter to currently known tools (dynamic), preserving order.
+             current = _tool_by_name()
+             filtered = [name for name in normalized if name in current]
+             runtime_ns["allowed_tools"] = filtered
+             return filtered
+         return list(_default_allowlist())
+
+     def _allowed_tool_defs(allowlist: list[str]) -> list[ToolDefinition]:
+         tool_by_name = _tool_by_name()
+         out: list[ToolDefinition] = []
+         for name in allowlist:
+             tool = tool_by_name.get(name)
+             if tool is not None:
+                 out.append(tool)
+         return out
+
+     def _system_prompt(runtime_ns: Dict[str, Any]) -> Optional[str]:
+         raw = runtime_ns.get("system_prompt") if isinstance(runtime_ns, dict) else None
+         if isinstance(raw, str) and raw.strip():
+             return raw
+         return None
+
+     def _sanitize_llm_messages(messages: Any, *, limits: Optional[Dict[str, Any]] = None) -> List[Dict[str, str]]:
+         """Convert runtime-owned message dicts into OpenAI-style {role, content, ...}.
+
+         Runtime messages can include extra metadata fields (`timestamp`, `metadata`) that many providers
+         will reject. Keep only the fields the LLM API expects.
+         """
+         if not isinstance(messages, list) or not messages:
+             return []
+         def _limit_int(key: str, default: int) -> int:
+             if not isinstance(limits, dict):
+                 return default
+             try:
+                 return int(limits.get(key, default))
+             except Exception:
+                 return default
+         max_message_chars = _limit_int("max_message_chars", -1)
+         max_tool_message_chars = _limit_int("max_tool_message_chars", -1)
+
+         def _truncate(text: str, *, max_chars: int) -> str:
+             if max_chars <= 0:
+                 return text
+             if len(text) <= max_chars:
+                 return text
+             suffix = f"\n… (truncated, {len(text):,} chars total)"
+             keep = max_chars - len(suffix)
+             if keep < 200:
+                 keep = max_chars
+                 suffix = ""
+             #[WARNING:TRUNCATION] bounded message content for LLM payload
+             return text[:keep].rstrip() + suffix
+
+         out: List[Dict[str, str]] = []
+         for m in messages:
+             if not isinstance(m, dict):
+                 continue
+             role = str(m.get("role") or "").strip()
+             content = m.get("content")
+             if not role or content is None:
+                 continue
+             content_str = str(content)
+             if not content_str.strip():
+                 continue
+             limit = max_tool_message_chars if role == "tool" else max_message_chars
+             entry: Dict[str, str] = {"role": role, "content": _truncate(content_str, max_chars=limit)}
+             if role == "tool":
+                 meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
+                 call_id = meta.get("call_id") if isinstance(meta, dict) else None
+                 if call_id is not None and str(call_id).strip():
+                     entry["tool_call_id"] = str(call_id).strip()
+             out.append(entry)
+         return out
+
+     def _flag(runtime_ns: Dict[str, Any], key: str, *, default: bool = False) -> bool:
+         if not isinstance(runtime_ns, dict) or key not in runtime_ns:
+             return bool(default)
+         val = runtime_ns.get(key)
+         if isinstance(val, bool):
+             return val
+         if isinstance(val, (int, float)):
+             return bool(val)
+         if isinstance(val, str):
+             lowered = val.strip().lower()
+             if lowered in ("1", "true", "yes", "on", "enabled"):
+                 return True
+             if lowered in ("0", "false", "no", "off", "disabled"):
+                 return False
+         return bool(default)
+
+     def _int(runtime_ns: Dict[str, Any], key: str, *, default: int) -> int:
+         if not isinstance(runtime_ns, dict) or key not in runtime_ns:
+             return int(default)
+         val = runtime_ns.get(key)
+         try:
+             return int(val)  # type: ignore[arg-type]
+         except Exception:
+             return int(default)
+
+     def _extract_plan_update(content: str) -> Optional[str]:
+         """Extract a plan update block from model content (best-effort).
+
+         Convention (prompted in Plan mode): the model appends a final section:
+
+             Plan Update:
+             - [ ] ...
+             - [x] ...
+         """
+         if not isinstance(content, str) or not content.strip():
+             return None
+
+         import re
+
+         lines = content.splitlines()
+         header_idx: Optional[int] = None
+         for i, line in enumerate(lines):
+             if re.match(r"(?i)^\s*plan\s*update\s*:\s*$", line.strip()):
+                 header_idx = i
+         if header_idx is None:
+             return None
+
+         plan_lines = lines[header_idx + 1 :]
+         while plan_lines and not plan_lines[0].strip():
+             plan_lines.pop(0)
+         plan_text = "\n".join(plan_lines).strip()
+         if not plan_text:
+             return None
+         if not re.search(r"(?m)^\s*(?:[-*]|\d+\.)\s+", plan_text):
+             return None
+         return plan_text
 
      def init_node(run: RunState, ctx) -> StepPlan:
          context, scratchpad, runtime_ns, _, limits = ensure_codeact_vars(run)
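
The new allowlist helpers are order-preserving and de-duplicating, and `_effective_allowlist` additionally filters against the currently registered tools. A standalone sketch of the normalization rules (logic restated from `_normalize_allowlist` above, outside the workflow closure):

```python
from typing import Any

def normalize_allowlist(raw: Any) -> list[str]:
    # None -> [], "name" -> ["name"], list -> de-duplicated strings in order.
    if raw is None:
        return []
    if isinstance(raw, str):
        val = raw.strip()
        return [val] if val else []
    if isinstance(raw, list):
        out, seen = [], set()
        for item in raw:
            if isinstance(item, str) and item.strip() and item.strip() not in seen:
                seen.add(item.strip())
                out.append(item.strip())
        return out
    return []

assert normalize_allowlist(" read_file ") == ["read_file"]
assert normalize_allowlist(["a", "a", 1, " b "]) == ["a", "b"]
```
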
@@ -123,13 +309,71 @@ def create_codeact_workflow(
          if task and (not messages or messages[-1].get("role") != "user" or messages[-1].get("content") != task):
              messages.append(_new_message(ctx, role="user", content=task))
 
-         runtime_ns.setdefault("tool_specs", tool_specs)
-         runtime_ns.setdefault("toolset_id", toolset_id)
+         allow = _effective_allowlist(runtime_ns)
+         allowed_defs = _allowed_tool_defs(allow)
+         runtime_ns["tool_specs"] = [t.to_dict() for t in allowed_defs]
+         runtime_ns["toolset_id"] = _compute_toolset_id(runtime_ns["tool_specs"])
+         runtime_ns.setdefault("allowed_tools", allow)
          runtime_ns.setdefault("inbox", [])
 
          emit("init", {"task": task})
+         if _flag(runtime_ns, "plan_mode", default=False) and not isinstance(scratchpad.get("plan"), str):
+             return StepPlan(node_id="init", next_node="plan")
          return StepPlan(node_id="init", next_node="reason")
 
+     def plan_node(run: RunState, ctx) -> StepPlan:
+         context, scratchpad, runtime_ns, _, _ = ensure_codeact_vars(run)
+         task = str(context.get("task", "") or "")
+
+         allow = _effective_allowlist(runtime_ns)
+
+         prompt = (
+             "You are preparing a high-level execution plan for the user's request.\n"
+             "Return a concise TODO list (5–12 steps) that is actionable and verifiable.\n"
+             "Do not call tools yet. Do not include role prefixes like 'assistant:'.\n\n"
+             f"User request:\n{task}\n\n"
+             "Plan (markdown checklist):\n"
+             "- [ ] ...\n"
+         )
+
+         emit("plan_request", {"tools": allow})
+
+         payload: Dict[str, Any] = {"prompt": prompt, "params": runtime_llm_params(runtime_ns, extra={"temperature": 0.2})}
+         media = extract_media_from_context(context)
+         if media:
+             payload["media"] = media
+         sys = _system_prompt(runtime_ns)
+         if isinstance(sys, str) and sys.strip():
+             payload["system_prompt"] = sys
+
+         return StepPlan(
+             node_id="plan",
+             effect=Effect(
+                 type=EffectType.LLM_CALL,
+                 payload=payload,
+                 result_key="_temp.plan_llm_response",
+             ),
+             next_node="plan_parse",
+         )
+
+     def plan_parse_node(run: RunState, ctx) -> StepPlan:
+         context, scratchpad, _, temp, _ = ensure_codeact_vars(run)
+         resp = temp.get("plan_llm_response", {})
+         if not isinstance(resp, dict):
+             resp = {}
+         plan_text = resp.get("content")
+         plan = "" if plan_text is None else str(plan_text).strip()
+         if not plan and isinstance(resp.get("data"), dict):
+             plan = json.dumps(resp.get("data"), ensure_ascii=False, indent=2).strip()
+
+         scratchpad["plan"] = plan
+         temp.pop("plan_llm_response", None)
+
+         if plan:
+             context["messages"].append(_new_message(ctx, role="assistant", content=plan, metadata={"kind": "plan"}))
+         emit("plan", {"plan": plan})
+         return StepPlan(node_id="plan_parse", next_node="reason")
+
      def reason_node(run: RunState, ctx) -> StepPlan:
          context, scratchpad, runtime_ns, _, limits = ensure_codeact_vars(run)
 
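
Plan mode is opt-in: `init` only detours through `plan`/`plan_parse` when the `plan_mode` flag in the `_runtime` namespace is truthy and no plan string is already in the scratchpad. A hypothetical caller-side seed (only the key names come from this diff; how vars are passed to the runtime is not shown here and is an assumption):

```python
# Hypothetical seed vars for a run; key names taken from the diff above.
seed_vars = {
    "context": {"task": "Summarize the repo", "messages": []},
    "_runtime": {
        "plan_mode": True,    # init -> plan -> plan_parse -> reason
        "review_mode": True,  # route candidate answers through maybe_review
        "allowed_tools": ["read_file", "list_files"],
    },
    "_limits": {"max_iterations": 10},
}
```
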
@@ -159,9 +403,23 @@ def create_codeact_workflow(
          guidance = " | ".join([m for m in inbox_messages if m])
          runtime_ns["inbox"] = []
 
+         messages_view = ActiveContextPolicy.select_active_messages_for_llm_from_run(run)
+
+         # Refresh tool metadata BEFORE rendering Active Memory so token fitting stays accurate
+         # (even though we do not render a "Tools (session)" block into Active Memory prompts).
+         allow = _effective_allowlist(runtime_ns)
+         allowed_defs = _allowed_tool_defs(allow)
+         tool_specs = [t.to_dict() for t in allowed_defs]
+         include_examples = bool(runtime_ns.get("tool_prompt_examples", True))
+         if not include_examples:
+             tool_specs = [{k: v for k, v in spec.items() if k != "examples"} for spec in tool_specs if isinstance(spec, dict)]
+         runtime_ns["tool_specs"] = tool_specs
+         runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
+         runtime_ns.setdefault("allowed_tools", allow)
+
          req = logic.build_request(
              task=str(context.get("task", "") or ""),
-             messages=list(context.get("messages") or []),
+             messages=messages_view,
              guidance=guidance,
              iteration=iteration + 1,
              max_iterations=max_iterations,
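
`tool_prompt_examples` (truthy by default) decides whether per-tool `examples` entries survive into the specs sent to the model; stripping them shrinks the prompt while `_compute_toolset_id` keeps the toolset fingerprint in sync. A small sketch of the filtering step, assuming dict-shaped specs:

```python
tool_specs = [
    {"name": "read_file", "parameters": {}, "examples": ["read_file(path='x')"]},
    {"name": "grep", "parameters": {}},
]
include_examples = False
if not include_examples:
    # Same comprehension as the diff: drop only the "examples" key.
    tool_specs = [{k: v for k, v in spec.items() if k != "examples"}
                  for spec in tool_specs if isinstance(spec, dict)]
assert all("examples" not in spec for spec in tool_specs)
```
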
@@ -170,9 +428,25 @@ def create_codeact_workflow(
 
          emit("reason", {"iteration": iteration + 1, "max_iterations": max_iterations, "has_guidance": bool(guidance)})
 
-         payload = {"prompt": req.prompt, "tools": [t.to_dict() for t in req.tools]}
+         # IMPORTANT: When we send `messages`, do not also send a non-empty `prompt`.
+         # Some providers/servers will append `prompt` as an extra user message even when the
+         # current request is already present in `messages`, which duplicates user turns and
+         # wastes context budget.
+         payload: Dict[str, Any] = {
+             "prompt": "",
+             "messages": _sanitize_llm_messages(messages_view, limits=limits),
+             "tools": list(tool_specs),
+         }
+         media = extract_media_from_context(context)
+         if media:
+             payload["media"] = media
+         sys = _system_prompt(runtime_ns) or req.system_prompt
+         if isinstance(sys, str) and sys.strip():
+             payload["system_prompt"] = sys
+         params: Dict[str, Any] = {}
          if req.max_tokens is not None:
-             payload["params"] = {"max_tokens": req.max_tokens}
+             params["max_tokens"] = req.max_tokens
+         payload["params"] = runtime_llm_params(runtime_ns, extra=params)
 
          return StepPlan(
              node_id="reason",
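
With `messages` carrying the conversation, `prompt` is deliberately empty and `_sanitize_llm_messages` reduces each runtime message to provider-friendly fields (plus `tool_call_id` for tool turns). A simplified restatement of the sanitizer, ignoring the character limits:

```python
def sanitize(messages):
    # Minimal sketch of _sanitize_llm_messages without the truncation limits.
    out = []
    for m in messages:
        role = str(m.get("role") or "").strip()
        content = m.get("content")
        if not role or content is None or not str(content).strip():
            continue
        entry = {"role": role, "content": str(content)}
        meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
        if role == "tool" and str(meta.get("call_id") or "").strip():
            entry["tool_call_id"] = str(meta["call_id"]).strip()
        out.append(entry)
    return out

runtime_messages = [
    {"role": "user", "content": "hi", "timestamp": "2024-01-01T00:00:00Z",
     "metadata": {"message_id": "msg_1"}},
    {"role": "tool", "content": "ok", "metadata": {"call_id": "42"}},
]
assert sanitize(runtime_messages) == [
    {"role": "user", "content": "hi"},
    {"role": "tool", "content": "ok", "tool_call_id": "42"},
]
```
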
@@ -185,93 +459,359 @@ def create_codeact_workflow(
          )
 
      def parse_node(run: RunState, ctx) -> StepPlan:
-         context, _, _, temp, _ = ensure_codeact_vars(run)
+         context, scratchpad, runtime_ns, temp, _ = ensure_codeact_vars(run)
          response = temp.get("llm_response", {})
          content, tool_calls = logic.parse_response(response)
 
-         if content:
-             context["messages"].append(_new_message(ctx, role="assistant", content=content))
-
          temp.pop("llm_response", None)
          emit("parse", {"has_tool_calls": bool(tool_calls), "content_preview": (content[:100] if content else "(no content)")})
 
          if tool_calls:
+             if content:
+                 context["messages"].append(_new_message(ctx, role="assistant", content=content))
+             if _flag(runtime_ns, "plan_mode", default=False):
+                 updated = _extract_plan_update(content)
+                 if isinstance(updated, str) and updated.strip():
+                     scratchpad["plan"] = updated.strip()
              temp["pending_tool_calls"] = [tc.__dict__ for tc in tool_calls]
              return StepPlan(node_id="parse", next_node="act")
 
+         # Empty response is an invalid step: recover with a bounded retry that carries evidence.
+         if not isinstance(content, str) or not content.strip():
+             try:
+                 empty_retries = int(scratchpad.get("empty_response_retry_count") or 0)
+             except Exception:
+                 empty_retries = 0
+
+             if empty_retries < 2:
+                 scratchpad["empty_response_retry_count"] = empty_retries + 1
+                 emit("parse_retry_empty_response", {"retries": empty_retries + 1})
+                 inbox = runtime_ns.get("inbox")
+                 if not isinstance(inbox, list):
+                     inbox = []
+                 runtime_ns["inbox"] = inbox
+                 inbox.append(
+                     {
+                         "content": (
+                             "[Recover] Your last message was empty. Continue the task now. "
+                             "If you need info, CALL tools (preferred). Do not output an empty message."
+                         )
+                     }
+                 )
+                 return StepPlan(node_id="parse", next_node="reason")
+
+             safe = (
+                 "I can't proceed: the model repeatedly returned empty outputs (no content, no tool calls).\n"
+                 "Please retry, reduce context, or switch models."
+             )
+             context["messages"].append(_new_message(ctx, role="assistant", content=safe, metadata={"kind": "error"}))
+             temp["final_answer"] = safe
+             temp["pending_tool_calls"] = []
+             scratchpad["empty_response_retry_count"] = 0
+             return StepPlan(node_id="parse", next_node="maybe_review")
+
          code = logic.extract_code(content)
          if code:
+             if content:
+                 context["messages"].append(_new_message(ctx, role="assistant", content=content))
+             if _flag(runtime_ns, "plan_mode", default=False):
+                 updated = _extract_plan_update(content)
+                 if isinstance(updated, str) and updated.strip():
+                     scratchpad["plan"] = updated.strip()
              temp["pending_code"] = code
              return StepPlan(node_id="parse", next_node="execute_code")
 
-         temp["final_answer"] = content
-         return StepPlan(node_id="parse", next_node="done")
+         def _extract_final_answer(text: str) -> tuple[bool, str]:
+             if not isinstance(text, str) or not text.strip():
+                 return False, ""
+             s = text.lstrip()
+             if s.upper().startswith("FINAL:"):
+                 return True, s[len("FINAL:") :].lstrip()
+             return False, text
+
+         raw = str(content or "").strip()
+         is_final, final = _extract_final_answer(raw)
+         if is_final:
+             if final:
+                 context["messages"].append(_new_message(ctx, role="assistant", content=final))
+             if _flag(runtime_ns, "plan_mode", default=False):
+                 updated = _extract_plan_update(final)
+                 if isinstance(updated, str) and updated.strip():
+                     scratchpad["plan"] = updated.strip()
+             temp["final_answer"] = final or "No answer provided"
+             temp["pending_tool_calls"] = []
+             return StepPlan(node_id="parse", next_node="maybe_review")
+
+         # Default: treat as a final answer even without an explicit FINAL marker.
+         if raw:
+             context["messages"].append(_new_message(ctx, role="assistant", content=raw))
+         if _flag(runtime_ns, "plan_mode", default=False):
+             updated = _extract_plan_update(raw)
+             if isinstance(updated, str) and updated.strip():
+                 scratchpad["plan"] = updated.strip()
+         temp["final_answer"] = raw or "No answer provided"
+         temp["pending_tool_calls"] = []
+         scratchpad["empty_response_retry_count"] = 0
+         return StepPlan(node_id="parse", next_node="maybe_review")
 
      def act_node(run: RunState, ctx) -> StepPlan:
-         _, _, _, temp, _ = ensure_codeact_vars(run)
-         tool_calls = temp.get("pending_tool_calls", [])
-         if not isinstance(tool_calls, list):
-             tool_calls = []
-
-         if not tool_calls:
+         # Treat `_temp.pending_tool_calls` as a durable queue to avoid dropping tool calls when
+         # schema-only tools (ask_user/memory/etc.) are interleaved with normal tools.
+         context, _, runtime_ns, temp, _ = ensure_codeact_vars(run)
+         raw_queue = temp.get("pending_tool_calls", [])
+         if not isinstance(raw_queue, list) or not raw_queue:
+             temp["pending_tool_calls"] = []
              return StepPlan(node_id="act", next_node="reason")
 
-         # Handle ask_user specially with ASK_USER effect.
-         for i, tc in enumerate(tool_calls):
-             if not isinstance(tc, dict):
-                 continue
-             if tc.get("name") != "ask_user":
+         allow = _effective_allowlist(runtime_ns)
+         builtin_effect_tools = {
+             "ask_user",
+             "recall_memory",
+             "inspect_vars",
+             "remember",
+             "remember_note",
+             "compact_memory",
+             "delegate_agent",
+         }
+
+         tool_queue: List[Dict[str, Any]] = []
+         for idx, item in enumerate(raw_queue, start=1):
+             if isinstance(item, ToolCall):
+                 d: Dict[str, Any] = {"name": item.name, "arguments": item.arguments, "call_id": item.call_id}
+             elif isinstance(item, dict):
+                 d = dict(item)
+             else:
                  continue
+             call_id = str(d.get("call_id") or "").strip()
+             if not call_id:
+                 d["call_id"] = str(idx)
+             tool_queue.append(d)
+
+         if not tool_queue:
+             temp["pending_tool_calls"] = []
+             return StepPlan(node_id="act", next_node="reason")
+
+         def _is_builtin(tc: Dict[str, Any]) -> bool:
+             name = tc.get("name")
+             return isinstance(name, str) and name in builtin_effect_tools
+
+         if _is_builtin(tool_queue[0]):
+             tc = tool_queue[0]
+             name = str(tc.get("name") or "").strip()
              args = tc.get("arguments") or {}
-             question = str(args.get("question") or "Please provide input:")
-             choices = args.get("choices")
-             choices = list(choices) if isinstance(choices, list) else None
-
-             temp["pending_tool_calls"] = tool_calls[i + 1 :]
-             emit("ask_user", {"question": question, "choices": choices or []})
-             return StepPlan(
-                 node_id="act",
-                 effect=Effect(
-                     type=EffectType.ASK_USER,
-                     payload={"prompt": question, "choices": choices, "allow_free_text": True},
-                     result_key="_temp.user_response",
-                 ),
-                 next_node="handle_user_response",
-             )
+             if not isinstance(args, dict):
+                 args = {}
 
-         for tc in tool_calls:
-             if isinstance(tc, dict):
-                 emit("act", {"tool": tc.get("name", ""), "args": tc.get("arguments", {})})
+             # Pop builtin.
+             temp["pending_tool_calls"] = list(tool_queue[1:])
 
-         formatted_calls: List[Dict[str, Any]] = []
-         for tc in tool_calls:
-             if isinstance(tc, dict):
-                 formatted_calls.append(
-                     {"name": tc.get("name", ""), "arguments": tc.get("arguments", {}), "call_id": tc.get("call_id", "1")}
+             if name and name not in allow:
+                 temp["tool_results"] = {
+                     "results": [
+                         {
+                             "call_id": str(tc.get("call_id") or ""),
+                             "name": name,
+                             "success": False,
+                             "output": None,
+                             "error": f"Tool '{name}' is not allowed for this agent",
+                         }
+                     ]
+                 }
+                 emit("act_blocked", {"tool": name})
+                 return StepPlan(node_id="act", next_node="observe")
+
+             if name == "ask_user":
+                 question = str(args.get("question") or "Please provide input:")
+                 choices = args.get("choices")
+                 choices = list(choices) if isinstance(choices, list) else None
+
+                 msgs = context.get("messages")
+                 if isinstance(msgs, list):
+                     content = f"[Agent question]: {question}"
+                     last = msgs[-1] if msgs else None
+                     last_role = last.get("role") if isinstance(last, dict) else None
+                     last_meta = last.get("metadata") if isinstance(last, dict) else None
+                     last_kind = last_meta.get("kind") if isinstance(last_meta, dict) else None
+                     last_content = last.get("content") if isinstance(last, dict) else None
+                     if not (last_role == "assistant" and last_kind == "ask_user_prompt" and str(last_content or "") == content):
+                         msgs.append(_new_message(ctx, role="assistant", content=content, metadata={"kind": "ask_user_prompt"}))
+
+                 emit("ask_user", {"question": question, "choices": choices or []})
+                 return StepPlan(
+                     node_id="act",
+                     effect=Effect(
+                         type=EffectType.ASK_USER,
+                         payload={"prompt": question, "choices": choices, "allow_free_text": True},
+                         result_key="_temp.user_response",
+                     ),
+                     next_node="handle_user_response",
+                 )
+
+             if name == "delegate_agent":
+                 delegated_task = str(args.get("task") or "").strip()
+                 delegated_context = str(args.get("context") or "").strip()
+
+                 tools_raw = args.get("tools")
+                 if tools_raw is None:
+                     # Inherit the current allowlist, but avoid recursive delegation and avoid waiting on ask_user
+                     # unless explicitly enabled.
+                     child_allow = [t for t in allow if t not in {"delegate_agent", "ask_user"}]
+                 else:
+                     child_allow = _normalize_allowlist(tools_raw)
+
+                 if not delegated_task:
+                     temp["tool_results"] = {
+                         "results": [
+                             {
+                                 "call_id": str(tc.get("call_id") or ""),
+                                 "name": "delegate_agent",
+                                 "success": False,
+                                 "output": None,
+                                 "error": "delegate_agent requires a non-empty task",
+                             }
+                         ]
+                     }
+                     return StepPlan(node_id="act", next_node="observe")
+
+                 combined_task = delegated_task
+                 if delegated_context:
+                     combined_task = f"{delegated_task}\n\nContext:\n{delegated_context}"
+
+                 sub_vars: Dict[str, Any] = {
+                     "context": {"task": combined_task, "messages": []},
+                     "_runtime": {
+                         "allowed_tools": list(child_allow),
+                         "system_prompt_extra": (
+                             "You are a delegated sub-agent.\n"
+                             "- Focus ONLY on the delegated task.\n"
+                             "- Use ONLY the allowed tools when needed.\n"
+                             "- Do not ask the user questions; if blocked, state assumptions and proceed.\n"
+                             "- Return a concise result suitable for the parent agent to act on.\n"
+                         ),
+                     },
+                     "_limits": {"max_iterations": 10},
+                 }
+
+                 payload = {
+                     "workflow_id": str(getattr(run, "workflow_id", "") or "codeact_agent"),
+                     "vars": sub_vars,
+                     "async": False,
+                     "include_traces": False,
+                     # Tool-mode wrapper so the parent receives a normal tool observation (no run failure on child failure).
+                     "wrap_as_tool_result": True,
+                     "tool_name": "delegate_agent",
+                     "call_id": str(tc.get("call_id") or ""),
+                 }
+                 emit("delegate_agent", {"tools": list(child_allow), "call_id": payload.get("call_id")})
+                 return StepPlan(
+                     node_id="act",
+                     effect=Effect(type=EffectType.START_SUBWORKFLOW, payload=payload, result_key="_temp.tool_results"),
+                     next_node="observe",
+                 )
+
+             if name == "recall_memory":
+                 payload = dict(args)
+                 payload.setdefault("tool_name", "recall_memory")
+                 payload.setdefault("call_id", tc.get("call_id") or "memory")
+                 emit("memory_query", {"query": payload.get("query"), "span_id": payload.get("span_id")})
+                 return StepPlan(
+                     node_id="act",
+                     effect=Effect(type=EffectType.MEMORY_QUERY, payload=payload, result_key="_temp.tool_results"),
+                     next_node="observe",
                  )
-             elif isinstance(tc, ToolCall):
-                 formatted_calls.append(
-                     {"name": tc.name, "arguments": tc.arguments, "call_id": tc.call_id or "1"}
+
+             if name == "inspect_vars":
+                 payload = dict(args)
+                 payload.setdefault("tool_name", "inspect_vars")
+                 payload.setdefault("call_id", tc.get("call_id") or "vars")
+                 emit("vars_query", {"path": payload.get("path")})
+                 return StepPlan(
+                     node_id="act",
+                     effect=Effect(type=EffectType.VARS_QUERY, payload=payload, result_key="_temp.tool_results"),
+                     next_node="observe",
+                 )
+
+             if name == "remember":
+                 payload = dict(args)
+                 payload.setdefault("tool_name", "remember")
+                 payload.setdefault("call_id", tc.get("call_id") or "memory")
+                 emit("memory_tag", {"span_id": payload.get("span_id"), "tags": payload.get("tags")})
+                 return StepPlan(
+                     node_id="act",
+                     effect=Effect(type=EffectType.MEMORY_TAG, payload=payload, result_key="_temp.tool_results"),
+                     next_node="observe",
                  )
 
+             if name == "remember_note":
+                 payload = dict(args)
+                 payload.setdefault("tool_name", "remember_note")
+                 payload.setdefault("call_id", tc.get("call_id") or "memory")
+                 emit("memory_note", {"note": payload.get("note"), "tags": payload.get("tags")})
+                 return StepPlan(
+                     node_id="act",
+                     effect=Effect(type=EffectType.MEMORY_NOTE, payload=payload, result_key="_temp.tool_results"),
+                     next_node="observe",
+                 )
+
+             if name == "compact_memory":
+                 payload = dict(args)
+                 payload.setdefault("tool_name", "compact_memory")
+                 payload.setdefault("call_id", tc.get("call_id") or "compact")
+                 emit(
+                     "memory_compact",
+                     {
+                         "preserve_recent": payload.get("preserve_recent"),
+                         "mode": payload.get("compression_mode"),
+                         "focus": payload.get("focus"),
+                     },
+                 )
+                 return StepPlan(
+                     node_id="act",
+                     effect=Effect(type=EffectType.MEMORY_COMPACT, payload=payload, result_key="_temp.tool_results"),
+                     next_node="observe",
+                 )
+
+             if temp.get("pending_tool_calls"):
+                 return StepPlan(node_id="act", next_node="act")
+             return StepPlan(node_id="act", next_node="reason")
+
+         batch: List[Dict[str, Any]] = []
+         for tc in tool_queue:
+             if _is_builtin(tc):
+                 break
+             batch.append(tc)
+
+         remaining = tool_queue[len(batch) :]
+         temp["pending_tool_calls"] = list(remaining)
+
+         for tc in batch:
+             emit("act", {"tool": tc.get("name", ""), "args": tc.get("arguments", {}), "call_id": str(tc.get("call_id") or "")})
+
+         formatted_calls: List[Dict[str, Any]] = []
+         for tc in batch:
+             formatted_calls.append(
+                 {"name": tc.get("name", ""), "arguments": tc.get("arguments", {}), "call_id": str(tc.get("call_id") or "")}
+             )
+
          return StepPlan(
              node_id="act",
              effect=Effect(
                  type=EffectType.TOOL_CALLS,
-                 payload={"tool_calls": formatted_calls},
+                 payload={"tool_calls": formatted_calls, "allowed_tools": list(allow)},
                  result_key="_temp.tool_results",
              ),
              next_node="observe",
          )
 
      def execute_code_node(run: RunState, ctx) -> StepPlan:
-         _, _, _, temp, _ = ensure_codeact_vars(run)
+         _, _, runtime_ns, temp, _ = ensure_codeact_vars(run)
          code = temp.get("pending_code")
          if not isinstance(code, str) or not code.strip():
              return StepPlan(node_id="execute_code", next_node="reason")
 
          temp.pop("pending_code", None)
          emit("act", {"tool": "execute_python", "args": {"code": "(inline)", "timeout_s": 10.0}})
+         allow = _effective_allowlist(runtime_ns)
 
          return StepPlan(
              node_id="execute_code",
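
The rewritten `parse_node` recognizes an explicit `FINAL:` prefix but also treats any remaining plain text as the final answer; in both cases it routes through `maybe_review` instead of straight to `done`. The marker extraction, restated standalone:

```python
def extract_final_answer(text: str) -> tuple[bool, str]:
    # Mirrors the diffed _extract_final_answer helper (case-insensitive prefix).
    if not isinstance(text, str) or not text.strip():
        return False, ""
    s = text.lstrip()
    if s.upper().startswith("FINAL:"):
        return True, s[len("FINAL:"):].lstrip()
    return False, text

assert extract_final_answer("final: done") == (True, "done")
assert extract_final_answer("still working")[0] is False
```
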
@@ -284,7 +824,8 @@ def create_codeact_workflow(
                              "arguments": {"code": code, "timeout_s": 10.0},
                              "call_id": "code",
                          }
-                     ]
+                     ],
+                     "allowed_tools": list(allow),
                  },
                  result_key="_temp.tool_results",
              ),
@@ -292,7 +833,7 @@ def create_codeact_workflow(
          )
 
      def observe_node(run: RunState, ctx) -> StepPlan:
-         context, _, _, temp, _ = ensure_codeact_vars(run)
+         context, scratchpad, _, temp, _ = ensure_codeact_vars(run)
          tool_results = temp.get("tool_results", {})
          if not isinstance(tool_results, dict):
              tool_results = {}
@@ -308,12 +849,30 @@ def create_codeact_workflow(
              success = bool(r.get("success"))
              output = r.get("output", "")
              error = r.get("error", "")
+             # Prefer a tool-supplied human/LLM-friendly rendering when present.
+             def _display(v: Any) -> str:
+                 if isinstance(v, dict):
+                     rendered = v.get("rendered")
+                     if isinstance(rendered, str) and rendered.strip():
+                         return rendered.strip()
+                 return "" if v is None else str(v)
+
+             display = _display(output)
+             if not success:
+                 # Preserve structured outputs for provenance, but show a clean string to the LLM/UI.
+                 display = _display(output) if isinstance(output, dict) else str(error or output)
              rendered = logic.format_observation(
                  name=name,
-                 output=(output if success else (error or output)),
+                 output=display,
                  success=success,
              )
-             emit("observe", {"tool": name, "result": rendered[:150]})
+             # Observability: avoid truncating normal tool results in step events.
+             # Keep a bounded preview for huge tool outputs to avoid bloating traces/ledgers.
+             preview = rendered
+             if len(preview) > 1000:
+                 #[WARNING:TRUNCATION] bounded preview for observability payloads
+                 preview = preview[:1000] + f"\n… (truncated, {len(rendered):,} chars total)"
+             emit("observe", {"tool": name, "success": success, "result": preview})
              context["messages"].append(
                  _new_message(
                      ctx,
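
`observe_node` now prefers a tool-supplied `rendered` string over the raw structured output, and falls back to the error text on failure. The preference rule, restated standalone:

```python
def display(v):
    # Prefer {"rendered": "..."} when a tool returns a dict with a usable rendering.
    if isinstance(v, dict):
        rendered = v.get("rendered")
        if isinstance(rendered, str) and rendered.strip():
            return rendered.strip()
    return "" if v is None else str(v)

assert display({"rendered": "3 files changed", "files": [1, 2, 3]}) == "3 files changed"
assert display({"files": [1, 2, 3]}).startswith("{'files'")
assert display(None) == ""
```
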
@@ -324,6 +883,12 @@ def create_codeact_workflow(
              )
 
          temp.pop("tool_results", None)
+         # Reset verifier/review rounds after executing tools so the verifier can run
+         # again on the next candidate answer.
+         scratchpad["review_count"] = 0
+         pending = temp.get("pending_tool_calls", [])
+         if isinstance(pending, list) and pending:
+             return StepPlan(node_id="observe", next_node="act")
          temp["pending_tool_calls"] = []
          return StepPlan(node_id="observe", next_node="reason")
 
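
Together, `act_node` and `observe_node` drain `_temp.pending_tool_calls` as a queue: consecutive normal tools are batched into one TOOL_CALLS effect, a built-in effect tool at the head is dispatched on its own, and `observe` loops back to `act` while anything remains. A sketch of the batching rule:

```python
BUILTIN = {"ask_user", "recall_memory", "inspect_vars", "remember",
           "remember_note", "compact_memory", "delegate_agent"}

def next_batch(queue):
    # Returns (batch_of_normal_tools, remaining). A builtin at the head is
    # handled separately by the node, so the batch stops before any builtin.
    batch = []
    for tc in queue:
        if tc["name"] in BUILTIN:
            break
        batch.append(tc)
    return batch, queue[len(batch):]

q = [{"name": "grep"}, {"name": "read_file"}, {"name": "ask_user"}, {"name": "grep"}]
batch, rest = next_batch(q)
assert [t["name"] for t in batch] == ["grep", "read_file"]
assert rest[0]["name"] == "ask_user"
```
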
@@ -342,6 +907,274 @@ def create_codeact_workflow(
              return StepPlan(node_id="handle_user_response", next_node="act")
          return StepPlan(node_id="handle_user_response", next_node="reason")
 
+     def maybe_review_node(run: RunState, ctx) -> StepPlan:
+         _, scratchpad, runtime_ns, _, _ = ensure_codeact_vars(run)
+
+         if not _flag(runtime_ns, "review_mode", default=False):
+             return StepPlan(node_id="maybe_review", next_node="done")
+
+         max_rounds = _int(runtime_ns, "review_max_rounds", default=1)
+         if max_rounds < 0:
+             max_rounds = 0
+         count = scratchpad.get("review_count")
+         try:
+             count_int = int(count or 0)
+         except Exception:
+             count_int = 0
+
+         if count_int >= max_rounds:
+             return StepPlan(node_id="maybe_review", next_node="done")
+
+         scratchpad["review_count"] = count_int + 1
+         return StepPlan(node_id="maybe_review", next_node="review")
+
+     def review_node(run: RunState, ctx) -> StepPlan:
+         context, scratchpad, runtime_ns, _, limits = ensure_codeact_vars(run)
+         task = str(context.get("task", "") or "")
+         plan = scratchpad.get("plan")
+         plan_text = str(plan).strip() if isinstance(plan, str) and plan.strip() else "(no plan)"
+
+         allow = _effective_allowlist(runtime_ns)
+
+         def _truncate_block(text: str, *, max_chars: int) -> str:
+             s = str(text or "")
+             if max_chars <= 0:
+                 return s
+             if len(s) <= max_chars:
+                 return s
+             suffix = f"\n… (truncated, {len(s):,} chars total)"
+             keep = max_chars - len(suffix)
+             if keep < 200:
+                 keep = max_chars
+                 suffix = ""
+             #[WARNING:TRUNCATION] bounded transcript blocks for prompt reconstruction
+             return s[:keep].rstrip() + suffix
+
+         def _format_allowed_tools() -> str:
+             specs = runtime_ns.get("tool_specs")
+             if not isinstance(specs, list) or not specs:
+                 defs = _allowed_tool_defs(allow)
+                 specs = [t.to_dict() for t in defs]
+             lines: list[str] = []
+             for spec in specs:
+                 if not isinstance(spec, dict):
+                     continue
+                 name = str(spec.get("name") or "").strip()
+                 if not name:
+                     continue
+                 params = spec.get("parameters")
+                 props = params.get("properties", {}) if isinstance(params, dict) else {}
+                 keys = sorted([k for k in props.keys() if isinstance(k, str)])
+                 if keys:
+                     lines.append(f"- {name}({', '.join(keys)})")
+                 else:
+                     lines.append(f"- {name}()")
+             return "\n".join(lines) if lines else "(no tools available)"
+
+         messages = list(context.get("messages") or [])
+         tool_msgs: list[str] = []
+         try:
+             tool_limit = int(limits.get("review_max_tool_output_chars", -1))
+         except Exception:
+             tool_limit = -1
+         try:
+             answer_limit = int(limits.get("review_max_answer_chars", -1))
+         except Exception:
+             answer_limit = -1
+
+         for m in reversed(messages):
+             if not isinstance(m, dict) or m.get("role") != "tool":
+                 continue
+             content = m.get("content")
+             if isinstance(content, str) and content.strip():
+                 tool_msgs.append(_truncate_block(content.strip(), max_chars=tool_limit))
+                 if len(tool_msgs) >= 8:
+                     break
+         tool_msgs.reverse()
+         observations = "\n\n".join(tool_msgs) if tool_msgs else "(no tool outputs)"
+
+         # Include recent user messages (especially ask_user responses) so the reviewer can
+         # avoid re-asking questions the user already answered.
+         try:
+             user_limit = int(limits.get("review_max_user_message_chars", -1))
+         except Exception:
+             user_limit = -1
+
+         user_msgs: list[str] = []
+         ask_prompts: list[str] = []
+         for m in reversed(messages):
+             if not isinstance(m, dict):
+                 continue
+             role = m.get("role")
+             content = m.get("content")
+             if role == "user" and isinstance(content, str) and content.strip():
+                 if content.strip() != task.strip():
+                     user_msgs.append(_truncate_block(content.strip(), max_chars=user_limit))
+                 if len(user_msgs) >= 4:
+                     break
+         for m in reversed(messages):
+             if not isinstance(m, dict):
+                 continue
+             if m.get("role") != "assistant":
+                 continue
+             meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
+             if not isinstance(meta, dict) or meta.get("kind") != "ask_user_prompt":
+                 continue
+             content = m.get("content")
+             if isinstance(content, str) and content.strip():
+                 ask_prompts.append(_truncate_block(content.strip(), max_chars=user_limit))
+                 if len(ask_prompts) >= 4:
+                     break
+
+         user_msgs.reverse()
+         ask_prompts.reverse()
+         user_context = "\n\n".join(user_msgs) if user_msgs else "(no additional user messages)"
+         asked_context = "\n\n".join(ask_prompts) if ask_prompts else "(no ask_user prompts recorded)"
+
+         answer_raw = str(run.vars.get("_temp", {}).get("final_answer") or "")
+         answer_excerpt = ""
+         if not tool_msgs and answer_raw.strip():
+             answer_excerpt = _truncate_block(answer_raw.strip(), max_chars=answer_limit)
+
+         prompt = (
+             "You are a verifier. Review whether the user's request has been fully satisfied.\n"
+             "Be strict: only count actions that are supported by the tool outputs.\n"
+             "If anything is missing, propose the NEXT ACTIONS.\n"
+             "Prefer returning `next_tool_calls` over `next_prompt`.\n"
+             "Return JSON ONLY.\n\n"
+             f"User request:\n{task}\n\n"
+             f"Plan:\n{plan_text}\n\n"
+             f"Recent ask_user prompts:\n{asked_context}\n\n"
+             f"Recent user messages:\n{user_context}\n\n"
+             + (f"Current answer (excerpt):\n{answer_excerpt}\n\n" if answer_excerpt else "")
+             + f"Tool outputs:\n{observations}\n\n"
+             f"Allowed tools:\n{_format_allowed_tools()}\n\n"
+         )
+
+         schema = {
+             "type": "object",
+             "properties": {
+                 "complete": {"type": "boolean"},
+                 "missing": {"type": "array", "items": {"type": "string"}},
+                 "next_prompt": {"type": "string"},
+                 "next_tool_calls": {
+                     "type": "array",
+                     "items": {
+                         "type": "object",
+                         "properties": {
+                             "name": {"type": "string"},
+                             "arguments": {"type": "object"},
+                         },
+                         "required": ["name", "arguments"],
+                         "additionalProperties": False,
+                     },
+                 },
+             },
+             "required": ["complete", "missing", "next_prompt", "next_tool_calls"],
+             "additionalProperties": False,
+         }
+
+         emit("review_request", {"tool_messages": len(tool_msgs)})
+
+         payload: Dict[str, Any] = {
+             "prompt": prompt,
+             "response_schema": schema,
+             "response_schema_name": "CodeActVerifier",
+             "params": runtime_llm_params(runtime_ns, extra={"temperature": 0.2}),
+         }
+         media = extract_media_from_context(context)
+         if media:
+             payload["media"] = media
+         sys = _system_prompt(runtime_ns)
+         if sys is not None:
+             payload["system_prompt"] = sys
+
+         return StepPlan(
+             node_id="review",
+             effect=Effect(
+                 type=EffectType.LLM_CALL,
+                 payload=payload,
+                 result_key="_temp.review_llm_response",
+             ),
+             next_node="review_parse",
+         )
+
+     def review_parse_node(run: RunState, ctx) -> StepPlan:
+         _, _, runtime_ns, temp, _ = ensure_codeact_vars(run)
+         resp = temp.get("review_llm_response", {})
+         if not isinstance(resp, dict):
+             resp = {}
+
+         data = resp.get("data")
+         if data is None and isinstance(resp.get("content"), str):
+             try:
+                 data = json.loads(resp["content"])
+             except Exception:
+                 data = None
+         if not isinstance(data, dict):
+             data = {}
+
+         complete = bool(data.get("complete"))
+         missing = data.get("missing") if isinstance(data.get("missing"), list) else []
+         next_prompt = data.get("next_prompt")
+         next_prompt_text = str(next_prompt or "").strip()
+         next_tool_calls_raw = data.get("next_tool_calls")
+         next_tool_calls: list[dict[str, Any]] = []
+         if isinstance(next_tool_calls_raw, list):
+             for item in next_tool_calls_raw:
+                 if not isinstance(item, dict):
+                     continue
+                 name = str(item.get("name") or "").strip()
+                 args = item.get("arguments")
+                 if not isinstance(args, dict):
+                     args = {}
+                 if name:
+                     next_tool_calls.append({"name": name, "arguments": args})
+
+         emit("review", {"complete": complete, "missing": missing})
+         temp.pop("review_llm_response", None)
+
+         if complete:
+             return StepPlan(node_id="review_parse", next_node="done")
+
+         if next_tool_calls:
+             temp["pending_tool_calls"] = next_tool_calls
+             emit("review_tool_calls", {"count": len(next_tool_calls)})
+             return StepPlan(node_id="review_parse", next_node="act")
+
+         # Behavioral validation: if incomplete but no tool calls, re-ask reviewer once with stricter rules.
+         if not complete and not next_tool_calls:
+             try:
+                 retry_count = int(runtime_ns.get("review_retry_count") or 0)
+             except Exception:
+                 retry_count = 0
+             if retry_count < 1:
+                 runtime_ns["review_retry_count"] = retry_count + 1
+                 inbox = runtime_ns.get("inbox")
+                 if not isinstance(inbox, list):
+                     inbox = []
+                 runtime_ns["inbox"] = inbox
+                 inbox.append(
+                     {
+                         "content": (
+                             "[Review] Your last review output was not actionable. "
+                             "If incomplete, you MUST return at least one `next_tool_call` "
+                             "(use `ask_user` if you need clarification). Return JSON only."
+                         )
+                     }
+                 )
+                 emit("review_retry_unactionable", {"retry": retry_count + 1})
+                 return StepPlan(node_id="review_parse", next_node="review")
+
+         runtime_ns["review_retry_count"] = 0
+         if next_prompt_text:
+             inbox = runtime_ns.get("inbox")
+             if not isinstance(inbox, list):
+                 inbox = []
+             runtime_ns["inbox"] = inbox
+             inbox.append({"content": f"[Review] {next_prompt_text}"})
+         return StepPlan(node_id="review_parse", next_node="reason")
+
      def done_node(run: RunState, ctx) -> StepPlan:
          context, scratchpad, _, temp, limits = ensure_codeact_vars(run)
          answer = str(temp.get("final_answer") or "No answer provided")
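
`review_parse_node` trusts only well-formed reviewer output: each `next_tool_calls` item must be a dict with a non-empty `name` and dict `arguments`; anything else is dropped. A sketch of that filtering against a hypothetical reviewer response:

```python
import json

raw = json.loads('''{
  "complete": false,
  "missing": ["run the tests"],
  "next_prompt": "",
  "next_tool_calls": [
    {"name": "execute_python", "arguments": {"code": "print(1)"}},
    {"name": "", "arguments": {}},
    "not-a-dict"
  ]
}''')

calls = []
for item in raw.get("next_tool_calls") or []:
    if not isinstance(item, dict):
        continue  # drop non-dict items
    name = str(item.get("name") or "").strip()
    args = item.get("arguments") if isinstance(item.get("arguments"), dict) else {}
    if name:  # drop items with an empty name
        calls.append({"name": name, "arguments": args})

assert calls == [{"name": "execute_python", "arguments": {"code": "print(1)"}}]
```
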
@@ -350,6 +1183,16 @@ def create_codeact_workflow(
          # Prefer _limits.current_iteration, fall back to scratchpad
          iterations = int(limits.get("current_iteration", 0) or scratchpad.get("iteration", 0) or 0)
 
+         # Persist the final answer into the conversation history so it becomes part of the
+         # next run's seed context and shows up in /history.
+         messages = context.get("messages")
+         if isinstance(messages, list):
+             last = messages[-1] if messages else None
+             last_role = last.get("role") if isinstance(last, dict) else None
+             last_content = last.get("content") if isinstance(last, dict) else None
+             if last_role != "assistant" or str(last_content or "") != answer:
+                 messages.append(_new_message(ctx, role="assistant", content=answer, metadata={"kind": "final_answer"}))
+
          return StepPlan(
              node_id="done",
              complete_output={
@@ -384,14 +1227,18 @@ def create_codeact_workflow(
          entry_node="init",
          nodes={
              "init": init_node,
+             "plan": plan_node,
+             "plan_parse": plan_parse_node,
              "reason": reason_node,
              "parse": parse_node,
              "act": act_node,
              "execute_code": execute_code_node,
              "observe": observe_node,
              "handle_user_response": handle_user_response_node,
+             "maybe_review": maybe_review_node,
+             "review": review_node,
+             "review_parse": review_parse_node,
              "done": done_node,
              "max_iterations": max_iterations_node,
          },
      )
-