abstractagent 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,9 +6,10 @@ import hashlib
  import json
  from typing import Any, Callable, Dict, List, Optional
 
- from abstractcore.tools import ToolCall
+ from abstractcore.tools import ToolCall, ToolDefinition
  from abstractruntime import Effect, EffectType, RunState, StepPlan, WorkflowSpec
  from abstractruntime.core.vars import ensure_limits, ensure_namespaces
+ from abstractruntime.memory.active_context import ActiveContextPolicy
 
  from ..logic.codeact import CodeActLogic
 
@@ -29,11 +30,16 @@ def _new_message(
 
      timestamp = datetime.now(timezone.utc).isoformat()
 
+     import uuid
+
+     meta = dict(metadata or {})
+     meta.setdefault("message_id", f"msg_{uuid.uuid4().hex}")
+
      return {
          "role": role,
          "content": content,
          "timestamp": timestamp,
-         "metadata": metadata or {},
+         "metadata": meta,
      }
 
 
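The `message_id` change above is idempotent: a caller-supplied ID wins, and a fresh one is minted otherwise. A minimal standalone sketch of that behavior (illustrative, not code from the package):

    import uuid

    def stamp_message_id(metadata):
        meta = dict(metadata or {})  # copy so the caller's dict is never mutated
        meta.setdefault("message_id", f"msg_{uuid.uuid4().hex}")
        return meta

    assert "message_id" in stamp_message_id(None)
    assert stamp_message_id({"message_id": "msg_fixed"})["message_id"] == "msg_fixed"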
@@ -108,9 +114,186 @@ def create_codeact_workflow(
          if on_step:
              on_step(step, data)
 
-     tool_defs = logic.tools
-     tool_specs = [t.to_dict() for t in tool_defs]
-     toolset_id = _compute_toolset_id(tool_specs)
+     def _current_tool_defs() -> list[ToolDefinition]:
+         defs = getattr(logic, "tools", None)
+         if not isinstance(defs, list):
+             try:
+                 defs = list(defs)  # type: ignore[arg-type]
+             except Exception:
+                 defs = []
+         return [t for t in defs if getattr(t, "name", None)]
+
+     def _tool_by_name() -> dict[str, ToolDefinition]:
+         out: dict[str, ToolDefinition] = {}
+         for t in _current_tool_defs():
+             name = getattr(t, "name", None)
+             if isinstance(name, str) and name.strip():
+                 out[name] = t
+         return out
+
+     def _default_allowlist() -> list[str]:
+         out: list[str] = []
+         seen: set[str] = set()
+         for t in _current_tool_defs():
+             name = getattr(t, "name", None)
+             if not isinstance(name, str) or not name.strip() or name in seen:
+                 continue
+             seen.add(name)
+             out.append(name)
+         return out
+
+     def _normalize_allowlist(raw: Any) -> list[str]:
+         if raw is None:
+             return []
+         if isinstance(raw, str):
+             val = raw.strip()
+             return [val] if val else []
+         if isinstance(raw, list):
+             out: list[str] = []
+             seen: set[str] = set()
+             for item in raw:
+                 if not isinstance(item, str):
+                     continue
+                 name = item.strip()
+                 if not name or name in seen:
+                     continue
+                 seen.add(name)
+                 out.append(name)
+             return out
+         return []
+
+     def _effective_allowlist(runtime_ns: Dict[str, Any]) -> list[str]:
+         if isinstance(runtime_ns, dict) and "allowed_tools" in runtime_ns:
+             normalized = _normalize_allowlist(runtime_ns.get("allowed_tools"))
+             # Filter to currently known tools (dynamic), preserving order.
+             current = _tool_by_name()
+             filtered = [name for name in normalized if name in current]
+             runtime_ns["allowed_tools"] = filtered
+             return filtered
+         return list(_default_allowlist())
+
+     def _allowed_tool_defs(allowlist: list[str]) -> list[ToolDefinition]:
+         tool_by_name = _tool_by_name()
+         out: list[ToolDefinition] = []
+         for name in allowlist:
+             tool = tool_by_name.get(name)
+             if tool is not None:
+                 out.append(tool)
+         return out
+
+     def _system_prompt(runtime_ns: Dict[str, Any]) -> Optional[str]:
+         raw = runtime_ns.get("system_prompt") if isinstance(runtime_ns, dict) else None
+         if isinstance(raw, str) and raw.strip():
+             return raw
+         return None
+
+     def _sanitize_llm_messages(messages: Any, *, limits: Optional[Dict[str, Any]] = None) -> List[Dict[str, str]]:
+         """Convert runtime-owned message dicts into OpenAI-style {role, content, ...}.
+
+         Runtime messages can include extra metadata fields (`timestamp`, `metadata`) that many providers
+         will reject. Keep only the fields the LLM API expects.
+         """
+         if not isinstance(messages, list) or not messages:
+             return []
+         def _limit_int(key: str, default: int) -> int:
+             if not isinstance(limits, dict):
+                 return default
+             try:
+                 return int(limits.get(key, default))
+             except Exception:
+                 return default
+         max_message_chars = _limit_int("max_message_chars", -1)
+         max_tool_message_chars = _limit_int("max_tool_message_chars", -1)
+
+         def _truncate(text: str, *, max_chars: int) -> str:
+             if max_chars <= 0:
+                 return text
+             if len(text) <= max_chars:
+                 return text
+             suffix = f"\n… (truncated, {len(text):,} chars total)"
+             keep = max_chars - len(suffix)
+             if keep < 200:
+                 keep = max_chars
+                 suffix = ""
+             return text[:keep].rstrip() + suffix
+
+         out: List[Dict[str, str]] = []
+         for m in messages:
+             if not isinstance(m, dict):
+                 continue
+             role = str(m.get("role") or "").strip()
+             content = m.get("content")
+             if not role or content is None:
+                 continue
+             content_str = str(content)
+             if not content_str.strip():
+                 continue
+             limit = max_tool_message_chars if role == "tool" else max_message_chars
+             entry: Dict[str, str] = {"role": role, "content": _truncate(content_str, max_chars=limit)}
+             if role == "tool":
+                 meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
+                 call_id = meta.get("call_id") if isinstance(meta, dict) else None
+                 if call_id is not None and str(call_id).strip():
+                     entry["tool_call_id"] = str(call_id).strip()
+             out.append(entry)
+         return out
+
+     def _flag(runtime_ns: Dict[str, Any], key: str, *, default: bool = False) -> bool:
+         if not isinstance(runtime_ns, dict) or key not in runtime_ns:
+             return bool(default)
+         val = runtime_ns.get(key)
+         if isinstance(val, bool):
+             return val
+         if isinstance(val, (int, float)):
+             return bool(val)
+         if isinstance(val, str):
+             lowered = val.strip().lower()
+             if lowered in ("1", "true", "yes", "on", "enabled"):
+                 return True
+             if lowered in ("0", "false", "no", "off", "disabled"):
+                 return False
+         return bool(default)
+
+     def _int(runtime_ns: Dict[str, Any], key: str, *, default: int) -> int:
+         if not isinstance(runtime_ns, dict) or key not in runtime_ns:
+             return int(default)
+         val = runtime_ns.get(key)
+         try:
+             return int(val)  # type: ignore[arg-type]
+         except Exception:
+             return int(default)
+
+     def _extract_plan_update(content: str) -> Optional[str]:
+         """Extract a plan update block from model content (best-effort).
+
+         Convention (prompted in Plan mode): the model appends a final section:
+
+             Plan Update:
+             - [ ] ...
+             - [x] ...
+         """
+         if not isinstance(content, str) or not content.strip():
+             return None
+
+         import re
+
+         lines = content.splitlines()
+         header_idx: Optional[int] = None
+         for i, line in enumerate(lines):
+             if re.match(r"(?i)^\s*plan\s*update\s*:\s*$", line.strip()):
+                 header_idx = i
+         if header_idx is None:
+             return None
+
+         plan_lines = lines[header_idx + 1 :]
+         while plan_lines and not plan_lines[0].strip():
+             plan_lines.pop(0)
+         plan_text = "\n".join(plan_lines).strip()
+         if not plan_text:
+             return None
+         if not re.search(r"(?m)^\s*(?:[-*]|\d+\.)\s+", plan_text):
+             return None
+         return plan_text
 
      def init_node(run: RunState, ctx) -> StepPlan:
          context, scratchpad, runtime_ns, _, limits = ensure_codeact_vars(run)
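The allowlist helpers introduced above accept `None`, a single name, or a list of names, then dedupe while preserving order and drop anything that no longer corresponds to a registered tool. A standalone sketch of that normalization under those assumptions (tool names here are hypothetical):

    def normalize_allowlist(raw):
        if raw is None:
            return []
        if isinstance(raw, str):
            return [raw.strip()] if raw.strip() else []
        if isinstance(raw, list):
            out, seen = [], set()
            for item in raw:
                if isinstance(item, str) and item.strip() and item.strip() not in seen:
                    seen.add(item.strip())
                    out.append(item.strip())
            return out
        return []

    known = {"read_file", "write_file"}
    wanted = normalize_allowlist(["read_file", "read_file", " shell ", 42])
    assert [n for n in wanted if n in known] == ["read_file"]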
@@ -123,13 +306,68 @@ def create_codeact_workflow(
          if task and (not messages or messages[-1].get("role") != "user" or messages[-1].get("content") != task):
              messages.append(_new_message(ctx, role="user", content=task))
 
-         runtime_ns.setdefault("tool_specs", tool_specs)
-         runtime_ns.setdefault("toolset_id", toolset_id)
+         allow = _effective_allowlist(runtime_ns)
+         allowed_defs = _allowed_tool_defs(allow)
+         runtime_ns["tool_specs"] = [t.to_dict() for t in allowed_defs]
+         runtime_ns["toolset_id"] = _compute_toolset_id(runtime_ns["tool_specs"])
+         runtime_ns.setdefault("allowed_tools", allow)
          runtime_ns.setdefault("inbox", [])
 
          emit("init", {"task": task})
+         if _flag(runtime_ns, "plan_mode", default=False) and not isinstance(scratchpad.get("plan"), str):
+             return StepPlan(node_id="init", next_node="plan")
          return StepPlan(node_id="init", next_node="reason")
 
+     def plan_node(run: RunState, ctx) -> StepPlan:
+         context, scratchpad, runtime_ns, _, _ = ensure_codeact_vars(run)
+         task = str(context.get("task", "") or "")
+
+         allow = _effective_allowlist(runtime_ns)
+
+         prompt = (
+             "You are preparing a high-level execution plan for the user's request.\n"
+             "Return a concise TODO list (5–12 steps) that is actionable and verifiable.\n"
+             "Do not call tools yet. Do not include role prefixes like 'assistant:'.\n\n"
+             f"User request:\n{task}\n\n"
+             "Plan (markdown checklist):\n"
+             "- [ ] ...\n"
+         )
+
+         emit("plan_request", {"tools": allow})
+
+         payload: Dict[str, Any] = {"prompt": prompt, "params": {"temperature": 0.2}}
+         sys = _system_prompt(runtime_ns)
+         if isinstance(sys, str) and sys.strip():
+             payload["system_prompt"] = sys
+
+         return StepPlan(
+             node_id="plan",
+             effect=Effect(
+                 type=EffectType.LLM_CALL,
+                 payload=payload,
+                 result_key="_temp.plan_llm_response",
+             ),
+             next_node="plan_parse",
+         )
+
+     def plan_parse_node(run: RunState, ctx) -> StepPlan:
+         context, scratchpad, _, temp, _ = ensure_codeact_vars(run)
+         resp = temp.get("plan_llm_response", {})
+         if not isinstance(resp, dict):
+             resp = {}
+         plan_text = resp.get("content")
+         plan = "" if plan_text is None else str(plan_text).strip()
+         if not plan and isinstance(resp.get("data"), dict):
+             plan = json.dumps(resp.get("data"), ensure_ascii=False, indent=2).strip()
+
+         scratchpad["plan"] = plan
+         temp.pop("plan_llm_response", None)
+
+         if plan:
+             context["messages"].append(_new_message(ctx, role="assistant", content=plan, metadata={"kind": "plan"}))
+         emit("plan", {"plan": plan})
+         return StepPlan(node_id="plan_parse", next_node="reason")
+
      def reason_node(run: RunState, ctx) -> StepPlan:
          context, scratchpad, runtime_ns, _, limits = ensure_codeact_vars(run)
 
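Plan mode hinges on the `Plan Update:` convention that `_extract_plan_update` parses: the model appends a trailing checklist section, and the workflow keeps only the last such block. A small illustrative check of the convention (sample text is hypothetical):

    import re

    sample = (
        "Step one is done.\n"
        "\n"
        "Plan Update:\n"
        "- [x] inspect the repository\n"
        "- [ ] run the test suite\n"
    )
    lines = sample.splitlines()
    header = max(
        i for i, ln in enumerate(lines)
        if re.match(r"(?i)^\s*plan\s*update\s*:\s*$", ln.strip())
    )
    plan = "\n".join(lines[header + 1:]).strip()
    assert plan.startswith("- [x] inspect")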
@@ -159,9 +397,23 @@ def create_codeact_workflow(
          guidance = " | ".join([m for m in inbox_messages if m])
          runtime_ns["inbox"] = []
 
+         messages_view = ActiveContextPolicy.select_active_messages_for_llm_from_run(run)
+
+         # Refresh tool metadata BEFORE rendering Active Memory so token fitting stays accurate
+         # (even though we do not render a "Tools (session)" block into Active Memory prompts).
+         allow = _effective_allowlist(runtime_ns)
+         allowed_defs = _allowed_tool_defs(allow)
+         tool_specs = [t.to_dict() for t in allowed_defs]
+         include_examples = bool(runtime_ns.get("tool_prompt_examples", True))
+         if not include_examples:
+             tool_specs = [{k: v for k, v in spec.items() if k != "examples"} for spec in tool_specs if isinstance(spec, dict)]
+         runtime_ns["tool_specs"] = tool_specs
+         runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
+         runtime_ns.setdefault("allowed_tools", allow)
+
          req = logic.build_request(
              task=str(context.get("task", "") or ""),
-             messages=list(context.get("messages") or []),
+             messages=messages_view,
              guidance=guidance,
              iteration=iteration + 1,
              max_iterations=max_iterations,
@@ -170,7 +422,18 @@ def create_codeact_workflow(
 
          emit("reason", {"iteration": iteration + 1, "max_iterations": max_iterations, "has_guidance": bool(guidance)})
 
-         payload = {"prompt": req.prompt, "tools": [t.to_dict() for t in req.tools]}
+         # IMPORTANT: When we send `messages`, do not also send a non-empty `prompt`.
+         # Some providers/servers will append `prompt` as an extra user message even when the
+         # current request is already present in `messages`, which duplicates user turns and
+         # wastes context budget.
+         payload: Dict[str, Any] = {
+             "prompt": "",
+             "messages": _sanitize_llm_messages(messages_view, limits=limits),
+             "tools": list(tool_specs),
+         }
+         sys = _system_prompt(runtime_ns) or req.system_prompt
+         if isinstance(sys, str) and sys.strip():
+             payload["system_prompt"] = sys
          if req.max_tokens is not None:
              payload["params"] = {"max_tokens": req.max_tokens}
 
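The contract in the comment above is worth spelling out: once the conversation travels in `messages`, `prompt` must stay empty, and each message is stripped down to the provider-facing fields. A hedged sketch of the resulting payload (field names as used in this diff; values invented):

    runtime_message = {
        "role": "user",
        "content": "List the repo files",
        "timestamp": "2025-01-01T00:00:00+00:00",
        "metadata": {"message_id": "msg_abc"},
    }
    # Keep only what the LLM API expects.
    llm_message = {"role": runtime_message["role"], "content": runtime_message["content"]}
    payload = {"prompt": "", "messages": [llm_message], "tools": []}
    assert payload["prompt"] == ""
    assert payload["messages"] == [{"role": "user", "content": "List the repo files"}]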
@@ -185,80 +448,282 @@ def create_codeact_workflow(
          )
 
      def parse_node(run: RunState, ctx) -> StepPlan:
-         context, _, _, temp, _ = ensure_codeact_vars(run)
+         context, scratchpad, runtime_ns, temp, _ = ensure_codeact_vars(run)
          response = temp.get("llm_response", {})
          content, tool_calls = logic.parse_response(response)
 
-         if content:
-             context["messages"].append(_new_message(ctx, role="assistant", content=content))
-
          temp.pop("llm_response", None)
          emit("parse", {"has_tool_calls": bool(tool_calls), "content_preview": (content[:100] if content else "(no content)")})
 
          if tool_calls:
+             if content:
+                 context["messages"].append(_new_message(ctx, role="assistant", content=content))
+             if _flag(runtime_ns, "plan_mode", default=False):
+                 updated = _extract_plan_update(content)
+                 if isinstance(updated, str) and updated.strip():
+                     scratchpad["plan"] = updated.strip()
              temp["pending_tool_calls"] = [tc.__dict__ for tc in tool_calls]
              return StepPlan(node_id="parse", next_node="act")
 
+         # Empty response is an invalid step: recover with a bounded retry that carries evidence.
+         if not isinstance(content, str) or not content.strip():
+             try:
+                 empty_retries = int(scratchpad.get("empty_response_retry_count") or 0)
+             except Exception:
+                 empty_retries = 0
+
+             if empty_retries < 2:
+                 scratchpad["empty_response_retry_count"] = empty_retries + 1
+                 emit("parse_retry_empty_response", {"retries": empty_retries + 1})
+                 inbox = runtime_ns.get("inbox")
+                 if not isinstance(inbox, list):
+                     inbox = []
+                 runtime_ns["inbox"] = inbox
+                 inbox.append(
+                     {
+                         "content": (
+                             "[Recover] Your last message was empty. Continue the task now. "
+                             "If you need info, CALL tools (preferred). Do not output an empty message."
+                         )
+                     }
+                 )
+                 return StepPlan(node_id="parse", next_node="reason")
+
+             safe = (
+                 "I can't proceed: the model repeatedly returned empty outputs (no content, no tool calls).\n"
+                 "Please retry, reduce context, or switch models."
+             )
+             context["messages"].append(_new_message(ctx, role="assistant", content=safe, metadata={"kind": "error"}))
+             temp["final_answer"] = safe
+             temp["pending_tool_calls"] = []
+             scratchpad["empty_response_retry_count"] = 0
+             return StepPlan(node_id="parse", next_node="maybe_review")
+
          code = logic.extract_code(content)
          if code:
+             if content:
+                 context["messages"].append(_new_message(ctx, role="assistant", content=content))
+             if _flag(runtime_ns, "plan_mode", default=False):
+                 updated = _extract_plan_update(content)
+                 if isinstance(updated, str) and updated.strip():
+                     scratchpad["plan"] = updated.strip()
              temp["pending_code"] = code
              return StepPlan(node_id="parse", next_node="execute_code")
 
-         temp["final_answer"] = content
-         return StepPlan(node_id="parse", next_node="done")
+         def _extract_final_answer(text: str) -> tuple[bool, str]:
+             if not isinstance(text, str) or not text.strip():
+                 return False, ""
+             s = text.lstrip()
+             if s.upper().startswith("FINAL:"):
+                 return True, s[len("FINAL:") :].lstrip()
+             return False, text
+
+         raw = str(content or "").strip()
+         is_final, final = _extract_final_answer(raw)
+         if is_final:
+             if final:
+                 context["messages"].append(_new_message(ctx, role="assistant", content=final))
+             if _flag(runtime_ns, "plan_mode", default=False):
+                 updated = _extract_plan_update(final)
+                 if isinstance(updated, str) and updated.strip():
+                     scratchpad["plan"] = updated.strip()
+             temp["final_answer"] = final or "No answer provided"
+             temp["pending_tool_calls"] = []
+             return StepPlan(node_id="parse", next_node="maybe_review")
+
+         # Default: treat as a final answer even without an explicit FINAL marker.
+         if raw:
+             context["messages"].append(_new_message(ctx, role="assistant", content=raw))
+         if _flag(runtime_ns, "plan_mode", default=False):
+             updated = _extract_plan_update(raw)
+             if isinstance(updated, str) and updated.strip():
+                 scratchpad["plan"] = updated.strip()
+         temp["final_answer"] = raw or "No answer provided"
+         temp["pending_tool_calls"] = []
+         scratchpad["empty_response_retry_count"] = 0
+         return StepPlan(node_id="parse", next_node="maybe_review")
 
      def act_node(run: RunState, ctx) -> StepPlan:
-         _, _, _, temp, _ = ensure_codeact_vars(run)
-         tool_calls = temp.get("pending_tool_calls", [])
-         if not isinstance(tool_calls, list):
-             tool_calls = []
-
-         if not tool_calls:
+         # Treat `_temp.pending_tool_calls` as a durable queue to avoid dropping tool calls when
+         # schema-only tools (ask_user/memory/etc.) are interleaved with normal tools.
+         context, _, runtime_ns, temp, _ = ensure_codeact_vars(run)
+         raw_queue = temp.get("pending_tool_calls", [])
+         if not isinstance(raw_queue, list) or not raw_queue:
+             temp["pending_tool_calls"] = []
              return StepPlan(node_id="act", next_node="reason")
 
-         # Handle ask_user specially with ASK_USER effect.
-         for i, tc in enumerate(tool_calls):
-             if not isinstance(tc, dict):
-                 continue
-             if tc.get("name") != "ask_user":
+         allow = _effective_allowlist(runtime_ns)
+         builtin_effect_tools = {
+             "ask_user",
+             "recall_memory",
+             "inspect_vars",
+             "remember",
+             "remember_note",
+             "compact_memory",
+         }
+
+         tool_queue: List[Dict[str, Any]] = []
+         for idx, item in enumerate(raw_queue, start=1):
+             if isinstance(item, ToolCall):
+                 d: Dict[str, Any] = {"name": item.name, "arguments": item.arguments, "call_id": item.call_id}
+             elif isinstance(item, dict):
+                 d = dict(item)
+             else:
                  continue
+             call_id = str(d.get("call_id") or "").strip()
+             if not call_id:
+                 d["call_id"] = str(idx)
+             tool_queue.append(d)
+
+         if not tool_queue:
+             temp["pending_tool_calls"] = []
+             return StepPlan(node_id="act", next_node="reason")
+
+         def _is_builtin(tc: Dict[str, Any]) -> bool:
+             name = tc.get("name")
+             return isinstance(name, str) and name in builtin_effect_tools
+
+         if _is_builtin(tool_queue[0]):
+             tc = tool_queue[0]
+             name = str(tc.get("name") or "").strip()
              args = tc.get("arguments") or {}
-             question = str(args.get("question") or "Please provide input:")
-             choices = args.get("choices")
-             choices = list(choices) if isinstance(choices, list) else None
-
-             temp["pending_tool_calls"] = tool_calls[i + 1 :]
-             emit("ask_user", {"question": question, "choices": choices or []})
-             return StepPlan(
-                 node_id="act",
-                 effect=Effect(
-                     type=EffectType.ASK_USER,
-                     payload={"prompt": question, "choices": choices, "allow_free_text": True},
-                     result_key="_temp.user_response",
-                 ),
-                 next_node="handle_user_response",
-             )
+             if not isinstance(args, dict):
+                 args = {}
 
-         for tc in tool_calls:
-             if isinstance(tc, dict):
-                 emit("act", {"tool": tc.get("name", ""), "args": tc.get("arguments", {})})
+             # Pop builtin.
+             temp["pending_tool_calls"] = list(tool_queue[1:])
 
-         formatted_calls: List[Dict[str, Any]] = []
-         for tc in tool_calls:
-             if isinstance(tc, dict):
-                 formatted_calls.append(
-                     {"name": tc.get("name", ""), "arguments": tc.get("arguments", {}), "call_id": tc.get("call_id", "1")}
+             if name and name not in allow:
+                 temp["tool_results"] = {
+                     "results": [
+                         {
+                             "call_id": str(tc.get("call_id") or ""),
+                             "name": name,
+                             "success": False,
+                             "output": None,
+                             "error": f"Tool '{name}' is not allowed for this agent",
+                         }
+                     ]
+                 }
+                 emit("act_blocked", {"tool": name})
+                 return StepPlan(node_id="act", next_node="observe")
+
+             if name == "ask_user":
+                 question = str(args.get("question") or "Please provide input:")
+                 choices = args.get("choices")
+                 choices = list(choices) if isinstance(choices, list) else None
+
+                 msgs = context.get("messages")
+                 if isinstance(msgs, list):
+                     content = f"[Agent question]: {question}"
+                     last = msgs[-1] if msgs else None
+                     last_role = last.get("role") if isinstance(last, dict) else None
+                     last_meta = last.get("metadata") if isinstance(last, dict) else None
+                     last_kind = last_meta.get("kind") if isinstance(last_meta, dict) else None
+                     last_content = last.get("content") if isinstance(last, dict) else None
+                     if not (last_role == "assistant" and last_kind == "ask_user_prompt" and str(last_content or "") == content):
+                         msgs.append(_new_message(ctx, role="assistant", content=content, metadata={"kind": "ask_user_prompt"}))
+
+                 emit("ask_user", {"question": question, "choices": choices or []})
+                 return StepPlan(
+                     node_id="act",
+                     effect=Effect(
+                         type=EffectType.ASK_USER,
+                         payload={"prompt": question, "choices": choices, "allow_free_text": True},
+                         result_key="_temp.user_response",
+                     ),
+                     next_node="handle_user_response",
+                 )
+
+             if name == "recall_memory":
+                 payload = dict(args)
+                 payload.setdefault("tool_name", "recall_memory")
+                 payload.setdefault("call_id", tc.get("call_id") or "memory")
+                 emit("memory_query", {"query": payload.get("query"), "span_id": payload.get("span_id")})
+                 return StepPlan(
+                     node_id="act",
+                     effect=Effect(type=EffectType.MEMORY_QUERY, payload=payload, result_key="_temp.tool_results"),
+                     next_node="observe",
                  )
-             elif isinstance(tc, ToolCall):
-                 formatted_calls.append(
-                     {"name": tc.name, "arguments": tc.arguments, "call_id": tc.call_id or "1"}
+
+             if name == "inspect_vars":
+                 payload = dict(args)
+                 payload.setdefault("tool_name", "inspect_vars")
+                 payload.setdefault("call_id", tc.get("call_id") or "vars")
+                 emit("vars_query", {"path": payload.get("path")})
+                 return StepPlan(
+                     node_id="act",
+                     effect=Effect(type=EffectType.VARS_QUERY, payload=payload, result_key="_temp.tool_results"),
+                     next_node="observe",
+                 )
+
+             if name == "remember":
+                 payload = dict(args)
+                 payload.setdefault("tool_name", "remember")
+                 payload.setdefault("call_id", tc.get("call_id") or "memory")
+                 emit("memory_tag", {"span_id": payload.get("span_id"), "tags": payload.get("tags")})
+                 return StepPlan(
+                     node_id="act",
+                     effect=Effect(type=EffectType.MEMORY_TAG, payload=payload, result_key="_temp.tool_results"),
+                     next_node="observe",
                  )
 
+             if name == "remember_note":
+                 payload = dict(args)
+                 payload.setdefault("tool_name", "remember_note")
+                 payload.setdefault("call_id", tc.get("call_id") or "memory")
+                 emit("memory_note", {"note": payload.get("note"), "tags": payload.get("tags")})
+                 return StepPlan(
+                     node_id="act",
+                     effect=Effect(type=EffectType.MEMORY_NOTE, payload=payload, result_key="_temp.tool_results"),
+                     next_node="observe",
+                 )
+
+             if name == "compact_memory":
+                 payload = dict(args)
+                 payload.setdefault("tool_name", "compact_memory")
+                 payload.setdefault("call_id", tc.get("call_id") or "compact")
+                 emit(
+                     "memory_compact",
+                     {
+                         "preserve_recent": payload.get("preserve_recent"),
+                         "mode": payload.get("compression_mode"),
+                         "focus": payload.get("focus"),
+                     },
+                 )
+                 return StepPlan(
+                     node_id="act",
+                     effect=Effect(type=EffectType.MEMORY_COMPACT, payload=payload, result_key="_temp.tool_results"),
+                     next_node="observe",
+                 )
+
+             if temp.get("pending_tool_calls"):
+                 return StepPlan(node_id="act", next_node="act")
+             return StepPlan(node_id="act", next_node="reason")
+
+         batch: List[Dict[str, Any]] = []
+         for tc in tool_queue:
+             if _is_builtin(tc):
+                 break
+             batch.append(tc)
+
+         remaining = tool_queue[len(batch) :]
+         temp["pending_tool_calls"] = list(remaining)
+
+         for tc in batch:
+             emit("act", {"tool": tc.get("name", ""), "args": tc.get("arguments", {}), "call_id": str(tc.get("call_id") or "")})
+
+         formatted_calls: List[Dict[str, Any]] = []
+         for tc in batch:
+             formatted_calls.append(
+                 {"name": tc.get("name", ""), "arguments": tc.get("arguments", {}), "call_id": str(tc.get("call_id") or "")}
+             )
+
          return StepPlan(
              node_id="act",
              effect=Effect(
                  type=EffectType.TOOL_CALLS,
-                 payload={"tool_calls": formatted_calls},
+                 payload={"tool_calls": formatted_calls, "allowed_tools": list(allow)},
                  result_key="_temp.tool_results",
              ),
              next_node="observe",
@@ -292,7 +757,7 @@ def create_codeact_workflow(
          )
 
      def observe_node(run: RunState, ctx) -> StepPlan:
-         context, _, _, temp, _ = ensure_codeact_vars(run)
+         context, scratchpad, _, temp, _ = ensure_codeact_vars(run)
          tool_results = temp.get("tool_results", {})
          if not isinstance(tool_results, dict):
              tool_results = {}
@@ -308,12 +773,29 @@ def create_codeact_workflow(
              success = bool(r.get("success"))
              output = r.get("output", "")
              error = r.get("error", "")
+             # Prefer a tool-supplied human/LLM-friendly rendering when present.
+             def _display(v: Any) -> str:
+                 if isinstance(v, dict):
+                     rendered = v.get("rendered")
+                     if isinstance(rendered, str) and rendered.strip():
+                         return rendered.strip()
+                 return "" if v is None else str(v)
+
+             display = _display(output)
+             if not success:
+                 # Preserve structured outputs for provenance, but show a clean string to the LLM/UI.
+                 display = _display(output) if isinstance(output, dict) else str(error or output)
              rendered = logic.format_observation(
                  name=name,
-                 output=(output if success else (error or output)),
+                 output=display,
                  success=success,
              )
-             emit("observe", {"tool": name, "result": rendered[:150]})
+             # Observability: avoid truncating normal tool results in step events.
+             # Keep a bounded preview for huge tool outputs to avoid bloating traces/ledgers.
+             preview = rendered
+             if len(preview) > 1000:
+                 preview = preview[:1000] + f"\n… (truncated, {len(rendered):,} chars total)"
+             emit("observe", {"tool": name, "success": success, "result": preview})
              context["messages"].append(
                  _new_message(
                      ctx,
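The `_display` helper added above prefers a tool-supplied `rendered` string and falls back to `str()`. A minimal sketch of that preference order:

    def display(value):
        if isinstance(value, dict):
            rendered = value.get("rendered")
            if isinstance(rendered, str) and rendered.strip():
                return rendered.strip()
        return "" if value is None else str(value)

    assert display({"rendered": "3 files", "files": ["a", "b", "c"]}) == "3 files"
    assert display({"files": ["a"]}) == "{'files': ['a']}"
    assert display(None) == ""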
@@ -324,6 +806,12 @@ def create_codeact_workflow(
              )
 
          temp.pop("tool_results", None)
+         # Reset verifier/review rounds after executing tools so the verifier can run
+         # again on the next candidate answer.
+         scratchpad["review_count"] = 0
+         pending = temp.get("pending_tool_calls", [])
+         if isinstance(pending, list) and pending:
+             return StepPlan(node_id="observe", next_node="act")
          temp["pending_tool_calls"] = []
          return StepPlan(node_id="observe", next_node="reason")
 
@@ -342,6 +830,270 @@ def create_codeact_workflow(
              return StepPlan(node_id="handle_user_response", next_node="act")
          return StepPlan(node_id="handle_user_response", next_node="reason")
 
+     def maybe_review_node(run: RunState, ctx) -> StepPlan:
+         _, scratchpad, runtime_ns, _, _ = ensure_codeact_vars(run)
+
+         if not _flag(runtime_ns, "review_mode", default=False):
+             return StepPlan(node_id="maybe_review", next_node="done")
+
+         max_rounds = _int(runtime_ns, "review_max_rounds", default=1)
+         if max_rounds < 0:
+             max_rounds = 0
+         count = scratchpad.get("review_count")
+         try:
+             count_int = int(count or 0)
+         except Exception:
+             count_int = 0
+
+         if count_int >= max_rounds:
+             return StepPlan(node_id="maybe_review", next_node="done")
+
+         scratchpad["review_count"] = count_int + 1
+         return StepPlan(node_id="maybe_review", next_node="review")
+
+     def review_node(run: RunState, ctx) -> StepPlan:
+         context, scratchpad, runtime_ns, _, limits = ensure_codeact_vars(run)
+         task = str(context.get("task", "") or "")
+         plan = scratchpad.get("plan")
+         plan_text = str(plan).strip() if isinstance(plan, str) and plan.strip() else "(no plan)"
+
+         allow = _effective_allowlist(runtime_ns)
+
+         def _truncate_block(text: str, *, max_chars: int) -> str:
+             s = str(text or "")
+             if max_chars <= 0:
+                 return s
+             if len(s) <= max_chars:
+                 return s
+             suffix = f"\n… (truncated, {len(s):,} chars total)"
+             keep = max_chars - len(suffix)
+             if keep < 200:
+                 keep = max_chars
+                 suffix = ""
+             return s[:keep].rstrip() + suffix
+
+         def _format_allowed_tools() -> str:
+             specs = runtime_ns.get("tool_specs")
+             if not isinstance(specs, list) or not specs:
+                 defs = _allowed_tool_defs(allow)
+                 specs = [t.to_dict() for t in defs]
+             lines: list[str] = []
+             for spec in specs:
+                 if not isinstance(spec, dict):
+                     continue
+                 name = str(spec.get("name") or "").strip()
+                 if not name:
+                     continue
+                 params = spec.get("parameters")
+                 props = params.get("properties", {}) if isinstance(params, dict) else {}
+                 keys = sorted([k for k in props.keys() if isinstance(k, str)])
+                 if keys:
+                     lines.append(f"- {name}({', '.join(keys)})")
+                 else:
+                     lines.append(f"- {name}()")
+             return "\n".join(lines) if lines else "(no tools available)"
+
+         messages = list(context.get("messages") or [])
+         tool_msgs: list[str] = []
+         try:
+             tool_limit = int(limits.get("review_max_tool_output_chars", -1))
+         except Exception:
+             tool_limit = -1
+         try:
+             answer_limit = int(limits.get("review_max_answer_chars", -1))
+         except Exception:
+             answer_limit = -1
+
+         for m in reversed(messages):
+             if not isinstance(m, dict) or m.get("role") != "tool":
+                 continue
+             content = m.get("content")
+             if isinstance(content, str) and content.strip():
+                 tool_msgs.append(_truncate_block(content.strip(), max_chars=tool_limit))
+             if len(tool_msgs) >= 8:
+                 break
+         tool_msgs.reverse()
+         observations = "\n\n".join(tool_msgs) if tool_msgs else "(no tool outputs)"
+
+         # Include recent user messages (especially ask_user responses) so the reviewer can
+         # avoid re-asking questions the user already answered.
+         try:
+             user_limit = int(limits.get("review_max_user_message_chars", -1))
+         except Exception:
+             user_limit = -1
+
+         user_msgs: list[str] = []
+         ask_prompts: list[str] = []
+         for m in reversed(messages):
+             if not isinstance(m, dict):
+                 continue
+             role = m.get("role")
+             content = m.get("content")
+             if role == "user" and isinstance(content, str) and content.strip():
+                 if content.strip() != task.strip():
+                     user_msgs.append(_truncate_block(content.strip(), max_chars=user_limit))
+             if len(user_msgs) >= 4:
+                 break
+         for m in reversed(messages):
+             if not isinstance(m, dict):
+                 continue
+             if m.get("role") != "assistant":
+                 continue
+             meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
+             if not isinstance(meta, dict) or meta.get("kind") != "ask_user_prompt":
+                 continue
+             content = m.get("content")
+             if isinstance(content, str) and content.strip():
+                 ask_prompts.append(_truncate_block(content.strip(), max_chars=user_limit))
+             if len(ask_prompts) >= 4:
+                 break
+
+         user_msgs.reverse()
+         ask_prompts.reverse()
+         user_context = "\n\n".join(user_msgs) if user_msgs else "(no additional user messages)"
+         asked_context = "\n\n".join(ask_prompts) if ask_prompts else "(no ask_user prompts recorded)"
+
+         answer_raw = str(run.vars.get("_temp", {}).get("final_answer") or "")
+         answer_excerpt = ""
+         if not tool_msgs and answer_raw.strip():
+             answer_excerpt = _truncate_block(answer_raw.strip(), max_chars=answer_limit)
+
+         prompt = (
+             "You are a verifier. Review whether the user's request has been fully satisfied.\n"
+             "Be strict: only count actions that are supported by the tool outputs.\n"
+             "If anything is missing, propose the NEXT ACTIONS.\n"
+             "Prefer returning `next_tool_calls` over `next_prompt`.\n"
+             "Return JSON ONLY.\n\n"
+             f"User request:\n{task}\n\n"
+             f"Plan:\n{plan_text}\n\n"
+             f"Recent ask_user prompts:\n{asked_context}\n\n"
+             f"Recent user messages:\n{user_context}\n\n"
+             + (f"Current answer (excerpt):\n{answer_excerpt}\n\n" if answer_excerpt else "")
+             + f"Tool outputs:\n{observations}\n\n"
+             f"Allowed tools:\n{_format_allowed_tools()}\n\n"
+         )
+
+         schema = {
+             "type": "object",
+             "properties": {
+                 "complete": {"type": "boolean"},
+                 "missing": {"type": "array", "items": {"type": "string"}},
+                 "next_prompt": {"type": "string"},
+                 "next_tool_calls": {
+                     "type": "array",
+                     "items": {
+                         "type": "object",
+                         "properties": {
+                             "name": {"type": "string"},
+                             "arguments": {"type": "object"},
+                         },
+                         "required": ["name", "arguments"],
+                         "additionalProperties": False,
+                     },
+                 },
+             },
+             "required": ["complete", "missing", "next_prompt", "next_tool_calls"],
+             "additionalProperties": False,
+         }
+
+         emit("review_request", {"tool_messages": len(tool_msgs)})
+
+         payload: Dict[str, Any] = {
+             "prompt": prompt,
+             "response_schema": schema,
+             "response_schema_name": "CodeActVerifier",
+             "params": {"temperature": 0.2},
+         }
+         sys = _system_prompt(runtime_ns)
+         if sys is not None:
+             payload["system_prompt"] = sys
+
+         return StepPlan(
+             node_id="review",
+             effect=Effect(
+                 type=EffectType.LLM_CALL,
+                 payload=payload,
+                 result_key="_temp.review_llm_response",
+             ),
+             next_node="review_parse",
+         )
+
+     def review_parse_node(run: RunState, ctx) -> StepPlan:
+         _, _, runtime_ns, temp, _ = ensure_codeact_vars(run)
+         resp = temp.get("review_llm_response", {})
+         if not isinstance(resp, dict):
+             resp = {}
+
+         data = resp.get("data")
+         if data is None and isinstance(resp.get("content"), str):
+             try:
+                 data = json.loads(resp["content"])
+             except Exception:
+                 data = None
+         if not isinstance(data, dict):
+             data = {}
+
+         complete = bool(data.get("complete"))
+         missing = data.get("missing") if isinstance(data.get("missing"), list) else []
+         next_prompt = data.get("next_prompt")
+         next_prompt_text = str(next_prompt or "").strip()
+         next_tool_calls_raw = data.get("next_tool_calls")
+         next_tool_calls: list[dict[str, Any]] = []
+         if isinstance(next_tool_calls_raw, list):
+             for item in next_tool_calls_raw:
+                 if not isinstance(item, dict):
+                     continue
+                 name = str(item.get("name") or "").strip()
+                 args = item.get("arguments")
+                 if not isinstance(args, dict):
+                     args = {}
+                 if name:
+                     next_tool_calls.append({"name": name, "arguments": args})
+
+         emit("review", {"complete": complete, "missing": missing})
+         temp.pop("review_llm_response", None)
+
+         if complete:
+             return StepPlan(node_id="review_parse", next_node="done")
+
+         if next_tool_calls:
+             temp["pending_tool_calls"] = next_tool_calls
+             emit("review_tool_calls", {"count": len(next_tool_calls)})
+             return StepPlan(node_id="review_parse", next_node="act")
+
+         # Behavioral validation: if incomplete but no tool calls, re-ask reviewer once with stricter rules.
+         if not complete and not next_tool_calls:
+             try:
+                 retry_count = int(runtime_ns.get("review_retry_count") or 0)
+             except Exception:
+                 retry_count = 0
+             if retry_count < 1:
+                 runtime_ns["review_retry_count"] = retry_count + 1
+                 inbox = runtime_ns.get("inbox")
+                 if not isinstance(inbox, list):
+                     inbox = []
+                 runtime_ns["inbox"] = inbox
+                 inbox.append(
+                     {
+                         "content": (
+                             "[Review] Your last review output was not actionable. "
+                             "If incomplete, you MUST return at least one `next_tool_call` "
+                             "(use `ask_user` if you need clarification). Return JSON only."
+                         )
+                     }
+                 )
+                 emit("review_retry_unactionable", {"retry": retry_count + 1})
+                 return StepPlan(node_id="review_parse", next_node="review")
+
+         runtime_ns["review_retry_count"] = 0
+         if next_prompt_text:
+             inbox = runtime_ns.get("inbox")
+             if not isinstance(inbox, list):
+                 inbox = []
+             runtime_ns["inbox"] = inbox
+             inbox.append({"content": f"[Review] {next_prompt_text}"})
+         return StepPlan(node_id="review_parse", next_node="reason")
+
      def done_node(run: RunState, ctx) -> StepPlan:
          context, scratchpad, _, temp, limits = ensure_codeact_vars(run)
          answer = str(temp.get("final_answer") or "No answer provided")
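For orientation, a reviewer reply that satisfies the CodeActVerifier schema above could look like this (values and the tool name are invented for illustration):

    import json

    verdict = json.loads('''
    {
      "complete": false,
      "missing": ["tests were never run"],
      "next_prompt": "",
      "next_tool_calls": [
        {"name": "run_tests", "arguments": {"command": "pytest -q"}}
      ]
    }
    ''')
    assert set(verdict) == {"complete", "missing", "next_prompt", "next_tool_calls"}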
@@ -350,6 +1102,16 @@ def create_codeact_workflow(
          # Prefer _limits.current_iteration, fall back to scratchpad
          iterations = int(limits.get("current_iteration", 0) or scratchpad.get("iteration", 0) or 0)
 
+         # Persist the final answer into the conversation history so it becomes part of the
+         # next run's seed context and shows up in /history.
+         messages = context.get("messages")
+         if isinstance(messages, list):
+             last = messages[-1] if messages else None
+             last_role = last.get("role") if isinstance(last, dict) else None
+             last_content = last.get("content") if isinstance(last, dict) else None
+             if last_role != "assistant" or str(last_content or "") != answer:
+                 messages.append(_new_message(ctx, role="assistant", content=answer, metadata={"kind": "final_answer"}))
+
          return StepPlan(
              node_id="done",
              complete_output={
@@ -384,14 +1146,18 @@ def create_codeact_workflow(
          entry_node="init",
          nodes={
              "init": init_node,
+             "plan": plan_node,
+             "plan_parse": plan_parse_node,
              "reason": reason_node,
              "parse": parse_node,
              "act": act_node,
              "execute_code": execute_code_node,
              "observe": observe_node,
              "handle_user_response": handle_user_response_node,
+             "maybe_review": maybe_review_node,
+             "review": review_node,
+             "review_parse": review_parse_node,
              "done": done_node,
              "max_iterations": max_iterations_node,
          },
      )
-