abstractagent 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractagent/adapters/__init__.py +2 -1
- abstractagent/adapters/codeact_runtime.py +823 -57
- abstractagent/adapters/memact_runtime.py +721 -0
- abstractagent/adapters/react_runtime.py +1114 -67
- abstractagent/agents/__init__.py +4 -0
- abstractagent/agents/base.py +58 -1
- abstractagent/agents/codeact.py +89 -18
- abstractagent/agents/memact.py +244 -0
- abstractagent/agents/react.py +91 -18
- abstractagent/logic/__init__.py +2 -0
- abstractagent/logic/builtins.py +212 -5
- abstractagent/logic/codeact.py +87 -80
- abstractagent/logic/memact.py +127 -0
- abstractagent/logic/react.py +108 -48
- abstractagent/repl.py +24 -447
- abstractagent/scripts/__init__.py +5 -0
- abstractagent/scripts/lmstudio_tool_eval.py +426 -0
- abstractagent/tools/__init__.py +3 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/METADATA +10 -11
- abstractagent-0.3.0.dist-info/RECORD +31 -0
- abstractagent/ui/__init__.py +0 -5
- abstractagent/ui/question.py +0 -197
- abstractagent-0.2.0.dist-info/RECORD +0 -28
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/WHEEL +0 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/entry_points.txt +0 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/top_level.txt +0 -0
```diff
--- a/abstractagent/adapters/codeact_runtime.py
+++ b/abstractagent/adapters/codeact_runtime.py
@@ -6,9 +6,10 @@ import hashlib
 import json
 from typing import Any, Callable, Dict, List, Optional
 
-from abstractcore.tools import ToolCall
+from abstractcore.tools import ToolCall, ToolDefinition
 from abstractruntime import Effect, EffectType, RunState, StepPlan, WorkflowSpec
 from abstractruntime.core.vars import ensure_limits, ensure_namespaces
+from abstractruntime.memory.active_context import ActiveContextPolicy
 
 from ..logic.codeact import CodeActLogic
 
```
```diff
@@ -29,11 +30,16 @@ def _new_message(
 
     timestamp = datetime.now(timezone.utc).isoformat()
 
+    import uuid
+
+    meta = dict(metadata or {})
+    meta.setdefault("message_id", f"msg_{uuid.uuid4().hex}")
+
     return {
         "role": role,
         "content": content,
         "timestamp": timestamp,
-        "metadata":
+        "metadata": meta,
    }
 
 
```
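With this change, every runtime message carries a stable `message_id` minted at creation time; caller-supplied IDs survive via `setdefault`. A minimal sketch of the resulting message shape, with illustrative values:

```python
import uuid
from datetime import datetime, timezone

# Sketch of the dict _new_message now returns; "kind" stands in for any
# caller-provided metadata, and message_id is only minted when absent.
meta = {"kind": "plan"}
meta.setdefault("message_id", f"msg_{uuid.uuid4().hex}")
message = {
    "role": "assistant",
    "content": "Plan drafted.",
    "timestamp": datetime.now(timezone.utc).isoformat(),
    "metadata": meta,
}
assert message["metadata"]["message_id"].startswith("msg_")
```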
```diff
@@ -108,9 +114,186 @@ def create_codeact_workflow(
         if on_step:
             on_step(step, data)
 
-
-
-
+    def _current_tool_defs() -> list[ToolDefinition]:
+        defs = getattr(logic, "tools", None)
+        if not isinstance(defs, list):
+            try:
+                defs = list(defs)  # type: ignore[arg-type]
+            except Exception:
+                defs = []
+        return [t for t in defs if getattr(t, "name", None)]
+
+    def _tool_by_name() -> dict[str, ToolDefinition]:
+        out: dict[str, ToolDefinition] = {}
+        for t in _current_tool_defs():
+            name = getattr(t, "name", None)
+            if isinstance(name, str) and name.strip():
+                out[name] = t
+        return out
+
+    def _default_allowlist() -> list[str]:
+        out: list[str] = []
+        seen: set[str] = set()
+        for t in _current_tool_defs():
+            name = getattr(t, "name", None)
+            if not isinstance(name, str) or not name.strip() or name in seen:
+                continue
+            seen.add(name)
+            out.append(name)
+        return out
+
+    def _normalize_allowlist(raw: Any) -> list[str]:
+        if raw is None:
+            return []
+        if isinstance(raw, str):
+            val = raw.strip()
+            return [val] if val else []
+        if isinstance(raw, list):
+            out: list[str] = []
+            seen: set[str] = set()
+            for item in raw:
+                if not isinstance(item, str):
+                    continue
+                name = item.strip()
+                if not name or name in seen:
+                    continue
+                seen.add(name)
+                out.append(name)
+            return out
+        return []
+
+    def _effective_allowlist(runtime_ns: Dict[str, Any]) -> list[str]:
+        if isinstance(runtime_ns, dict) and "allowed_tools" in runtime_ns:
+            normalized = _normalize_allowlist(runtime_ns.get("allowed_tools"))
+            # Filter to currently known tools (dynamic), preserving order.
+            current = _tool_by_name()
+            filtered = [name for name in normalized if name in current]
+            runtime_ns["allowed_tools"] = filtered
+            return filtered
+        return list(_default_allowlist())
+
+    def _allowed_tool_defs(allowlist: list[str]) -> list[ToolDefinition]:
+        tool_by_name = _tool_by_name()
+        out: list[ToolDefinition] = []
+        for name in allowlist:
+            tool = tool_by_name.get(name)
+            if tool is not None:
+                out.append(tool)
+        return out
+
+    def _system_prompt(runtime_ns: Dict[str, Any]) -> Optional[str]:
+        raw = runtime_ns.get("system_prompt") if isinstance(runtime_ns, dict) else None
+        if isinstance(raw, str) and raw.strip():
+            return raw
+        return None
+
+    def _sanitize_llm_messages(messages: Any, *, limits: Optional[Dict[str, Any]] = None) -> List[Dict[str, str]]:
+        """Convert runtime-owned message dicts into OpenAI-style {role, content, ...}.
+
+        Runtime messages can include extra metadata fields (`timestamp`, `metadata`) that many providers
+        will reject. Keep only the fields the LLM API expects.
+        """
+        if not isinstance(messages, list) or not messages:
+            return []
+        def _limit_int(key: str, default: int) -> int:
+            if not isinstance(limits, dict):
+                return default
+            try:
+                return int(limits.get(key, default))
+            except Exception:
+                return default
+        max_message_chars = _limit_int("max_message_chars", -1)
+        max_tool_message_chars = _limit_int("max_tool_message_chars", -1)
+
+        def _truncate(text: str, *, max_chars: int) -> str:
+            if max_chars <= 0:
+                return text
+            if len(text) <= max_chars:
+                return text
+            suffix = f"\n… (truncated, {len(text):,} chars total)"
+            keep = max_chars - len(suffix)
+            if keep < 200:
+                keep = max_chars
+                suffix = ""
+            return text[:keep].rstrip() + suffix
+
+        out: List[Dict[str, str]] = []
+        for m in messages:
+            if not isinstance(m, dict):
+                continue
+            role = str(m.get("role") or "").strip()
+            content = m.get("content")
+            if not role or content is None:
+                continue
+            content_str = str(content)
+            if not content_str.strip():
+                continue
+            limit = max_tool_message_chars if role == "tool" else max_message_chars
+            entry: Dict[str, str] = {"role": role, "content": _truncate(content_str, max_chars=limit)}
+            if role == "tool":
+                meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
+                call_id = meta.get("call_id") if isinstance(meta, dict) else None
+                if call_id is not None and str(call_id).strip():
+                    entry["tool_call_id"] = str(call_id).strip()
+            out.append(entry)
+        return out
+
+    def _flag(runtime_ns: Dict[str, Any], key: str, *, default: bool = False) -> bool:
+        if not isinstance(runtime_ns, dict) or key not in runtime_ns:
+            return bool(default)
+        val = runtime_ns.get(key)
+        if isinstance(val, bool):
+            return val
+        if isinstance(val, (int, float)):
+            return bool(val)
+        if isinstance(val, str):
+            lowered = val.strip().lower()
+            if lowered in ("1", "true", "yes", "on", "enabled"):
+                return True
+            if lowered in ("0", "false", "no", "off", "disabled"):
+                return False
+        return bool(default)
+
+    def _int(runtime_ns: Dict[str, Any], key: str, *, default: int) -> int:
+        if not isinstance(runtime_ns, dict) or key not in runtime_ns:
+            return int(default)
+        val = runtime_ns.get(key)
+        try:
+            return int(val)  # type: ignore[arg-type]
+        except Exception:
+            return int(default)
+
+    def _extract_plan_update(content: str) -> Optional[str]:
+        """Extract a plan update block from model content (best-effort).
+
+        Convention (prompted in Plan mode): the model appends a final section:
+
+        Plan Update:
+        - [ ] ...
+        - [x] ...
+        """
+        if not isinstance(content, str) or not content.strip():
+            return None
+
+        import re
+
+        lines = content.splitlines()
+        header_idx: Optional[int] = None
+        for i, line in enumerate(lines):
+            if re.match(r"(?i)^\s*plan\s*update\s*:\s*$", line.strip()):
+                header_idx = i
+        if header_idx is None:
+            return None
+
+        plan_lines = lines[header_idx + 1 :]
+        while plan_lines and not plan_lines[0].strip():
+            plan_lines.pop(0)
+        plan_text = "\n".join(plan_lines).strip()
+        if not plan_text:
+            return None
+        if not re.search(r"(?m)^\s*(?:[-*]|\d+\.)\s+", plan_text):
+            return None
+        return plan_text
 
     def init_node(run: RunState, ctx) -> StepPlan:
         context, scratchpad, runtime_ns, _, limits = ensure_codeact_vars(run)
```
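The `Plan Update:` convention in `_extract_plan_update` is easiest to verify with a concrete input. A standalone sketch of the same best-effort parse (regexes copied from the hunk above; the function name here is illustrative):

```python
import re
from typing import Optional

def extract_plan_update(content: str) -> Optional[str]:
    # Take the LAST "Plan Update:" header (case-insensitive), then require
    # that the trailing block looks like a markdown checklist or numbered list.
    lines = content.splitlines()
    header_idx = None
    for i, line in enumerate(lines):
        if re.match(r"(?i)^\s*plan\s*update\s*:\s*$", line.strip()):
            header_idx = i
    if header_idx is None:
        return None
    plan = "\n".join(lines[header_idx + 1 :]).strip()
    if plan and re.search(r"(?m)^\s*(?:[-*]|\d+\.)\s+", plan):
        return plan
    return None

text = "Fetched the data.\n\nPlan Update:\n- [x] fetch data\n- [ ] write report"
assert extract_plan_update(text) == "- [x] fetch data\n- [ ] write report"
assert extract_plan_update("no plan section here") is None
```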
```diff
@@ -123,13 +306,68 @@ def create_codeact_workflow(
         if task and (not messages or messages[-1].get("role") != "user" or messages[-1].get("content") != task):
             messages.append(_new_message(ctx, role="user", content=task))
 
-        runtime_ns
-
+        allow = _effective_allowlist(runtime_ns)
+        allowed_defs = _allowed_tool_defs(allow)
+        runtime_ns["tool_specs"] = [t.to_dict() for t in allowed_defs]
+        runtime_ns["toolset_id"] = _compute_toolset_id(runtime_ns["tool_specs"])
+        runtime_ns.setdefault("allowed_tools", allow)
         runtime_ns.setdefault("inbox", [])
 
         emit("init", {"task": task})
+        if _flag(runtime_ns, "plan_mode", default=False) and not isinstance(scratchpad.get("plan"), str):
+            return StepPlan(node_id="init", next_node="plan")
         return StepPlan(node_id="init", next_node="reason")
 
+    def plan_node(run: RunState, ctx) -> StepPlan:
+        context, scratchpad, runtime_ns, _, _ = ensure_codeact_vars(run)
+        task = str(context.get("task", "") or "")
+
+        allow = _effective_allowlist(runtime_ns)
+
+        prompt = (
+            "You are preparing a high-level execution plan for the user's request.\n"
+            "Return a concise TODO list (5–12 steps) that is actionable and verifiable.\n"
+            "Do not call tools yet. Do not include role prefixes like 'assistant:'.\n\n"
+            f"User request:\n{task}\n\n"
+            "Plan (markdown checklist):\n"
+            "- [ ] ...\n"
+        )
+
+        emit("plan_request", {"tools": allow})
+
+        payload: Dict[str, Any] = {"prompt": prompt, "params": {"temperature": 0.2}}
+        sys = _system_prompt(runtime_ns)
+        if isinstance(sys, str) and sys.strip():
+            payload["system_prompt"] = sys
+
+        return StepPlan(
+            node_id="plan",
+            effect=Effect(
+                type=EffectType.LLM_CALL,
+                payload=payload,
+                result_key="_temp.plan_llm_response",
+            ),
+            next_node="plan_parse",
+        )
+
+    def plan_parse_node(run: RunState, ctx) -> StepPlan:
+        context, scratchpad, _, temp, _ = ensure_codeact_vars(run)
+        resp = temp.get("plan_llm_response", {})
+        if not isinstance(resp, dict):
+            resp = {}
+        plan_text = resp.get("content")
+        plan = "" if plan_text is None else str(plan_text).strip()
+        if not plan and isinstance(resp.get("data"), dict):
+            plan = json.dumps(resp.get("data"), ensure_ascii=False, indent=2).strip()
+
+        scratchpad["plan"] = plan
+        temp.pop("plan_llm_response", None)
+
+        if plan:
+            context["messages"].append(_new_message(ctx, role="assistant", content=plan, metadata={"kind": "plan"}))
+        emit("plan", {"plan": plan})
+        return StepPlan(node_id="plan_parse", next_node="reason")
+
     def reason_node(run: RunState, ctx) -> StepPlan:
         context, scratchpad, runtime_ns, _, limits = ensure_codeact_vars(run)
 
```
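Plan mode is opt-in: `init_node` detours to the new `plan` node only when the `plan_mode` flag in the runtime namespace is truthy and no plan string exists yet, so resumed runs skip re-planning. A sketch of that gate under the same assumptions (`runtime_ns` and `scratchpad` are plain dicts; the flag parsing here condenses `_flag` from the hunks above):

```python
# Truthy flag forms accepted by _flag(): bools, numbers, and strings such
# as "1"/"true"/"yes"/"on". A plan that is already a string suppresses the
# detour, which is what makes re-entry after a resume idempotent.
def next_after_init(runtime_ns: dict, scratchpad: dict) -> str:
    plan_mode = str(runtime_ns.get("plan_mode", "")).strip().lower() in ("1", "true", "yes", "on", "enabled")
    if plan_mode and not isinstance(scratchpad.get("plan"), str):
        return "plan"
    return "reason"

runtime_ns = {"plan_mode": "true"}
scratchpad = {}
assert next_after_init(runtime_ns, scratchpad) == "plan"
scratchpad["plan"] = "- [ ] step one"
assert next_after_init(runtime_ns, scratchpad) == "reason"
```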
```diff
@@ -159,9 +397,23 @@ def create_codeact_workflow(
         guidance = " | ".join([m for m in inbox_messages if m])
         runtime_ns["inbox"] = []
 
+        messages_view = ActiveContextPolicy.select_active_messages_for_llm_from_run(run)
+
+        # Refresh tool metadata BEFORE rendering Active Memory so token fitting stays accurate
+        # (even though we do not render a "Tools (session)" block into Active Memory prompts).
+        allow = _effective_allowlist(runtime_ns)
+        allowed_defs = _allowed_tool_defs(allow)
+        tool_specs = [t.to_dict() for t in allowed_defs]
+        include_examples = bool(runtime_ns.get("tool_prompt_examples", True))
+        if not include_examples:
+            tool_specs = [{k: v for k, v in spec.items() if k != "examples"} for spec in tool_specs if isinstance(spec, dict)]
+        runtime_ns["tool_specs"] = tool_specs
+        runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
+        runtime_ns.setdefault("allowed_tools", allow)
+
         req = logic.build_request(
             task=str(context.get("task", "") or ""),
-            messages=
+            messages=messages_view,
             guidance=guidance,
             iteration=iteration + 1,
             max_iterations=max_iterations,
```
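Setting `tool_prompt_examples` to a falsy value strips the `examples` key from every tool spec before it reaches the prompt. A runnable sketch of exactly that comprehension (the `read_file` spec is a hypothetical stand-in):

```python
# One illustrative tool spec; only the "examples" key is affected.
tool_specs = [
    {"name": "read_file", "parameters": {"type": "object"}, "examples": ["read_file(path='a.txt')"]},
]
include_examples = False
if not include_examples:
    tool_specs = [{k: v for k, v in spec.items() if k != "examples"} for spec in tool_specs if isinstance(spec, dict)]
assert "examples" not in tool_specs[0]
assert tool_specs[0]["name"] == "read_file"
```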
```diff
@@ -170,7 +422,18 @@ def create_codeact_workflow(
 
         emit("reason", {"iteration": iteration + 1, "max_iterations": max_iterations, "has_guidance": bool(guidance)})
 
-
+        # IMPORTANT: When we send `messages`, do not also send a non-empty `prompt`.
+        # Some providers/servers will append `prompt` as an extra user message even when the
+        # current request is already present in `messages`, which duplicates user turns and
+        # wastes context budget.
+        payload: Dict[str, Any] = {
+            "prompt": "",
+            "messages": _sanitize_llm_messages(messages_view, limits=limits),
+            "tools": list(tool_specs),
+        }
+        sys = _system_prompt(runtime_ns) or req.system_prompt
+        if isinstance(sys, str) and sys.strip():
+            payload["system_prompt"] = sys
         if req.max_tokens is not None:
             payload["params"] = {"max_tokens": req.max_tokens}
 
```
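Because the full conversation rides in `messages`, the request is deliberately sent with an empty `prompt`. A sketch of the resulting LLM_CALL payload shape, with illustrative message and tool contents:

```python
# Shape of the payload reason_node now emits. _sanitize_llm_messages has
# already stripped runtime-only fields (timestamp, metadata) and mapped a
# tool message's metadata.call_id onto tool_call_id.
payload = {
    "prompt": "",  # deliberate: messages already carry the current user turn
    "messages": [
        {"role": "user", "content": "Summarize sales.csv"},
        {"role": "tool", "content": "rows: 120", "tool_call_id": "1"},
    ],
    "tools": [{"name": "read_file", "parameters": {"type": "object"}}],  # allowlisted specs
    "params": {"max_tokens": 2048},  # only present when req.max_tokens is set
}
```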
```diff
@@ -185,80 +448,282 @@ def create_codeact_workflow(
         )
 
     def parse_node(run: RunState, ctx) -> StepPlan:
-        context,
+        context, scratchpad, runtime_ns, temp, _ = ensure_codeact_vars(run)
         response = temp.get("llm_response", {})
         content, tool_calls = logic.parse_response(response)
 
-        if content:
-            context["messages"].append(_new_message(ctx, role="assistant", content=content))
-
         temp.pop("llm_response", None)
         emit("parse", {"has_tool_calls": bool(tool_calls), "content_preview": (content[:100] if content else "(no content)")})
 
         if tool_calls:
+            if content:
+                context["messages"].append(_new_message(ctx, role="assistant", content=content))
+                if _flag(runtime_ns, "plan_mode", default=False):
+                    updated = _extract_plan_update(content)
+                    if isinstance(updated, str) and updated.strip():
+                        scratchpad["plan"] = updated.strip()
             temp["pending_tool_calls"] = [tc.__dict__ for tc in tool_calls]
             return StepPlan(node_id="parse", next_node="act")
 
+        # Empty response is an invalid step: recover with a bounded retry that carries evidence.
+        if not isinstance(content, str) or not content.strip():
+            try:
+                empty_retries = int(scratchpad.get("empty_response_retry_count") or 0)
+            except Exception:
+                empty_retries = 0
+
+            if empty_retries < 2:
+                scratchpad["empty_response_retry_count"] = empty_retries + 1
+                emit("parse_retry_empty_response", {"retries": empty_retries + 1})
+                inbox = runtime_ns.get("inbox")
+                if not isinstance(inbox, list):
+                    inbox = []
+                    runtime_ns["inbox"] = inbox
+                inbox.append(
+                    {
+                        "content": (
+                            "[Recover] Your last message was empty. Continue the task now. "
+                            "If you need info, CALL tools (preferred). Do not output an empty message."
+                        )
+                    }
+                )
+                return StepPlan(node_id="parse", next_node="reason")
+
+            safe = (
+                "I can't proceed: the model repeatedly returned empty outputs (no content, no tool calls).\n"
+                "Please retry, reduce context, or switch models."
+            )
+            context["messages"].append(_new_message(ctx, role="assistant", content=safe, metadata={"kind": "error"}))
+            temp["final_answer"] = safe
+            temp["pending_tool_calls"] = []
+            scratchpad["empty_response_retry_count"] = 0
+            return StepPlan(node_id="parse", next_node="maybe_review")
+
         code = logic.extract_code(content)
         if code:
+            if content:
+                context["messages"].append(_new_message(ctx, role="assistant", content=content))
+                if _flag(runtime_ns, "plan_mode", default=False):
+                    updated = _extract_plan_update(content)
+                    if isinstance(updated, str) and updated.strip():
+                        scratchpad["plan"] = updated.strip()
             temp["pending_code"] = code
             return StepPlan(node_id="parse", next_node="execute_code")
 
-
-
+        def _extract_final_answer(text: str) -> tuple[bool, str]:
+            if not isinstance(text, str) or not text.strip():
+                return False, ""
+            s = text.lstrip()
+            if s.upper().startswith("FINAL:"):
+                return True, s[len("FINAL:") :].lstrip()
+            return False, text
+
+        raw = str(content or "").strip()
+        is_final, final = _extract_final_answer(raw)
+        if is_final:
+            if final:
+                context["messages"].append(_new_message(ctx, role="assistant", content=final))
+                if _flag(runtime_ns, "plan_mode", default=False):
+                    updated = _extract_plan_update(final)
+                    if isinstance(updated, str) and updated.strip():
+                        scratchpad["plan"] = updated.strip()
+            temp["final_answer"] = final or "No answer provided"
+            temp["pending_tool_calls"] = []
+            return StepPlan(node_id="parse", next_node="maybe_review")
+
+        # Default: treat as a final answer even without an explicit FINAL marker.
+        if raw:
+            context["messages"].append(_new_message(ctx, role="assistant", content=raw))
+            if _flag(runtime_ns, "plan_mode", default=False):
+                updated = _extract_plan_update(raw)
+                if isinstance(updated, str) and updated.strip():
+                    scratchpad["plan"] = updated.strip()
+        temp["final_answer"] = raw or "No answer provided"
+        temp["pending_tool_calls"] = []
+        scratchpad["empty_response_retry_count"] = 0
+        return StepPlan(node_id="parse", next_node="maybe_review")
 
     def act_node(run: RunState, ctx) -> StepPlan:
-
-
-
-
-
-
+        # Treat `_temp.pending_tool_calls` as a durable queue to avoid dropping tool calls when
+        # schema-only tools (ask_user/memory/etc.) are interleaved with normal tools.
+        context, _, runtime_ns, temp, _ = ensure_codeact_vars(run)
+        raw_queue = temp.get("pending_tool_calls", [])
+        if not isinstance(raw_queue, list) or not raw_queue:
+            temp["pending_tool_calls"] = []
             return StepPlan(node_id="act", next_node="reason")
 
-
-
-
-
-
+        allow = _effective_allowlist(runtime_ns)
+        builtin_effect_tools = {
+            "ask_user",
+            "recall_memory",
+            "inspect_vars",
+            "remember",
+            "remember_note",
+            "compact_memory",
+        }
+
+        tool_queue: List[Dict[str, Any]] = []
+        for idx, item in enumerate(raw_queue, start=1):
+            if isinstance(item, ToolCall):
+                d: Dict[str, Any] = {"name": item.name, "arguments": item.arguments, "call_id": item.call_id}
+            elif isinstance(item, dict):
+                d = dict(item)
+            else:
                 continue
+            call_id = str(d.get("call_id") or "").strip()
+            if not call_id:
+                d["call_id"] = str(idx)
+            tool_queue.append(d)
+
+        if not tool_queue:
+            temp["pending_tool_calls"] = []
+            return StepPlan(node_id="act", next_node="reason")
+
+        def _is_builtin(tc: Dict[str, Any]) -> bool:
+            name = tc.get("name")
+            return isinstance(name, str) and name in builtin_effect_tools
+
+        if _is_builtin(tool_queue[0]):
+            tc = tool_queue[0]
+            name = str(tc.get("name") or "").strip()
             args = tc.get("arguments") or {}
-
-
-                choices = list(choices) if isinstance(choices, list) else None
-
-                temp["pending_tool_calls"] = tool_calls[i + 1 :]
-                emit("ask_user", {"question": question, "choices": choices or []})
-                return StepPlan(
-                    node_id="act",
-                    effect=Effect(
-                        type=EffectType.ASK_USER,
-                        payload={"prompt": question, "choices": choices, "allow_free_text": True},
-                        result_key="_temp.user_response",
-                    ),
-                    next_node="handle_user_response",
-                )
+            if not isinstance(args, dict):
+                args = {}
 
-
-
-            emit("act", {"tool": tc.get("name", ""), "args": tc.get("arguments", {})})
+            # Pop builtin.
+            temp["pending_tool_calls"] = list(tool_queue[1:])
 
-
-
-
-
-
+            if name and name not in allow:
+                temp["tool_results"] = {
+                    "results": [
+                        {
+                            "call_id": str(tc.get("call_id") or ""),
+                            "name": name,
+                            "success": False,
+                            "output": None,
+                            "error": f"Tool '{name}' is not allowed for this agent",
+                        }
+                    ]
+                }
+                emit("act_blocked", {"tool": name})
+                return StepPlan(node_id="act", next_node="observe")
+
+            if name == "ask_user":
+                question = str(args.get("question") or "Please provide input:")
+                choices = args.get("choices")
+                choices = list(choices) if isinstance(choices, list) else None
+
+                msgs = context.get("messages")
+                if isinstance(msgs, list):
+                    content = f"[Agent question]: {question}"
+                    last = msgs[-1] if msgs else None
+                    last_role = last.get("role") if isinstance(last, dict) else None
+                    last_meta = last.get("metadata") if isinstance(last, dict) else None
+                    last_kind = last_meta.get("kind") if isinstance(last_meta, dict) else None
+                    last_content = last.get("content") if isinstance(last, dict) else None
+                    if not (last_role == "assistant" and last_kind == "ask_user_prompt" and str(last_content or "") == content):
+                        msgs.append(_new_message(ctx, role="assistant", content=content, metadata={"kind": "ask_user_prompt"}))
+
+                emit("ask_user", {"question": question, "choices": choices or []})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(
+                        type=EffectType.ASK_USER,
+                        payload={"prompt": question, "choices": choices, "allow_free_text": True},
+                        result_key="_temp.user_response",
+                    ),
+                    next_node="handle_user_response",
+                )
+
+            if name == "recall_memory":
+                payload = dict(args)
+                payload.setdefault("tool_name", "recall_memory")
+                payload.setdefault("call_id", tc.get("call_id") or "memory")
+                emit("memory_query", {"query": payload.get("query"), "span_id": payload.get("span_id")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_QUERY, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
                 )
-
-
-
+
+            if name == "inspect_vars":
+                payload = dict(args)
+                payload.setdefault("tool_name", "inspect_vars")
+                payload.setdefault("call_id", tc.get("call_id") or "vars")
+                emit("vars_query", {"path": payload.get("path")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.VARS_QUERY, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
+                )
+
+            if name == "remember":
+                payload = dict(args)
+                payload.setdefault("tool_name", "remember")
+                payload.setdefault("call_id", tc.get("call_id") or "memory")
+                emit("memory_tag", {"span_id": payload.get("span_id"), "tags": payload.get("tags")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_TAG, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
                 )
 
+            if name == "remember_note":
+                payload = dict(args)
+                payload.setdefault("tool_name", "remember_note")
+                payload.setdefault("call_id", tc.get("call_id") or "memory")
+                emit("memory_note", {"note": payload.get("note"), "tags": payload.get("tags")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_NOTE, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
+                )
+
+            if name == "compact_memory":
+                payload = dict(args)
+                payload.setdefault("tool_name", "compact_memory")
+                payload.setdefault("call_id", tc.get("call_id") or "compact")
+                emit(
+                    "memory_compact",
+                    {
+                        "preserve_recent": payload.get("preserve_recent"),
+                        "mode": payload.get("compression_mode"),
+                        "focus": payload.get("focus"),
+                    },
+                )
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_COMPACT, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
+                )
+
+            if temp.get("pending_tool_calls"):
+                return StepPlan(node_id="act", next_node="act")
+            return StepPlan(node_id="act", next_node="reason")
+
+        batch: List[Dict[str, Any]] = []
+        for tc in tool_queue:
+            if _is_builtin(tc):
+                break
+            batch.append(tc)
+
+        remaining = tool_queue[len(batch) :]
+        temp["pending_tool_calls"] = list(remaining)
+
+        for tc in batch:
+            emit("act", {"tool": tc.get("name", ""), "args": tc.get("arguments", {}), "call_id": str(tc.get("call_id") or "")})
+
+        formatted_calls: List[Dict[str, Any]] = []
+        for tc in batch:
+            formatted_calls.append(
+                {"name": tc.get("name", ""), "arguments": tc.get("arguments", {}), "call_id": str(tc.get("call_id") or "")}
+            )
+
         return StepPlan(
             node_id="act",
             effect=Effect(
                 type=EffectType.TOOL_CALLS,
-                payload={"tool_calls": formatted_calls},
+                payload={"tool_calls": formatted_calls, "allowed_tools": list(allow)},
                 result_key="_temp.tool_results",
             ),
             next_node="observe",
```
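The new `act_node` splits the pending queue so schema-only builtins run one at a time as dedicated effects, while the longest builtin-free prefix is dispatched as a single TOOL_CALLS batch. A runnable sketch of just that partitioning rule:

```python
# Builtins are the schema-only tools named in the hunk above; tool names in
# the sample queue are illustrative.
BUILTINS = {"ask_user", "recall_memory", "inspect_vars", "remember", "remember_note", "compact_memory"}

def partition(queue: list[dict]) -> tuple[list[dict], list[dict]]:
    if queue and queue[0]["name"] in BUILTINS:
        return [queue[0]], queue[1:]           # builtin at the head: run alone
    batch = []
    for tc in queue:
        if tc["name"] in BUILTINS:
            break                              # stop the batch at the first builtin
        batch.append(tc)
    return batch, queue[len(batch):]           # batched normal tools + remainder

queue = [{"name": "read_file"}, {"name": "write_file"}, {"name": "ask_user"}]
batch, remaining = partition(queue)
assert [t["name"] for t in batch] == ["read_file", "write_file"]
assert remaining == [{"name": "ask_user"}]     # stays queued for the next act step
```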
```diff
@@ -292,7 +757,7 @@ def create_codeact_workflow(
         )
 
     def observe_node(run: RunState, ctx) -> StepPlan:
-        context,
+        context, scratchpad, _, temp, _ = ensure_codeact_vars(run)
         tool_results = temp.get("tool_results", {})
         if not isinstance(tool_results, dict):
             tool_results = {}
```
```diff
@@ -308,12 +773,29 @@ def create_codeact_workflow(
             success = bool(r.get("success"))
             output = r.get("output", "")
             error = r.get("error", "")
+            # Prefer a tool-supplied human/LLM-friendly rendering when present.
+            def _display(v: Any) -> str:
+                if isinstance(v, dict):
+                    rendered = v.get("rendered")
+                    if isinstance(rendered, str) and rendered.strip():
+                        return rendered.strip()
+                return "" if v is None else str(v)
+
+            display = _display(output)
+            if not success:
+                # Preserve structured outputs for provenance, but show a clean string to the LLM/UI.
+                display = _display(output) if isinstance(output, dict) else str(error or output)
             rendered = logic.format_observation(
                 name=name,
-                output=
+                output=display,
                 success=success,
             )
-
+            # Observability: avoid truncating normal tool results in step events.
+            # Keep a bounded preview for huge tool outputs to avoid bloating traces/ledgers.
+            preview = rendered
+            if len(preview) > 1000:
+                preview = preview[:1000] + f"\n… (truncated, {len(rendered):,} chars total)"
+            emit("observe", {"tool": name, "success": success, "result": preview})
             context["messages"].append(
                 _new_message(
                     ctx,
```
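`observe_node` now prefers a tool-supplied `rendered` string over the raw structured output, and falls back to the error text only for plain (non-dict) failures. A condensed, runnable sketch of that display rule:

```python
from typing import Any

def display_for_llm(output: Any, success: bool, error: str = "") -> str:
    # Dict outputs may carry a pre-rendered, LLM-friendly string.
    if isinstance(output, dict):
        rendered = output.get("rendered")
        if isinstance(rendered, str) and rendered.strip():
            return rendered.strip()
    if not success and not isinstance(output, dict):
        return str(error or output)
    return "" if output is None else str(output)

assert display_for_llm({"rendered": "3 files changed", "raw": {}}, True) == "3 files changed"
assert display_for_llm(None, False, error="timeout") == "timeout"
```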
```diff
@@ -324,6 +806,12 @@ def create_codeact_workflow(
             )
 
         temp.pop("tool_results", None)
+        # Reset verifier/review rounds after executing tools so the verifier can run
+        # again on the next candidate answer.
+        scratchpad["review_count"] = 0
+        pending = temp.get("pending_tool_calls", [])
+        if isinstance(pending, list) and pending:
+            return StepPlan(node_id="observe", next_node="act")
         temp["pending_tool_calls"] = []
         return StepPlan(node_id="observe", next_node="reason")
 
```
```diff
@@ -342,6 +830,270 @@ def create_codeact_workflow(
             return StepPlan(node_id="handle_user_response", next_node="act")
         return StepPlan(node_id="handle_user_response", next_node="reason")
 
+    def maybe_review_node(run: RunState, ctx) -> StepPlan:
+        _, scratchpad, runtime_ns, _, _ = ensure_codeact_vars(run)
+
+        if not _flag(runtime_ns, "review_mode", default=False):
+            return StepPlan(node_id="maybe_review", next_node="done")
+
+        max_rounds = _int(runtime_ns, "review_max_rounds", default=1)
+        if max_rounds < 0:
+            max_rounds = 0
+        count = scratchpad.get("review_count")
+        try:
+            count_int = int(count or 0)
+        except Exception:
+            count_int = 0
+
+        if count_int >= max_rounds:
+            return StepPlan(node_id="maybe_review", next_node="done")
+
+        scratchpad["review_count"] = count_int + 1
+        return StepPlan(node_id="maybe_review", next_node="review")
+
+    def review_node(run: RunState, ctx) -> StepPlan:
+        context, scratchpad, runtime_ns, _, limits = ensure_codeact_vars(run)
+        task = str(context.get("task", "") or "")
+        plan = scratchpad.get("plan")
+        plan_text = str(plan).strip() if isinstance(plan, str) and plan.strip() else "(no plan)"
+
+        allow = _effective_allowlist(runtime_ns)
+
+        def _truncate_block(text: str, *, max_chars: int) -> str:
+            s = str(text or "")
+            if max_chars <= 0:
+                return s
+            if len(s) <= max_chars:
+                return s
+            suffix = f"\n… (truncated, {len(s):,} chars total)"
+            keep = max_chars - len(suffix)
+            if keep < 200:
+                keep = max_chars
+                suffix = ""
+            return s[:keep].rstrip() + suffix
+
+        def _format_allowed_tools() -> str:
+            specs = runtime_ns.get("tool_specs")
+            if not isinstance(specs, list) or not specs:
+                defs = _allowed_tool_defs(allow)
+                specs = [t.to_dict() for t in defs]
+            lines: list[str] = []
+            for spec in specs:
+                if not isinstance(spec, dict):
+                    continue
+                name = str(spec.get("name") or "").strip()
+                if not name:
+                    continue
+                params = spec.get("parameters")
+                props = params.get("properties", {}) if isinstance(params, dict) else {}
+                keys = sorted([k for k in props.keys() if isinstance(k, str)])
+                if keys:
+                    lines.append(f"- {name}({', '.join(keys)})")
+                else:
+                    lines.append(f"- {name}()")
+            return "\n".join(lines) if lines else "(no tools available)"
+
+        messages = list(context.get("messages") or [])
+        tool_msgs: list[str] = []
+        try:
+            tool_limit = int(limits.get("review_max_tool_output_chars", -1))
+        except Exception:
+            tool_limit = -1
+        try:
+            answer_limit = int(limits.get("review_max_answer_chars", -1))
+        except Exception:
+            answer_limit = -1
+
+        for m in reversed(messages):
+            if not isinstance(m, dict) or m.get("role") != "tool":
+                continue
+            content = m.get("content")
+            if isinstance(content, str) and content.strip():
+                tool_msgs.append(_truncate_block(content.strip(), max_chars=tool_limit))
+            if len(tool_msgs) >= 8:
+                break
+        tool_msgs.reverse()
+        observations = "\n\n".join(tool_msgs) if tool_msgs else "(no tool outputs)"
+
+        # Include recent user messages (especially ask_user responses) so the reviewer can
+        # avoid re-asking questions the user already answered.
+        try:
+            user_limit = int(limits.get("review_max_user_message_chars", -1))
+        except Exception:
+            user_limit = -1
+
+        user_msgs: list[str] = []
+        ask_prompts: list[str] = []
+        for m in reversed(messages):
+            if not isinstance(m, dict):
+                continue
+            role = m.get("role")
+            content = m.get("content")
+            if role == "user" and isinstance(content, str) and content.strip():
+                if content.strip() != task.strip():
+                    user_msgs.append(_truncate_block(content.strip(), max_chars=user_limit))
+            if len(user_msgs) >= 4:
+                break
+        for m in reversed(messages):
+            if not isinstance(m, dict):
+                continue
+            if m.get("role") != "assistant":
+                continue
+            meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
+            if not isinstance(meta, dict) or meta.get("kind") != "ask_user_prompt":
+                continue
+            content = m.get("content")
+            if isinstance(content, str) and content.strip():
+                ask_prompts.append(_truncate_block(content.strip(), max_chars=user_limit))
+            if len(ask_prompts) >= 4:
+                break
+
+        user_msgs.reverse()
+        ask_prompts.reverse()
+        user_context = "\n\n".join(user_msgs) if user_msgs else "(no additional user messages)"
+        asked_context = "\n\n".join(ask_prompts) if ask_prompts else "(no ask_user prompts recorded)"
+
+        answer_raw = str(run.vars.get("_temp", {}).get("final_answer") or "")
+        answer_excerpt = ""
+        if not tool_msgs and answer_raw.strip():
+            answer_excerpt = _truncate_block(answer_raw.strip(), max_chars=answer_limit)
+
+        prompt = (
+            "You are a verifier. Review whether the user's request has been fully satisfied.\n"
+            "Be strict: only count actions that are supported by the tool outputs.\n"
+            "If anything is missing, propose the NEXT ACTIONS.\n"
+            "Prefer returning `next_tool_calls` over `next_prompt`.\n"
+            "Return JSON ONLY.\n\n"
+            f"User request:\n{task}\n\n"
+            f"Plan:\n{plan_text}\n\n"
+            f"Recent ask_user prompts:\n{asked_context}\n\n"
+            f"Recent user messages:\n{user_context}\n\n"
+            + (f"Current answer (excerpt):\n{answer_excerpt}\n\n" if answer_excerpt else "")
+            + f"Tool outputs:\n{observations}\n\n"
+            f"Allowed tools:\n{_format_allowed_tools()}\n\n"
+        )
+
+        schema = {
+            "type": "object",
+            "properties": {
+                "complete": {"type": "boolean"},
+                "missing": {"type": "array", "items": {"type": "string"}},
+                "next_prompt": {"type": "string"},
+                "next_tool_calls": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "name": {"type": "string"},
+                            "arguments": {"type": "object"},
+                        },
+                        "required": ["name", "arguments"],
+                        "additionalProperties": False,
+                    },
+                },
+            },
+            "required": ["complete", "missing", "next_prompt", "next_tool_calls"],
+            "additionalProperties": False,
+        }
+
+        emit("review_request", {"tool_messages": len(tool_msgs)})
+
+        payload: Dict[str, Any] = {
+            "prompt": prompt,
+            "response_schema": schema,
+            "response_schema_name": "CodeActVerifier",
+            "params": {"temperature": 0.2},
+        }
+        sys = _system_prompt(runtime_ns)
+        if sys is not None:
+            payload["system_prompt"] = sys
+
+        return StepPlan(
+            node_id="review",
+            effect=Effect(
+                type=EffectType.LLM_CALL,
+                payload=payload,
+                result_key="_temp.review_llm_response",
+            ),
+            next_node="review_parse",
+        )
+
+    def review_parse_node(run: RunState, ctx) -> StepPlan:
+        _, _, runtime_ns, temp, _ = ensure_codeact_vars(run)
+        resp = temp.get("review_llm_response", {})
+        if not isinstance(resp, dict):
+            resp = {}
+
+        data = resp.get("data")
+        if data is None and isinstance(resp.get("content"), str):
+            try:
+                data = json.loads(resp["content"])
+            except Exception:
+                data = None
+        if not isinstance(data, dict):
+            data = {}
+
+        complete = bool(data.get("complete"))
+        missing = data.get("missing") if isinstance(data.get("missing"), list) else []
+        next_prompt = data.get("next_prompt")
+        next_prompt_text = str(next_prompt or "").strip()
+        next_tool_calls_raw = data.get("next_tool_calls")
+        next_tool_calls: list[dict[str, Any]] = []
+        if isinstance(next_tool_calls_raw, list):
+            for item in next_tool_calls_raw:
+                if not isinstance(item, dict):
+                    continue
+                name = str(item.get("name") or "").strip()
+                args = item.get("arguments")
+                if not isinstance(args, dict):
+                    args = {}
+                if name:
+                    next_tool_calls.append({"name": name, "arguments": args})
+
+        emit("review", {"complete": complete, "missing": missing})
+        temp.pop("review_llm_response", None)
+
+        if complete:
+            return StepPlan(node_id="review_parse", next_node="done")
+
+        if next_tool_calls:
+            temp["pending_tool_calls"] = next_tool_calls
+            emit("review_tool_calls", {"count": len(next_tool_calls)})
+            return StepPlan(node_id="review_parse", next_node="act")
+
+        # Behavioral validation: if incomplete but no tool calls, re-ask reviewer once with stricter rules.
+        if not complete and not next_tool_calls:
+            try:
+                retry_count = int(runtime_ns.get("review_retry_count") or 0)
+            except Exception:
+                retry_count = 0
+            if retry_count < 1:
+                runtime_ns["review_retry_count"] = retry_count + 1
+                inbox = runtime_ns.get("inbox")
+                if not isinstance(inbox, list):
+                    inbox = []
+                    runtime_ns["inbox"] = inbox
+                inbox.append(
+                    {
+                        "content": (
+                            "[Review] Your last review output was not actionable. "
+                            "If incomplete, you MUST return at least one `next_tool_call` "
+                            "(use `ask_user` if you need clarification). Return JSON only."
+                        )
+                    }
+                )
+                emit("review_retry_unactionable", {"retry": retry_count + 1})
+                return StepPlan(node_id="review_parse", next_node="review")
+
+        runtime_ns["review_retry_count"] = 0
+        if next_prompt_text:
+            inbox = runtime_ns.get("inbox")
+            if not isinstance(inbox, list):
+                inbox = []
+                runtime_ns["inbox"] = inbox
+            inbox.append({"content": f"[Review] {next_prompt_text}"})
+        return StepPlan(node_id="review_parse", next_node="reason")
+
     def done_node(run: RunState, ctx) -> StepPlan:
         context, scratchpad, _, temp, limits = ensure_codeact_vars(run)
         answer = str(temp.get("final_answer") or "No answer provided")
```
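`review_parse_node` accepts the verdict either as structured `data` or as JSON in `content`. A sketch of a well-formed reply under the `CodeActVerifier` schema above (the `write_file` call is a hypothetical example):

```python
import json

verdict = json.loads("""
{
  "complete": false,
  "missing": ["report was never written to disk"],
  "next_prompt": "",
  "next_tool_calls": [
    {"name": "write_file", "arguments": {"path": "report.md", "content": "..."}}
  ]
}
""")
# An incomplete verdict must be actionable: review_parse_node routes
# next_tool_calls straight back to the act node, and re-asks the reviewer
# once when neither tool calls nor a next_prompt came back.
assert verdict["complete"] is False and verdict["next_tool_calls"]
```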
```diff
@@ -350,6 +1102,16 @@ def create_codeact_workflow(
         # Prefer _limits.current_iteration, fall back to scratchpad
         iterations = int(limits.get("current_iteration", 0) or scratchpad.get("iteration", 0) or 0)
 
+        # Persist the final answer into the conversation history so it becomes part of the
+        # next run's seed context and shows up in /history.
+        messages = context.get("messages")
+        if isinstance(messages, list):
+            last = messages[-1] if messages else None
+            last_role = last.get("role") if isinstance(last, dict) else None
+            last_content = last.get("content") if isinstance(last, dict) else None
+            if last_role != "assistant" or str(last_content or "") != answer:
+                messages.append(_new_message(ctx, role="assistant", content=answer, metadata={"kind": "final_answer"}))
+
         return StepPlan(
             node_id="done",
             complete_output={
```
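`done_node` appends the final answer only when it is not already the last assistant turn, which keeps re-entered `done` steps from duplicating it. A tiny sketch of the dedupe guard:

```python
def persist_final_answer(messages: list, answer: str) -> None:
    # Append only if the history does not already end with this exact turn.
    last = messages[-1] if messages else None
    already_last = (
        isinstance(last, dict)
        and last.get("role") == "assistant"
        and str(last.get("content") or "") == answer
    )
    if not already_last:
        messages.append({"role": "assistant", "content": answer})

history = [{"role": "assistant", "content": "42"}]
persist_final_answer(history, "42")
assert len(history) == 1  # no duplicate appended
```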
```diff
@@ -384,14 +1146,18 @@ def create_codeact_workflow(
         entry_node="init",
         nodes={
             "init": init_node,
+            "plan": plan_node,
+            "plan_parse": plan_parse_node,
             "reason": reason_node,
             "parse": parse_node,
             "act": act_node,
             "execute_code": execute_code_node,
             "observe": observe_node,
             "handle_user_response": handle_user_response_node,
+            "maybe_review": maybe_review_node,
+            "review": review_node,
+            "review_parse": review_parse_node,
             "done": done_node,
             "max_iterations": max_iterations_node,
         },
     )
-
```