abstractagent 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- abstractagent/adapters/__init__.py +2 -1
- abstractagent/adapters/codeact_runtime.py +907 -60
- abstractagent/adapters/generation_params.py +82 -0
- abstractagent/adapters/media.py +45 -0
- abstractagent/adapters/memact_runtime.py +959 -0
- abstractagent/adapters/react_runtime.py +1357 -135
- abstractagent/agents/__init__.py +4 -0
- abstractagent/agents/base.py +89 -1
- abstractagent/agents/codeact.py +125 -18
- abstractagent/agents/memact.py +280 -0
- abstractagent/agents/react.py +129 -18
- abstractagent/logic/__init__.py +2 -0
- abstractagent/logic/builtins.py +270 -5
- abstractagent/logic/codeact.py +91 -81
- abstractagent/logic/memact.py +128 -0
- abstractagent/logic/react.py +91 -50
- abstractagent/repl.py +24 -447
- abstractagent/scripts/__init__.py +5 -0
- abstractagent/scripts/lmstudio_tool_eval.py +426 -0
- abstractagent/tools/__init__.py +9 -0
- abstractagent-0.3.1.dist-info/METADATA +112 -0
- abstractagent-0.3.1.dist-info/RECORD +33 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.1.dist-info}/WHEEL +1 -1
- abstractagent/ui/__init__.py +0 -5
- abstractagent/ui/question.py +0 -197
- abstractagent-0.2.0.dist-info/METADATA +0 -134
- abstractagent-0.2.0.dist-info/RECORD +0 -28
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.1.dist-info}/entry_points.txt +0 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.1.dist-info}/top_level.txt +0 -0
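
All hunks shown below are from abstractagent/adapters/codeact_runtime.py, by far the largest change in this release. The headline additions are per-run tool allowlists, an optional Plan mode, and an optional Review (verifier) mode, all configured through the `_runtime` namespace in run vars. A minimal sketch of how a caller might seed those vars (the key names come from the diff below; the surrounding runner wiring and the tool names are assumptions):

    # Hypothetical seeding of run vars; only the key names are from the diff.
    run_vars = {
        "context": {"task": "Summarize ./README.md", "messages": []},
        "_runtime": {
            "allowed_tools": ["read_file", "execute_python"],  # per-run tool allowlist
            "plan_mode": True,        # draft a TODO plan before the first reason step
            "review_mode": True,      # verify the candidate answer before finishing
            "review_max_rounds": 1,   # bounded verifier rounds
        },
        "_limits": {"max_iterations": 25},
    }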
@@ -6,10 +6,13 @@ import hashlib
 import json
 from typing import Any, Callable, Dict, List, Optional
 
-from abstractcore.tools import ToolCall
+from abstractcore.tools import ToolCall, ToolDefinition
 from abstractruntime import Effect, EffectType, RunState, StepPlan, WorkflowSpec
 from abstractruntime.core.vars import ensure_limits, ensure_namespaces
+from abstractruntime.memory.active_context import ActiveContextPolicy
 
+from .generation_params import runtime_llm_params
+from .media import extract_media_from_context
 from ..logic.codeact import CodeActLogic
 
 
@@ -29,11 +32,16 @@ def _new_message(
 
     timestamp = datetime.now(timezone.utc).isoformat()
 
+    import uuid
+
+    meta = dict(metadata or {})
+    meta.setdefault("message_id", f"msg_{uuid.uuid4().hex}")
+
     return {
         "role": role,
         "content": content,
         "timestamp": timestamp,
-        "metadata":
+        "metadata": meta,
     }
 
 
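
With this change every runtime message carries a stable `message_id` in its metadata. A standalone sketch of the resulting message shape (content values are illustrative):

    import uuid
    from datetime import datetime, timezone

    # Shape of a runtime message after this change.
    meta = {"kind": "plan"}
    meta.setdefault("message_id", f"msg_{uuid.uuid4().hex}")
    message = {
        "role": "assistant",
        "content": "…",
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "metadata": meta,
    }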
@@ -108,9 +116,187 @@ def create_codeact_workflow(
         if on_step:
             on_step(step, data)
 
-
-
-
+    def _current_tool_defs() -> list[ToolDefinition]:
+        defs = getattr(logic, "tools", None)
+        if not isinstance(defs, list):
+            try:
+                defs = list(defs) # type: ignore[arg-type]
+            except Exception:
+                defs = []
+        return [t for t in defs if getattr(t, "name", None)]
+
+    def _tool_by_name() -> dict[str, ToolDefinition]:
+        out: dict[str, ToolDefinition] = {}
+        for t in _current_tool_defs():
+            name = getattr(t, "name", None)
+            if isinstance(name, str) and name.strip():
+                out[name] = t
+        return out
+
+    def _default_allowlist() -> list[str]:
+        out: list[str] = []
+        seen: set[str] = set()
+        for t in _current_tool_defs():
+            name = getattr(t, "name", None)
+            if not isinstance(name, str) or not name.strip() or name in seen:
+                continue
+            seen.add(name)
+            out.append(name)
+        return out
+
+    def _normalize_allowlist(raw: Any) -> list[str]:
+        if raw is None:
+            return []
+        if isinstance(raw, str):
+            val = raw.strip()
+            return [val] if val else []
+        if isinstance(raw, list):
+            out: list[str] = []
+            seen: set[str] = set()
+            for item in raw:
+                if not isinstance(item, str):
+                    continue
+                name = item.strip()
+                if not name or name in seen:
+                    continue
+                seen.add(name)
+                out.append(name)
+            return out
+        return []
+
+    def _effective_allowlist(runtime_ns: Dict[str, Any]) -> list[str]:
+        if isinstance(runtime_ns, dict) and "allowed_tools" in runtime_ns:
+            normalized = _normalize_allowlist(runtime_ns.get("allowed_tools"))
+            # Filter to currently known tools (dynamic), preserving order.
+            current = _tool_by_name()
+            filtered = [name for name in normalized if name in current]
+            runtime_ns["allowed_tools"] = filtered
+            return filtered
+        return list(_default_allowlist())
+
+    def _allowed_tool_defs(allowlist: list[str]) -> list[ToolDefinition]:
+        tool_by_name = _tool_by_name()
+        out: list[ToolDefinition] = []
+        for name in allowlist:
+            tool = tool_by_name.get(name)
+            if tool is not None:
+                out.append(tool)
+        return out
+
+    def _system_prompt(runtime_ns: Dict[str, Any]) -> Optional[str]:
+        raw = runtime_ns.get("system_prompt") if isinstance(runtime_ns, dict) else None
+        if isinstance(raw, str) and raw.strip():
+            return raw
+        return None
+
+    def _sanitize_llm_messages(messages: Any, *, limits: Optional[Dict[str, Any]] = None) -> List[Dict[str, str]]:
+        """Convert runtime-owned message dicts into OpenAI-style {role, content, ...}.
+
+        Runtime messages can include extra metadata fields (`timestamp`, `metadata`) that many providers
+        will reject. Keep only the fields the LLM API expects.
+        """
+        if not isinstance(messages, list) or not messages:
+            return []
+        def _limit_int(key: str, default: int) -> int:
+            if not isinstance(limits, dict):
+                return default
+            try:
+                return int(limits.get(key, default))
+            except Exception:
+                return default
+        max_message_chars = _limit_int("max_message_chars", -1)
+        max_tool_message_chars = _limit_int("max_tool_message_chars", -1)
+
+        def _truncate(text: str, *, max_chars: int) -> str:
+            if max_chars <= 0:
+                return text
+            if len(text) <= max_chars:
+                return text
+            suffix = f"\n… (truncated, {len(text):,} chars total)"
+            keep = max_chars - len(suffix)
+            if keep < 200:
+                keep = max_chars
+                suffix = ""
+            #[WARNING:TRUNCATION] bounded message content for LLM payload
+            return text[:keep].rstrip() + suffix
+
+        out: List[Dict[str, str]] = []
+        for m in messages:
+            if not isinstance(m, dict):
+                continue
+            role = str(m.get("role") or "").strip()
+            content = m.get("content")
+            if not role or content is None:
+                continue
+            content_str = str(content)
+            if not content_str.strip():
+                continue
+            limit = max_tool_message_chars if role == "tool" else max_message_chars
+            entry: Dict[str, str] = {"role": role, "content": _truncate(content_str, max_chars=limit)}
+            if role == "tool":
+                meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
+                call_id = meta.get("call_id") if isinstance(meta, dict) else None
+                if call_id is not None and str(call_id).strip():
+                    entry["tool_call_id"] = str(call_id).strip()
+            out.append(entry)
+        return out
+
+    def _flag(runtime_ns: Dict[str, Any], key: str, *, default: bool = False) -> bool:
+        if not isinstance(runtime_ns, dict) or key not in runtime_ns:
+            return bool(default)
+        val = runtime_ns.get(key)
+        if isinstance(val, bool):
+            return val
+        if isinstance(val, (int, float)):
+            return bool(val)
+        if isinstance(val, str):
+            lowered = val.strip().lower()
+            if lowered in ("1", "true", "yes", "on", "enabled"):
+                return True
+            if lowered in ("0", "false", "no", "off", "disabled"):
+                return False
+        return bool(default)
+
+    def _int(runtime_ns: Dict[str, Any], key: str, *, default: int) -> int:
+        if not isinstance(runtime_ns, dict) or key not in runtime_ns:
+            return int(default)
+        val = runtime_ns.get(key)
+        try:
+            return int(val) # type: ignore[arg-type]
+        except Exception:
+            return int(default)
+
+    def _extract_plan_update(content: str) -> Optional[str]:
+        """Extract a plan update block from model content (best-effort).
+
+        Convention (prompted in Plan mode): the model appends a final section:
+
+        Plan Update:
+        - [ ] ...
+        - [x] ...
+        """
+        if not isinstance(content, str) or not content.strip():
+            return None
+
+        import re
+
+        lines = content.splitlines()
+        header_idx: Optional[int] = None
+        for i, line in enumerate(lines):
+            if re.match(r"(?i)^\s*plan\s*update\s*:\s*$", line.strip()):
+                header_idx = i
+        if header_idx is None:
+            return None
+
+        plan_lines = lines[header_idx + 1 :]
+        while plan_lines and not plan_lines[0].strip():
+            plan_lines.pop(0)
+        plan_text = "\n".join(plan_lines).strip()
+        if not plan_text:
+            return None
+        if not re.search(r"(?m)^\s*(?:[-*]|\d+\.)\s+", plan_text):
+            return None
+        return plan_text
 
     def init_node(run: RunState, ctx) -> StepPlan:
         context, scratchpad, runtime_ns, _, limits = ensure_codeact_vars(run)
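
The hunk above introduces the allowlist plumbing. Restated as a standalone sketch (tool names illustrative): None becomes an empty list, a bare string becomes a one-element list, and a list is de-duplicated in first-seen order with non-strings and blanks dropped.

    def normalize_allowlist(raw):
        # Mirrors _normalize_allowlist above (standalone restatement).
        if raw is None:
            return []
        if isinstance(raw, str):
            return [raw.strip()] if raw.strip() else []
        if isinstance(raw, list):
            out, seen = [], set()
            for item in raw:
                name = item.strip() if isinstance(item, str) else ""
                if name and name not in seen:
                    seen.add(name)
                    out.append(name)
            return out
        return []

    assert normalize_allowlist(" web_search ") == ["web_search"]
    assert normalize_allowlist(["a", "a", "", 3, "b"]) == ["a", "b"]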
@@ -123,13 +309,71 @@ def create_codeact_workflow(
         if task and (not messages or messages[-1].get("role") != "user" or messages[-1].get("content") != task):
             messages.append(_new_message(ctx, role="user", content=task))
 
-        runtime_ns
-
+        allow = _effective_allowlist(runtime_ns)
+        allowed_defs = _allowed_tool_defs(allow)
+        runtime_ns["tool_specs"] = [t.to_dict() for t in allowed_defs]
+        runtime_ns["toolset_id"] = _compute_toolset_id(runtime_ns["tool_specs"])
+        runtime_ns.setdefault("allowed_tools", allow)
         runtime_ns.setdefault("inbox", [])
 
         emit("init", {"task": task})
+        if _flag(runtime_ns, "plan_mode", default=False) and not isinstance(scratchpad.get("plan"), str):
+            return StepPlan(node_id="init", next_node="plan")
         return StepPlan(node_id="init", next_node="reason")
 
+    def plan_node(run: RunState, ctx) -> StepPlan:
+        context, scratchpad, runtime_ns, _, _ = ensure_codeact_vars(run)
+        task = str(context.get("task", "") or "")
+
+        allow = _effective_allowlist(runtime_ns)
+
+        prompt = (
+            "You are preparing a high-level execution plan for the user's request.\n"
+            "Return a concise TODO list (5–12 steps) that is actionable and verifiable.\n"
+            "Do not call tools yet. Do not include role prefixes like 'assistant:'.\n\n"
+            f"User request:\n{task}\n\n"
+            "Plan (markdown checklist):\n"
+            "- [ ] ...\n"
+        )
+
+        emit("plan_request", {"tools": allow})
+
+        payload: Dict[str, Any] = {"prompt": prompt, "params": runtime_llm_params(runtime_ns, extra={"temperature": 0.2})}
+        media = extract_media_from_context(context)
+        if media:
+            payload["media"] = media
+        sys = _system_prompt(runtime_ns)
+        if isinstance(sys, str) and sys.strip():
+            payload["system_prompt"] = sys
+
+        return StepPlan(
+            node_id="plan",
+            effect=Effect(
+                type=EffectType.LLM_CALL,
+                payload=payload,
+                result_key="_temp.plan_llm_response",
+            ),
+            next_node="plan_parse",
+        )
+
+    def plan_parse_node(run: RunState, ctx) -> StepPlan:
+        context, scratchpad, _, temp, _ = ensure_codeact_vars(run)
+        resp = temp.get("plan_llm_response", {})
+        if not isinstance(resp, dict):
+            resp = {}
+        plan_text = resp.get("content")
+        plan = "" if plan_text is None else str(plan_text).strip()
+        if not plan and isinstance(resp.get("data"), dict):
+            plan = json.dumps(resp.get("data"), ensure_ascii=False, indent=2).strip()
+
+        scratchpad["plan"] = plan
+        temp.pop("plan_llm_response", None)
+
+        if plan:
+            context["messages"].append(_new_message(ctx, role="assistant", content=plan, metadata={"kind": "plan"}))
+        emit("plan", {"plan": plan})
+        return StepPlan(node_id="plan_parse", next_node="reason")
+
     def reason_node(run: RunState, ctx) -> StepPlan:
         context, scratchpad, runtime_ns, _, limits = ensure_codeact_vars(run)
 
@@ -159,9 +403,23 @@ def create_codeact_workflow(
         guidance = " | ".join([m for m in inbox_messages if m])
         runtime_ns["inbox"] = []
 
+        messages_view = ActiveContextPolicy.select_active_messages_for_llm_from_run(run)
+
+        # Refresh tool metadata BEFORE rendering Active Memory so token fitting stays accurate
+        # (even though we do not render a "Tools (session)" block into Active Memory prompts).
+        allow = _effective_allowlist(runtime_ns)
+        allowed_defs = _allowed_tool_defs(allow)
+        tool_specs = [t.to_dict() for t in allowed_defs]
+        include_examples = bool(runtime_ns.get("tool_prompt_examples", True))
+        if not include_examples:
+            tool_specs = [{k: v for k, v in spec.items() if k != "examples"} for spec in tool_specs if isinstance(spec, dict)]
+        runtime_ns["tool_specs"] = tool_specs
+        runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
+        runtime_ns.setdefault("allowed_tools", allow)
+
         req = logic.build_request(
             task=str(context.get("task", "") or ""),
-            messages=
+            messages=messages_view,
             guidance=guidance,
             iteration=iteration + 1,
             max_iterations=max_iterations,
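
Tool specs are now refreshed before every reasoning turn, and the `tool_prompt_examples` flag can strip per-tool examples to save prompt tokens. The stripping rule as a standalone sketch (spec contents illustrative):

    specs = [{"name": "read_file", "parameters": {}, "examples": ["…"]}]
    # Drop the "examples" key from every spec, keeping everything else.
    specs = [{k: v for k, v in s.items() if k != "examples"} for s in specs]
    assert "examples" not in specs[0]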
@@ -170,9 +428,25 @@ def create_codeact_workflow(
 
         emit("reason", {"iteration": iteration + 1, "max_iterations": max_iterations, "has_guidance": bool(guidance)})
 
-
+        # IMPORTANT: When we send `messages`, do not also send a non-empty `prompt`.
+        # Some providers/servers will append `prompt` as an extra user message even when the
+        # current request is already present in `messages`, which duplicates user turns and
+        # wastes context budget.
+        payload: Dict[str, Any] = {
+            "prompt": "",
+            "messages": _sanitize_llm_messages(messages_view, limits=limits),
+            "tools": list(tool_specs),
+        }
+        media = extract_media_from_context(context)
+        if media:
+            payload["media"] = media
+        sys = _system_prompt(runtime_ns) or req.system_prompt
+        if isinstance(sys, str) and sys.strip():
+            payload["system_prompt"] = sys
+        params: Dict[str, Any] = {}
         if req.max_tokens is not None:
-
+            params["max_tokens"] = req.max_tokens
+        payload["params"] = runtime_llm_params(runtime_ns, extra=params)
 
         return StepPlan(
             node_id="reason",
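
The reasoning payload now sends the sanitized transcript via `messages` with an empty `prompt`. `_sanitize_llm_messages` (added earlier in this diff) keeps only `role` and `content` and promotes `metadata.call_id` to `tool_call_id` on tool messages. An illustrative before/after:

    # Runtime-owned message (values illustrative):
    runtime_msg = {
        "role": "tool",
        "content": "42",
        "timestamp": "2025-01-01T00:00:00+00:00",                    # dropped
        "metadata": {"call_id": "call_1", "message_id": "msg_abc"},  # dropped, except call_id
    }
    # Sanitized OpenAI-style entry sent to the provider:
    llm_entry = {"role": "tool", "content": "42", "tool_call_id": "call_1"}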
@@ -185,93 +459,359 @@ def create_codeact_workflow(
         )
 
     def parse_node(run: RunState, ctx) -> StepPlan:
-        context,
+        context, scratchpad, runtime_ns, temp, _ = ensure_codeact_vars(run)
         response = temp.get("llm_response", {})
         content, tool_calls = logic.parse_response(response)
 
-        if content:
-            context["messages"].append(_new_message(ctx, role="assistant", content=content))
-
         temp.pop("llm_response", None)
         emit("parse", {"has_tool_calls": bool(tool_calls), "content_preview": (content[:100] if content else "(no content)")})
 
         if tool_calls:
+            if content:
+                context["messages"].append(_new_message(ctx, role="assistant", content=content))
+            if _flag(runtime_ns, "plan_mode", default=False):
+                updated = _extract_plan_update(content)
+                if isinstance(updated, str) and updated.strip():
+                    scratchpad["plan"] = updated.strip()
             temp["pending_tool_calls"] = [tc.__dict__ for tc in tool_calls]
             return StepPlan(node_id="parse", next_node="act")
 
+        # Empty response is an invalid step: recover with a bounded retry that carries evidence.
+        if not isinstance(content, str) or not content.strip():
+            try:
+                empty_retries = int(scratchpad.get("empty_response_retry_count") or 0)
+            except Exception:
+                empty_retries = 0
+
+            if empty_retries < 2:
+                scratchpad["empty_response_retry_count"] = empty_retries + 1
+                emit("parse_retry_empty_response", {"retries": empty_retries + 1})
+                inbox = runtime_ns.get("inbox")
+                if not isinstance(inbox, list):
+                    inbox = []
+                runtime_ns["inbox"] = inbox
+                inbox.append(
+                    {
+                        "content": (
+                            "[Recover] Your last message was empty. Continue the task now. "
+                            "If you need info, CALL tools (preferred). Do not output an empty message."
+                        )
+                    }
+                )
+                return StepPlan(node_id="parse", next_node="reason")
+
+            safe = (
+                "I can't proceed: the model repeatedly returned empty outputs (no content, no tool calls).\n"
+                "Please retry, reduce context, or switch models."
+            )
+            context["messages"].append(_new_message(ctx, role="assistant", content=safe, metadata={"kind": "error"}))
+            temp["final_answer"] = safe
+            temp["pending_tool_calls"] = []
+            scratchpad["empty_response_retry_count"] = 0
+            return StepPlan(node_id="parse", next_node="maybe_review")
+
         code = logic.extract_code(content)
         if code:
+            if content:
+                context["messages"].append(_new_message(ctx, role="assistant", content=content))
+            if _flag(runtime_ns, "plan_mode", default=False):
+                updated = _extract_plan_update(content)
+                if isinstance(updated, str) and updated.strip():
+                    scratchpad["plan"] = updated.strip()
             temp["pending_code"] = code
             return StepPlan(node_id="parse", next_node="execute_code")
 
-
-
+        def _extract_final_answer(text: str) -> tuple[bool, str]:
+            if not isinstance(text, str) or not text.strip():
+                return False, ""
+            s = text.lstrip()
+            if s.upper().startswith("FINAL:"):
+                return True, s[len("FINAL:") :].lstrip()
+            return False, text
+
+        raw = str(content or "").strip()
+        is_final, final = _extract_final_answer(raw)
+        if is_final:
+            if final:
+                context["messages"].append(_new_message(ctx, role="assistant", content=final))
+                if _flag(runtime_ns, "plan_mode", default=False):
+                    updated = _extract_plan_update(final)
+                    if isinstance(updated, str) and updated.strip():
+                        scratchpad["plan"] = updated.strip()
+            temp["final_answer"] = final or "No answer provided"
+            temp["pending_tool_calls"] = []
+            return StepPlan(node_id="parse", next_node="maybe_review")
+
+        # Default: treat as a final answer even without an explicit FINAL marker.
+        if raw:
+            context["messages"].append(_new_message(ctx, role="assistant", content=raw))
+            if _flag(runtime_ns, "plan_mode", default=False):
+                updated = _extract_plan_update(raw)
+                if isinstance(updated, str) and updated.strip():
+                    scratchpad["plan"] = updated.strip()
+        temp["final_answer"] = raw or "No answer provided"
+        temp["pending_tool_calls"] = []
+        scratchpad["empty_response_retry_count"] = 0
+        return StepPlan(node_id="parse", next_node="maybe_review")
 
     def act_node(run: RunState, ctx) -> StepPlan:
-
-
-
-
-
-
+        # Treat `_temp.pending_tool_calls` as a durable queue to avoid dropping tool calls when
+        # schema-only tools (ask_user/memory/etc.) are interleaved with normal tools.
+        context, _, runtime_ns, temp, _ = ensure_codeact_vars(run)
+        raw_queue = temp.get("pending_tool_calls", [])
+        if not isinstance(raw_queue, list) or not raw_queue:
+            temp["pending_tool_calls"] = []
             return StepPlan(node_id="act", next_node="reason")
 
-
-
-
-
-
+        allow = _effective_allowlist(runtime_ns)
+        builtin_effect_tools = {
+            "ask_user",
+            "recall_memory",
+            "inspect_vars",
+            "remember",
+            "remember_note",
+            "compact_memory",
+            "delegate_agent",
+        }
+
+        tool_queue: List[Dict[str, Any]] = []
+        for idx, item in enumerate(raw_queue, start=1):
+            if isinstance(item, ToolCall):
+                d: Dict[str, Any] = {"name": item.name, "arguments": item.arguments, "call_id": item.call_id}
+            elif isinstance(item, dict):
+                d = dict(item)
+            else:
                 continue
+            call_id = str(d.get("call_id") or "").strip()
+            if not call_id:
+                d["call_id"] = str(idx)
+            tool_queue.append(d)
+
+        if not tool_queue:
+            temp["pending_tool_calls"] = []
+            return StepPlan(node_id="act", next_node="reason")
+
+        def _is_builtin(tc: Dict[str, Any]) -> bool:
+            name = tc.get("name")
+            return isinstance(name, str) and name in builtin_effect_tools
+
+        if _is_builtin(tool_queue[0]):
+            tc = tool_queue[0]
+            name = str(tc.get("name") or "").strip()
             args = tc.get("arguments") or {}
-
-
-            choices = list(choices) if isinstance(choices, list) else None
-
-            temp["pending_tool_calls"] = tool_calls[i + 1 :]
-            emit("ask_user", {"question": question, "choices": choices or []})
-            return StepPlan(
-                node_id="act",
-                effect=Effect(
-                    type=EffectType.ASK_USER,
-                    payload={"prompt": question, "choices": choices, "allow_free_text": True},
-                    result_key="_temp.user_response",
-                ),
-                next_node="handle_user_response",
-            )
+            if not isinstance(args, dict):
+                args = {}
 
-
-
-            emit("act", {"tool": tc.get("name", ""), "args": tc.get("arguments", {})})
+            # Pop builtin.
+            temp["pending_tool_calls"] = list(tool_queue[1:])
 
-
-
-
-
-
+            if name and name not in allow:
+                temp["tool_results"] = {
+                    "results": [
+                        {
+                            "call_id": str(tc.get("call_id") or ""),
+                            "name": name,
+                            "success": False,
+                            "output": None,
+                            "error": f"Tool '{name}' is not allowed for this agent",
+                        }
+                    ]
+                }
+                emit("act_blocked", {"tool": name})
+                return StepPlan(node_id="act", next_node="observe")
+
+            if name == "ask_user":
+                question = str(args.get("question") or "Please provide input:")
+                choices = args.get("choices")
+                choices = list(choices) if isinstance(choices, list) else None
+
+                msgs = context.get("messages")
+                if isinstance(msgs, list):
+                    content = f"[Agent question]: {question}"
+                    last = msgs[-1] if msgs else None
+                    last_role = last.get("role") if isinstance(last, dict) else None
+                    last_meta = last.get("metadata") if isinstance(last, dict) else None
+                    last_kind = last_meta.get("kind") if isinstance(last_meta, dict) else None
+                    last_content = last.get("content") if isinstance(last, dict) else None
+                    if not (last_role == "assistant" and last_kind == "ask_user_prompt" and str(last_content or "") == content):
+                        msgs.append(_new_message(ctx, role="assistant", content=content, metadata={"kind": "ask_user_prompt"}))
+
+                emit("ask_user", {"question": question, "choices": choices or []})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(
+                        type=EffectType.ASK_USER,
+                        payload={"prompt": question, "choices": choices, "allow_free_text": True},
+                        result_key="_temp.user_response",
+                    ),
+                    next_node="handle_user_response",
+                )
+
+            if name == "delegate_agent":
+                delegated_task = str(args.get("task") or "").strip()
+                delegated_context = str(args.get("context") or "").strip()
+
+                tools_raw = args.get("tools")
+                if tools_raw is None:
+                    # Inherit the current allowlist, but avoid recursive delegation and avoid waiting on ask_user
+                    # unless explicitly enabled.
+                    child_allow = [t for t in allow if t not in {"delegate_agent", "ask_user"}]
+                else:
+                    child_allow = _normalize_allowlist(tools_raw)
+
+                if not delegated_task:
+                    temp["tool_results"] = {
+                        "results": [
+                            {
+                                "call_id": str(tc.get("call_id") or ""),
+                                "name": "delegate_agent",
+                                "success": False,
+                                "output": None,
+                                "error": "delegate_agent requires a non-empty task",
+                            }
+                        ]
+                    }
+                    return StepPlan(node_id="act", next_node="observe")
+
+                combined_task = delegated_task
+                if delegated_context:
+                    combined_task = f"{delegated_task}\n\nContext:\n{delegated_context}"
+
+                sub_vars: Dict[str, Any] = {
+                    "context": {"task": combined_task, "messages": []},
+                    "_runtime": {
+                        "allowed_tools": list(child_allow),
+                        "system_prompt_extra": (
+                            "You are a delegated sub-agent.\n"
+                            "- Focus ONLY on the delegated task.\n"
+                            "- Use ONLY the allowed tools when needed.\n"
+                            "- Do not ask the user questions; if blocked, state assumptions and proceed.\n"
+                            "- Return a concise result suitable for the parent agent to act on.\n"
+                        ),
+                    },
+                    "_limits": {"max_iterations": 10},
+                }
+
+                payload = {
+                    "workflow_id": str(getattr(run, "workflow_id", "") or "codeact_agent"),
+                    "vars": sub_vars,
+                    "async": False,
+                    "include_traces": False,
+                    # Tool-mode wrapper so the parent receives a normal tool observation (no run failure on child failure).
+                    "wrap_as_tool_result": True,
+                    "tool_name": "delegate_agent",
+                    "call_id": str(tc.get("call_id") or ""),
+                }
+                emit("delegate_agent", {"tools": list(child_allow), "call_id": payload.get("call_id")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.START_SUBWORKFLOW, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
+                )
+
+            if name == "recall_memory":
+                payload = dict(args)
+                payload.setdefault("tool_name", "recall_memory")
+                payload.setdefault("call_id", tc.get("call_id") or "memory")
+                emit("memory_query", {"query": payload.get("query"), "span_id": payload.get("span_id")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_QUERY, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
                 )
-
-
-
+
+            if name == "inspect_vars":
+                payload = dict(args)
+                payload.setdefault("tool_name", "inspect_vars")
+                payload.setdefault("call_id", tc.get("call_id") or "vars")
+                emit("vars_query", {"path": payload.get("path")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.VARS_QUERY, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
+                )
+
+            if name == "remember":
+                payload = dict(args)
+                payload.setdefault("tool_name", "remember")
+                payload.setdefault("call_id", tc.get("call_id") or "memory")
+                emit("memory_tag", {"span_id": payload.get("span_id"), "tags": payload.get("tags")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_TAG, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
                 )
 
+            if name == "remember_note":
+                payload = dict(args)
+                payload.setdefault("tool_name", "remember_note")
+                payload.setdefault("call_id", tc.get("call_id") or "memory")
+                emit("memory_note", {"note": payload.get("note"), "tags": payload.get("tags")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_NOTE, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
+                )
+
+            if name == "compact_memory":
+                payload = dict(args)
+                payload.setdefault("tool_name", "compact_memory")
+                payload.setdefault("call_id", tc.get("call_id") or "compact")
+                emit(
+                    "memory_compact",
+                    {
+                        "preserve_recent": payload.get("preserve_recent"),
+                        "mode": payload.get("compression_mode"),
+                        "focus": payload.get("focus"),
+                    },
+                )
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_COMPACT, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
+                )
+
+            if temp.get("pending_tool_calls"):
+                return StepPlan(node_id="act", next_node="act")
+            return StepPlan(node_id="act", next_node="reason")
+
+        batch: List[Dict[str, Any]] = []
+        for tc in tool_queue:
+            if _is_builtin(tc):
+                break
+            batch.append(tc)
+
+        remaining = tool_queue[len(batch) :]
+        temp["pending_tool_calls"] = list(remaining)
+
+        for tc in batch:
+            emit("act", {"tool": tc.get("name", ""), "args": tc.get("arguments", {}), "call_id": str(tc.get("call_id") or "")})
+
+        formatted_calls: List[Dict[str, Any]] = []
+        for tc in batch:
+            formatted_calls.append(
+                {"name": tc.get("name", ""), "arguments": tc.get("arguments", {}), "call_id": str(tc.get("call_id") or "")}
+            )
+
         return StepPlan(
             node_id="act",
             effect=Effect(
                 type=EffectType.TOOL_CALLS,
-                payload={"tool_calls": formatted_calls},
+                payload={"tool_calls": formatted_calls, "allowed_tools": list(allow)},
                 result_key="_temp.tool_results",
             ),
             next_node="observe",
         )
 
     def execute_code_node(run: RunState, ctx) -> StepPlan:
-        _, _,
+        _, _, runtime_ns, temp, _ = ensure_codeact_vars(run)
         code = temp.get("pending_code")
         if not isinstance(code, str) or not code.strip():
             return StepPlan(node_id="execute_code", next_node="reason")
 
         temp.pop("pending_code", None)
         emit("act", {"tool": "execute_python", "args": {"code": "(inline)", "timeout_s": 10.0}})
+        allow = _effective_allowlist(runtime_ns)
 
         return StepPlan(
             node_id="execute_code",
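
The rewritten `act_node` treats `_temp.pending_tool_calls` as a durable queue: leading non-builtin calls run as a single TOOL_CALLS batch, and the first builtin (ask_user, the memory tools, delegate_agent) is handled on its own pass. A standalone sketch of the batching rule (tool names illustrative):

    builtins = {"ask_user", "recall_memory", "inspect_vars", "remember",
                "remember_note", "compact_memory", "delegate_agent"}
    queue = [
        {"name": "read_file", "arguments": {"path": "a.txt"}, "call_id": "1"},
        {"name": "ask_user", "arguments": {"question": "Proceed?"}, "call_id": "2"},
        {"name": "read_file", "arguments": {"path": "b.txt"}, "call_id": "3"},
    ]
    batch = []
    for tc in queue:
        if tc["name"] in builtins:
            break  # builtins are handled one at a time on the next pass
        batch.append(tc)
    remaining = queue[len(batch):]
    assert [tc["call_id"] for tc in batch] == ["1"]
    assert [tc["call_id"] for tc in remaining] == ["2", "3"]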
@@ -284,7 +824,8 @@ def create_codeact_workflow(
                             "arguments": {"code": code, "timeout_s": 10.0},
                             "call_id": "code",
                         }
-                    ]
+                    ],
+                    "allowed_tools": list(allow),
                 },
                 result_key="_temp.tool_results",
             ),
@@ -292,7 +833,7 @@ def create_codeact_workflow(
         )
 
     def observe_node(run: RunState, ctx) -> StepPlan:
-        context,
+        context, scratchpad, _, temp, _ = ensure_codeact_vars(run)
         tool_results = temp.get("tool_results", {})
         if not isinstance(tool_results, dict):
             tool_results = {}
@@ -308,12 +849,30 @@ def create_codeact_workflow(
             success = bool(r.get("success"))
             output = r.get("output", "")
             error = r.get("error", "")
+            # Prefer a tool-supplied human/LLM-friendly rendering when present.
+            def _display(v: Any) -> str:
+                if isinstance(v, dict):
+                    rendered = v.get("rendered")
+                    if isinstance(rendered, str) and rendered.strip():
+                        return rendered.strip()
+                return "" if v is None else str(v)
+
+            display = _display(output)
+            if not success:
+                # Preserve structured outputs for provenance, but show a clean string to the LLM/UI.
+                display = _display(output) if isinstance(output, dict) else str(error or output)
             rendered = logic.format_observation(
                 name=name,
-                output=
+                output=display,
                 success=success,
             )
-
+            # Observability: avoid truncating normal tool results in step events.
+            # Keep a bounded preview for huge tool outputs to avoid bloating traces/ledgers.
+            preview = rendered
+            if len(preview) > 1000:
+                #[WARNING:TRUNCATION] bounded preview for observability payloads
+                preview = preview[:1000] + f"\n… (truncated, {len(rendered):,} chars total)"
+            emit("observe", {"tool": name, "success": success, "result": preview})
             context["messages"].append(
                 _new_message(
                     ctx,
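
Observation rendering now prefers a tool-supplied `rendered` string over raw structured output, and falls back to the error text when a non-dict result failed. A standalone restatement:

    def display(output, success, error):
        # Mirrors the _display preference added above (standalone restatement).
        if isinstance(output, dict):
            rendered = output.get("rendered")
            if isinstance(rendered, str) and rendered.strip():
                return rendered.strip()
        if not success and not isinstance(output, dict):
            return str(error or output)
        return "" if output is None else str(output)

    assert display({"rendered": "3 rows"}, True, "") == "3 rows"
    assert display(None, False, "timeout") == "timeout"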
@@ -324,6 +883,12 @@ def create_codeact_workflow(
             )
 
         temp.pop("tool_results", None)
+        # Reset verifier/review rounds after executing tools so the verifier can run
+        # again on the next candidate answer.
+        scratchpad["review_count"] = 0
+        pending = temp.get("pending_tool_calls", [])
+        if isinstance(pending, list) and pending:
+            return StepPlan(node_id="observe", next_node="act")
         temp["pending_tool_calls"] = []
         return StepPlan(node_id="observe", next_node="reason")
 
@@ -342,6 +907,274 @@ def create_codeact_workflow(
             return StepPlan(node_id="handle_user_response", next_node="act")
         return StepPlan(node_id="handle_user_response", next_node="reason")
 
+    def maybe_review_node(run: RunState, ctx) -> StepPlan:
+        _, scratchpad, runtime_ns, _, _ = ensure_codeact_vars(run)
+
+        if not _flag(runtime_ns, "review_mode", default=False):
+            return StepPlan(node_id="maybe_review", next_node="done")
+
+        max_rounds = _int(runtime_ns, "review_max_rounds", default=1)
+        if max_rounds < 0:
+            max_rounds = 0
+        count = scratchpad.get("review_count")
+        try:
+            count_int = int(count or 0)
+        except Exception:
+            count_int = 0
+
+        if count_int >= max_rounds:
+            return StepPlan(node_id="maybe_review", next_node="done")
+
+        scratchpad["review_count"] = count_int + 1
+        return StepPlan(node_id="maybe_review", next_node="review")
+
+    def review_node(run: RunState, ctx) -> StepPlan:
+        context, scratchpad, runtime_ns, _, limits = ensure_codeact_vars(run)
+        task = str(context.get("task", "") or "")
+        plan = scratchpad.get("plan")
+        plan_text = str(plan).strip() if isinstance(plan, str) and plan.strip() else "(no plan)"
+
+        allow = _effective_allowlist(runtime_ns)
+
+        def _truncate_block(text: str, *, max_chars: int) -> str:
+            s = str(text or "")
+            if max_chars <= 0:
+                return s
+            if len(s) <= max_chars:
+                return s
+            suffix = f"\n… (truncated, {len(s):,} chars total)"
+            keep = max_chars - len(suffix)
+            if keep < 200:
+                keep = max_chars
+                suffix = ""
+            #[WARNING:TRUNCATION] bounded transcript blocks for prompt reconstruction
+            return s[:keep].rstrip() + suffix
+
+        def _format_allowed_tools() -> str:
+            specs = runtime_ns.get("tool_specs")
+            if not isinstance(specs, list) or not specs:
+                defs = _allowed_tool_defs(allow)
+                specs = [t.to_dict() for t in defs]
+            lines: list[str] = []
+            for spec in specs:
+                if not isinstance(spec, dict):
+                    continue
+                name = str(spec.get("name") or "").strip()
+                if not name:
+                    continue
+                params = spec.get("parameters")
+                props = params.get("properties", {}) if isinstance(params, dict) else {}
+                keys = sorted([k for k in props.keys() if isinstance(k, str)])
+                if keys:
+                    lines.append(f"- {name}({', '.join(keys)})")
+                else:
+                    lines.append(f"- {name}()")
+            return "\n".join(lines) if lines else "(no tools available)"
+
+        messages = list(context.get("messages") or [])
+        tool_msgs: list[str] = []
+        try:
+            tool_limit = int(limits.get("review_max_tool_output_chars", -1))
+        except Exception:
+            tool_limit = -1
+        try:
+            answer_limit = int(limits.get("review_max_answer_chars", -1))
+        except Exception:
+            answer_limit = -1
+
+        for m in reversed(messages):
+            if not isinstance(m, dict) or m.get("role") != "tool":
+                continue
+            content = m.get("content")
+            if isinstance(content, str) and content.strip():
+                tool_msgs.append(_truncate_block(content.strip(), max_chars=tool_limit))
+            if len(tool_msgs) >= 8:
+                break
+        tool_msgs.reverse()
+        observations = "\n\n".join(tool_msgs) if tool_msgs else "(no tool outputs)"
+
+        # Include recent user messages (especially ask_user responses) so the reviewer can
+        # avoid re-asking questions the user already answered.
+        try:
+            user_limit = int(limits.get("review_max_user_message_chars", -1))
+        except Exception:
+            user_limit = -1
+
+        user_msgs: list[str] = []
+        ask_prompts: list[str] = []
+        for m in reversed(messages):
+            if not isinstance(m, dict):
+                continue
+            role = m.get("role")
+            content = m.get("content")
+            if role == "user" and isinstance(content, str) and content.strip():
+                if content.strip() != task.strip():
+                    user_msgs.append(_truncate_block(content.strip(), max_chars=user_limit))
+            if len(user_msgs) >= 4:
+                break
+        for m in reversed(messages):
+            if not isinstance(m, dict):
+                continue
+            if m.get("role") != "assistant":
+                continue
+            meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
+            if not isinstance(meta, dict) or meta.get("kind") != "ask_user_prompt":
+                continue
+            content = m.get("content")
+            if isinstance(content, str) and content.strip():
+                ask_prompts.append(_truncate_block(content.strip(), max_chars=user_limit))
+            if len(ask_prompts) >= 4:
+                break
+
+        user_msgs.reverse()
+        ask_prompts.reverse()
+        user_context = "\n\n".join(user_msgs) if user_msgs else "(no additional user messages)"
+        asked_context = "\n\n".join(ask_prompts) if ask_prompts else "(no ask_user prompts recorded)"
+
+        answer_raw = str(run.vars.get("_temp", {}).get("final_answer") or "")
+        answer_excerpt = ""
+        if not tool_msgs and answer_raw.strip():
+            answer_excerpt = _truncate_block(answer_raw.strip(), max_chars=answer_limit)
+
+        prompt = (
+            "You are a verifier. Review whether the user's request has been fully satisfied.\n"
+            "Be strict: only count actions that are supported by the tool outputs.\n"
+            "If anything is missing, propose the NEXT ACTIONS.\n"
+            "Prefer returning `next_tool_calls` over `next_prompt`.\n"
+            "Return JSON ONLY.\n\n"
+            f"User request:\n{task}\n\n"
+            f"Plan:\n{plan_text}\n\n"
+            f"Recent ask_user prompts:\n{asked_context}\n\n"
+            f"Recent user messages:\n{user_context}\n\n"
+            + (f"Current answer (excerpt):\n{answer_excerpt}\n\n" if answer_excerpt else "")
+            + f"Tool outputs:\n{observations}\n\n"
+            f"Allowed tools:\n{_format_allowed_tools()}\n\n"
+        )
+
+        schema = {
+            "type": "object",
+            "properties": {
+                "complete": {"type": "boolean"},
+                "missing": {"type": "array", "items": {"type": "string"}},
+                "next_prompt": {"type": "string"},
+                "next_tool_calls": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "name": {"type": "string"},
+                            "arguments": {"type": "object"},
+                        },
+                        "required": ["name", "arguments"],
+                        "additionalProperties": False,
+                    },
+                },
+            },
+            "required": ["complete", "missing", "next_prompt", "next_tool_calls"],
+            "additionalProperties": False,
+        }
+
+        emit("review_request", {"tool_messages": len(tool_msgs)})
+
+        payload: Dict[str, Any] = {
+            "prompt": prompt,
+            "response_schema": schema,
+            "response_schema_name": "CodeActVerifier",
+            "params": runtime_llm_params(runtime_ns, extra={"temperature": 0.2}),
+        }
+        media = extract_media_from_context(context)
+        if media:
+            payload["media"] = media
+        sys = _system_prompt(runtime_ns)
+        if sys is not None:
+            payload["system_prompt"] = sys
+
+        return StepPlan(
+            node_id="review",
+            effect=Effect(
+                type=EffectType.LLM_CALL,
+                payload=payload,
+                result_key="_temp.review_llm_response",
+            ),
+            next_node="review_parse",
+        )
+
+    def review_parse_node(run: RunState, ctx) -> StepPlan:
+        _, _, runtime_ns, temp, _ = ensure_codeact_vars(run)
+        resp = temp.get("review_llm_response", {})
+        if not isinstance(resp, dict):
+            resp = {}
+
+        data = resp.get("data")
+        if data is None and isinstance(resp.get("content"), str):
+            try:
+                data = json.loads(resp["content"])
+            except Exception:
+                data = None
+        if not isinstance(data, dict):
+            data = {}
+
+        complete = bool(data.get("complete"))
+        missing = data.get("missing") if isinstance(data.get("missing"), list) else []
+        next_prompt = data.get("next_prompt")
+        next_prompt_text = str(next_prompt or "").strip()
+        next_tool_calls_raw = data.get("next_tool_calls")
+        next_tool_calls: list[dict[str, Any]] = []
+        if isinstance(next_tool_calls_raw, list):
+            for item in next_tool_calls_raw:
+                if not isinstance(item, dict):
+                    continue
+                name = str(item.get("name") or "").strip()
+                args = item.get("arguments")
+                if not isinstance(args, dict):
+                    args = {}
+                if name:
+                    next_tool_calls.append({"name": name, "arguments": args})
+
+        emit("review", {"complete": complete, "missing": missing})
+        temp.pop("review_llm_response", None)
+
+        if complete:
+            return StepPlan(node_id="review_parse", next_node="done")
+
+        if next_tool_calls:
+            temp["pending_tool_calls"] = next_tool_calls
+            emit("review_tool_calls", {"count": len(next_tool_calls)})
+            return StepPlan(node_id="review_parse", next_node="act")
+
+        # Behavioral validation: if incomplete but no tool calls, re-ask reviewer once with stricter rules.
+        if not complete and not next_tool_calls:
+            try:
+                retry_count = int(runtime_ns.get("review_retry_count") or 0)
+            except Exception:
+                retry_count = 0
+            if retry_count < 1:
+                runtime_ns["review_retry_count"] = retry_count + 1
+                inbox = runtime_ns.get("inbox")
+                if not isinstance(inbox, list):
+                    inbox = []
+                runtime_ns["inbox"] = inbox
+                inbox.append(
+                    {
+                        "content": (
+                            "[Review] Your last review output was not actionable. "
+                            "If incomplete, you MUST return at least one `next_tool_call` "
+                            "(use `ask_user` if you need clarification). Return JSON only."
+                        )
+                    }
+                )
+                emit("review_retry_unactionable", {"retry": retry_count + 1})
+                return StepPlan(node_id="review_parse", next_node="review")
+
+        runtime_ns["review_retry_count"] = 0
+        if next_prompt_text:
+            inbox = runtime_ns.get("inbox")
+            if not isinstance(inbox, list):
+                inbox = []
+            runtime_ns["inbox"] = inbox
+            inbox.append({"content": f"[Review] {next_prompt_text}"})
+        return StepPlan(node_id="review_parse", next_node="reason")
+
     def done_node(run: RunState, ctx) -> StepPlan:
         context, scratchpad, _, temp, limits = ensure_codeact_vars(run)
         answer = str(temp.get("final_answer") or "No answer provided")
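
The verifier is constrained to the JSON schema shown above, and `review_parse_node` also tolerates providers that return the JSON as plain `content`. An example reply it would act on (the tool name and values are illustrative):

    import json

    reply = json.loads('''
    {
      "complete": false,
      "missing": ["output file was never written"],
      "next_prompt": "",
      "next_tool_calls": [
        {"name": "write_file", "arguments": {"path": "out.txt", "content": "done"}}
      ]
    }
    ''')
    assert reply["complete"] is False and reply["next_tool_calls"]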
@@ -350,6 +1183,16 @@ def create_codeact_workflow(
         # Prefer _limits.current_iteration, fall back to scratchpad
         iterations = int(limits.get("current_iteration", 0) or scratchpad.get("iteration", 0) or 0)
 
+        # Persist the final answer into the conversation history so it becomes part of the
+        # next run's seed context and shows up in /history.
+        messages = context.get("messages")
+        if isinstance(messages, list):
+            last = messages[-1] if messages else None
+            last_role = last.get("role") if isinstance(last, dict) else None
+            last_content = last.get("content") if isinstance(last, dict) else None
+            if last_role != "assistant" or str(last_content or "") != answer:
+                messages.append(_new_message(ctx, role="assistant", content=answer, metadata={"kind": "final_answer"}))
+
         return StepPlan(
             node_id="done",
             complete_output={
@@ -384,14 +1227,18 @@ def create_codeact_workflow(
         entry_node="init",
         nodes={
            "init": init_node,
+            "plan": plan_node,
+            "plan_parse": plan_parse_node,
            "reason": reason_node,
            "parse": parse_node,
            "act": act_node,
            "execute_code": execute_code_node,
            "observe": observe_node,
            "handle_user_response": handle_user_response_node,
+            "maybe_review": maybe_review_node,
+            "review": review_node,
+            "review_parse": review_parse_node,
            "done": done_node,
            "max_iterations": max_iterations_node,
        },
    )
-