abstractagent 0.2.0-py3-none-any.whl → 0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractagent/adapters/__init__.py +2 -1
- abstractagent/adapters/codeact_runtime.py +823 -57
- abstractagent/adapters/memact_runtime.py +721 -0
- abstractagent/adapters/react_runtime.py +1114 -67
- abstractagent/agents/__init__.py +4 -0
- abstractagent/agents/base.py +58 -1
- abstractagent/agents/codeact.py +89 -18
- abstractagent/agents/memact.py +244 -0
- abstractagent/agents/react.py +91 -18
- abstractagent/logic/__init__.py +2 -0
- abstractagent/logic/builtins.py +212 -5
- abstractagent/logic/codeact.py +87 -80
- abstractagent/logic/memact.py +127 -0
- abstractagent/logic/react.py +108 -48
- abstractagent/repl.py +24 -447
- abstractagent/scripts/__init__.py +5 -0
- abstractagent/scripts/lmstudio_tool_eval.py +426 -0
- abstractagent/tools/__init__.py +3 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/METADATA +10 -11
- abstractagent-0.3.0.dist-info/RECORD +31 -0
- abstractagent/ui/__init__.py +0 -5
- abstractagent/ui/question.py +0 -197
- abstractagent-0.2.0.dist-info/RECORD +0 -28
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/WHEEL +0 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/entry_points.txt +0 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/top_level.txt +0 -0
abstractagent/adapters/react_runtime.py:

@@ -9,10 +9,10 @@ from typing import Any, Callable, Dict, List, Optional
 from abstractcore.tools import ToolCall
 from abstractruntime import Effect, EffectType, RunState, StepPlan, WorkflowSpec
 from abstractruntime.core.vars import ensure_limits, ensure_namespaces
+from abstractruntime.memory.active_context import ActiveContextPolicy

 from ..logic.react import ReActLogic

-
 def _new_message(
     ctx: Any,
     *,
@@ -29,11 +29,16 @@ def _new_message(

     timestamp = datetime.now(timezone.utc).isoformat()

+    import uuid
+
+    meta = dict(metadata or {})
+    meta.setdefault("message_id", f"msg_{uuid.uuid4().hex}")
+
     return {
         "role": role,
         "content": content,
         "timestamp": timestamp,
-        "metadata":
+        "metadata": meta,
     }

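Every persisted message now carries a stable `message_id`, generated once and preserved if the caller already supplied one. A minimal standalone sketch of the resulting shape (`new_message_sketch` is an illustrative stand-in for the private `_new_message`, which also takes a runtime `ctx`):

```python
import uuid
from datetime import datetime, timezone

def new_message_sketch(role: str, content: str, metadata: dict | None = None) -> dict:
    # Mirrors the hunk above: copy caller metadata, then add a stable
    # message_id only when one is not already present.
    meta = dict(metadata or {})
    meta.setdefault("message_id", f"msg_{uuid.uuid4().hex}")
    return {
        "role": role,
        "content": content,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "metadata": meta,
    }

msg = new_message_sketch("tool", "ok", {"call_id": "42"})
assert msg["metadata"]["call_id"] == "42"          # caller metadata survives
assert msg["metadata"]["message_id"].startswith("msg_")
```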
@@ -89,6 +94,13 @@ def ensure_react_vars(run: RunState) -> tuple[Dict[str, Any], Dict[str, Any], Di
     if scratchpad["max_iterations"] < 1:
         scratchpad["max_iterations"] = 1

+    # Track whether any external tools were actually executed during this run.
+    # This is used to reliably trigger a final "synthesis" pass so the agent
+    # returns a user-facing answer instead of echoing tool observations.
+    used_tools = scratchpad.get("used_tools")
+    if not isinstance(used_tools, bool):
+        scratchpad["used_tools"] = bool(used_tools) if used_tools is not None else False
+
     return context, scratchpad, runtime_ns, temp, limits

@@ -103,6 +115,10 @@ def create_react_workflow(
     *,
     logic: ReActLogic,
     on_step: Optional[Callable[[str, Dict[str, Any]], None]] = None,
+    workflow_id: str = "react_agent",
+    provider: Optional[str] = None,
+    model: Optional[str] = None,
+    allowed_tools: Optional[List[str]] = None,
 ) -> WorkflowSpec:
     """Adapt ReActLogic to an AbstractRuntime workflow."""

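The four new keyword arguments let callers pin workflow identity, provider/model, and a tool allowlist when the spec is built. A hypothetical invocation (the `ReActLogic` constructor shown here is an assumption; only the `create_react_workflow` keywords come from this diff):

```python
from abstractagent.logic.react import ReActLogic
from abstractagent.adapters.react_runtime import create_react_workflow

logic = ReActLogic(tools=[])  # assumed constructor; the real signature lives in logic/react.py

spec = create_react_workflow(
    logic=logic,
    on_step=lambda step, data: print(step, data),   # optional observability hook
    workflow_id="research_agent",                   # new; defaults to "react_agent"
    provider="lmstudio",                            # static pin; _runtime vars may still override
    model="qwen2.5-7b-instruct",
    allowed_tools=["web_search", "fetch_url"],      # filtered against the names on logic.tools
)
```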
@@ -110,9 +126,200 @@ def create_react_workflow(
         if on_step:
             on_step(step, data)

-
-
-
+    def _current_tool_defs() -> list[Any]:
+        """Return the current tool definitions from the logic (dynamic)."""
+        defs = getattr(logic, "tools", None)
+        if not isinstance(defs, list):
+            try:
+                defs = list(defs)  # type: ignore[arg-type]
+            except Exception:
+                defs = []
+        return [t for t in defs if getattr(t, "name", None)]
+
+    def _tool_by_name() -> dict[str, Any]:
+        out: dict[str, Any] = {}
+        for t in _current_tool_defs():
+            name = getattr(t, "name", None)
+            if isinstance(name, str) and name.strip():
+                out[name] = t
+        return out
+
+    def _default_allowlist() -> list[str]:
+        if isinstance(allowed_tools, list):
+            allow = [str(t).strip() for t in allowed_tools if isinstance(t, str) and t.strip()]
+            return allow if allow else []
+        # Default allowlist: all tools currently known to the logic (deduped, order preserved).
+        out: list[str] = []
+        seen: set[str] = set()
+        for t in _current_tool_defs():
+            name = getattr(t, "name", None)
+            if not isinstance(name, str) or not name.strip() or name in seen:
+                continue
+            seen.add(name)
+            out.append(name)
+        return out
+
+    def _normalize_allowlist(raw: Any) -> list[str]:
+        items: list[Any]
+        if isinstance(raw, list):
+            items = raw
+        elif isinstance(raw, tuple):
+            items = list(raw)
+        elif isinstance(raw, str):
+            items = [raw]
+        else:
+            items = []
+
+        out: list[str] = []
+        seen: set[str] = set()
+        current = _tool_by_name()
+        for t in items:
+            if not isinstance(t, str):
+                continue
+            name = t.strip()
+            if not name:
+                continue
+            if name in seen:
+                continue
+            # Only accept tool names known to the workflow's logic (dynamic).
+            if name not in current:
+                continue
+            seen.add(name)
+            out.append(name)
+        return out
+
+    def _effective_allowlist(runtime_ns: Dict[str, Any]) -> list[str]:
+        # Allow runtime vars to override tool selection (Visual Agent tools pin).
+        if isinstance(runtime_ns, dict) and "allowed_tools" in runtime_ns:
+            normalized = _normalize_allowlist(runtime_ns.get("allowed_tools"))
+            runtime_ns["allowed_tools"] = normalized
+            return normalized
+        return _normalize_allowlist(list(_default_allowlist()))
+
+    def _allowed_tool_defs(allow: list[str]) -> list[Any]:
+        out: list[Any] = []
+        current = _tool_by_name()
+        for name in allow:
+            tool = current.get(name)
+            if tool is not None:
+                out.append(tool)
+        return out
+
+    def _system_prompt(runtime_ns: Dict[str, Any]) -> Optional[str]:
+        raw = runtime_ns.get("system_prompt") if isinstance(runtime_ns, dict) else None
+        if isinstance(raw, str) and raw.strip():
+            return raw
+        return None
+
+    def _sanitize_llm_messages(messages: Any, *, limits: Optional[Dict[str, Any]] = None) -> List[Dict[str, str]]:
+        """Convert runtime-owned message dicts into OpenAI-style {role, content, ...}.
+
+        Runtime messages can include extra metadata fields (`timestamp`, `metadata`) that many providers
+        will reject. Keep only the fields the LLM API expects.
+        """
+        if not isinstance(messages, list) or not messages:
+            return []
+        # Keep the LLM-visible context bounded even if the durable history contains large
+        # tool outputs or code dumps.
+        def _limit_int(key: str, default: int) -> int:
+            if not isinstance(limits, dict):
+                return default
+            try:
+                return int(limits.get(key, default))
+            except Exception:
+                return default
+        max_message_chars = _limit_int("max_message_chars", -1)
+        max_tool_message_chars = _limit_int("max_tool_message_chars", -1)
+
+        def _truncate(text: str, *, max_chars: int) -> str:
+            if max_chars <= 0:
+                return text
+            if len(text) <= max_chars:
+                return text
+            suffix = f"\n… (truncated, {len(text):,} chars total)"
+            keep = max_chars - len(suffix)
+            if keep < 200:
+                keep = max_chars
+                suffix = ""
+            return text[:keep].rstrip() + suffix
+
+        out: List[Dict[str, str]] = []
+        for m in messages:
+            if not isinstance(m, dict):
+                continue
+            role = str(m.get("role") or "").strip()
+            content = m.get("content")
+            if not role or content is None:
+                continue
+            content_str = str(content)
+            if not content_str.strip():
+                continue
+            limit = max_tool_message_chars if role == "tool" else max_message_chars
+            entry: Dict[str, str] = {"role": role, "content": _truncate(content_str, max_chars=limit)}
+            if role == "tool":
+                meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
+                call_id = meta.get("call_id") if isinstance(meta, dict) else None
+                if call_id is not None and str(call_id).strip():
+                    # OpenAI-compatible servers accept `tool_call_id` for tool messages.
+                    entry["tool_call_id"] = str(call_id).strip()
+            out.append(entry)
+        return out
+
+    def _flag(runtime_ns: Dict[str, Any], key: str, *, default: bool = False) -> bool:
+        if not isinstance(runtime_ns, dict) or key not in runtime_ns:
+            return bool(default)
+        val = runtime_ns.get(key)
+        if isinstance(val, bool):
+            return val
+        if isinstance(val, (int, float)):
+            return bool(val)
+        if isinstance(val, str):
+            lowered = val.strip().lower()
+            if lowered in ("1", "true", "yes", "on", "enabled"):
+                return True
+            if lowered in ("0", "false", "no", "off", "disabled"):
+                return False
+        return bool(default)
+
+    def _int(runtime_ns: Dict[str, Any], key: str, *, default: int) -> int:
+        if not isinstance(runtime_ns, dict) or key not in runtime_ns:
+            return int(default)
+        val = runtime_ns.get(key)
+        try:
+            return int(val)  # type: ignore[arg-type]
+        except Exception:
+            return int(default)
+
+    def _extract_plan_update(content: str) -> Optional[str]:
+        """Extract a plan update block from model content (best-effort).
+
+        Convention (prompted in Plan mode): the model appends a final section:
+
+        Plan Update:
+        - [ ] ...
+        - [x] ...
+        """
+        if not isinstance(content, str) or not content.strip():
+            return None
+        import re
+
+        lines = content.splitlines()
+        header_idx: Optional[int] = None
+        for i, line in enumerate(lines):
+            if re.match(r"(?i)^\s*plan\s*update\s*:\s*$", line.strip()):
+                header_idx = i
+        if header_idx is None:
+            return None
+        plan_lines = lines[header_idx + 1 :]
+        while plan_lines and not plan_lines[0].strip():
+            plan_lines.pop(0)
+        plan_text = "\n".join(plan_lines).strip()
+        if not plan_text:
+            return None
+        # Require at least one bullet/numbered line to avoid accidental captures.
+        if not re.search(r"(?m)^\s*(?:[-*]|\d+\.)\s+", plan_text):
+            return None
+        return plan_text

     def init_node(run: RunState, ctx) -> StepPlan:
         context, scratchpad, runtime_ns, _, limits = ensure_react_vars(run)
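`_sanitize_llm_messages` strips runtime-only fields before the history is sent to a provider and surfaces a tool message's `metadata.call_id` as the OpenAI-style `tool_call_id`. A condensed, runnable mirror of that mapping (truncation limits omitted for brevity):

```python
from typing import Any

def sanitize_sketch(messages: list[dict[str, Any]]) -> list[dict[str, str]]:
    # Drop timestamp/metadata, skip empty turns, keep role/content,
    # and promote call_id to tool_call_id on tool messages.
    out: list[dict[str, str]] = []
    for m in messages:
        role = str(m.get("role") or "").strip()
        content = str(m.get("content") or "")
        if not role or not content.strip():
            continue
        entry = {"role": role, "content": content}
        if role == "tool":
            call_id = (m.get("metadata") or {}).get("call_id")
            if call_id is not None and str(call_id).strip():
                entry["tool_call_id"] = str(call_id).strip()
        out.append(entry)
    return out

history = [
    {"role": "user", "content": "hi", "timestamp": "2025-01-01T00:00:00Z"},
    {"role": "tool", "content": "result", "metadata": {"call_id": "7"}},
]
assert sanitize_sketch(history) == [
    {"role": "user", "content": "hi"},
    {"role": "tool", "content": "result", "tool_call_id": "7"},
]
```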
@@ -127,13 +334,75 @@ def create_react_workflow(
         messages.append(_new_message(ctx, role="user", content=task))

         # Ensure toolset metadata is present for audit/debug.
-        runtime_ns
-
+        allow = _effective_allowlist(runtime_ns)
+        allowed_defs = _allowed_tool_defs(allow)
+        tool_specs = [t.to_dict() for t in allowed_defs]
+        runtime_ns["tool_specs"] = tool_specs
+        runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
+        runtime_ns.setdefault("allowed_tools", allow)
         runtime_ns.setdefault("inbox", [])

         emit("init", {"task": task})
+        if _flag(runtime_ns, "plan_mode", default=False) and not isinstance(scratchpad.get("plan"), str):
+            return StepPlan(node_id="init", next_node="plan")
         return StepPlan(node_id="init", next_node="reason")

+    def plan_node(run: RunState, ctx) -> StepPlan:
+        context, scratchpad, runtime_ns, _, _ = ensure_react_vars(run)
+        task = str(context.get("task", "") or "")
+
+        allow = _effective_allowlist(runtime_ns)
+
+        prompt = (
+            "You are preparing a high-level execution plan for the user's request.\n"
+            "Return a concise TODO list (5–12 steps) that is actionable and verifiable.\n"
+            "Do not call tools yet. Do not include role prefixes like 'assistant:'.\n\n"
+            f"User request:\n{task}\n\n"
+            "Plan (markdown checklist):\n"
+            "- [ ] ...\n"
+        )
+
+        emit("plan_request", {"tools": allow})
+
+        payload: Dict[str, Any] = {"prompt": prompt, "params": {"temperature": 0.2}}
+        sys = _system_prompt(runtime_ns)
+        if isinstance(sys, str) and sys.strip():
+            payload["system_prompt"] = sys
+        eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
+        eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
+        if isinstance(eff_provider, str) and eff_provider.strip():
+            payload["provider"] = eff_provider.strip()
+        if isinstance(eff_model, str) and eff_model.strip():
+            payload["model"] = eff_model.strip()
+
+        return StepPlan(
+            node_id="plan",
+            effect=Effect(
+                type=EffectType.LLM_CALL,
+                payload=payload,
+                result_key="_temp.plan_llm_response",
+            ),
+            next_node="plan_parse",
+        )
+
+    def plan_parse_node(run: RunState, ctx) -> StepPlan:
+        context, scratchpad, _, temp, _ = ensure_react_vars(run)
+        resp = temp.get("plan_llm_response", {})
+        if not isinstance(resp, dict):
+            resp = {}
+        plan_text = resp.get("content")
+        plan = "" if plan_text is None else str(plan_text).strip()
+        if not plan and isinstance(resp.get("data"), dict):
+            plan = json.dumps(resp.get("data"), ensure_ascii=False, indent=2).strip()
+
+        scratchpad["plan"] = plan
+        temp.pop("plan_llm_response", None)
+
+        if plan:
+            context["messages"].append(_new_message(ctx, role="assistant", content=plan, metadata={"kind": "plan"}))
+        emit("plan", {"plan": plan})
+        return StepPlan(node_id="plan_parse", next_node="reason")
+
     def reason_node(run: RunState, ctx) -> StepPlan:
         context, scratchpad, runtime_ns, _, limits = ensure_react_vars(run)

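Plan mode hinges on a formatting convention rather than a schema: `plan_node` asks for a markdown checklist, and later turns may append a `Plan Update:` section that `_extract_plan_update` folds back into the scratchpad. A condensed rerun of the same regexes on a sample reply:

```python
import re

reply = (
    "Searched the docs and updated two files.\n\n"
    "Plan Update:\n"
    "- [x] Locate the config loader\n"
    "- [ ] Add the new flag\n"
)

lines = reply.splitlines()
# Same header match as _extract_plan_update (case-insensitive, whole line).
header = next(i for i, l in enumerate(lines) if re.match(r"(?i)^\s*plan\s*update\s*:\s*$", l.strip()))
plan = "\n".join(lines[header + 1 :]).strip()
# At least one bullet/numbered line is required to avoid accidental captures.
assert re.search(r"(?m)^\s*(?:[-*]|\d+\.)\s+", plan)
print(plan)
```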
@@ -157,7 +426,19 @@ def create_react_workflow(
         limits["current_iteration"] = iteration + 1

         task = str(context.get("task", "") or "")
-
+        messages_view = ActiveContextPolicy.select_active_messages_for_llm_from_run(run)
+
+        # Refresh tool metadata BEFORE rendering Active Memory so token fitting stays accurate
+        # (even though we do not render a "Tools (session)" block into Active Memory prompts).
+        allow = _effective_allowlist(runtime_ns)
+        allowed_defs = _allowed_tool_defs(allow)
+        tool_specs = [t.to_dict() for t in allowed_defs]
+        include_examples = bool(runtime_ns.get("tool_prompt_examples", True))
+        if not include_examples:
+            tool_specs = [{k: v for k, v in spec.items() if k != "examples"} for spec in tool_specs if isinstance(spec, dict)]
+        runtime_ns["tool_specs"] = tool_specs
+        runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
+        runtime_ns.setdefault("allowed_tools", allow)

         inbox = runtime_ns.get("inbox", [])
         guidance = ""
@@ -165,10 +446,9 @@ def create_react_workflow(
             inbox_messages = [str(m.get("content", "") or "") for m in inbox if isinstance(m, dict)]
             guidance = " | ".join([m for m in inbox_messages if m])
             runtime_ns["inbox"] = []
-
         req = logic.build_request(
             task=task,
-            messages=
+            messages=messages_view,
             guidance=guidance,
             iteration=iteration + 1,
             max_iterations=max_iterations,
@@ -177,9 +457,34 @@ def create_react_workflow(

         emit("reason", {"iteration": iteration + 1, "max_iterations": max_iterations, "has_guidance": bool(guidance)})

-
+        # Provide the selected active-context messages as proper chat messages (sanitized).
+        #
+        # IMPORTANT: When we send `messages`, do not also send a non-empty `prompt`.
+        # Some providers/servers will append `prompt` as an extra user message even when the
+        # current request is already present in `messages`, which duplicates user turns and
+        # wastes context budget.
+        payload: Dict[str, Any] = {"prompt": ""}
+        payload["messages"] = _sanitize_llm_messages(messages_view, limits=limits)
+        tools_payload = list(tool_specs)
+        if tools_payload:
+            payload["tools"] = tools_payload
+        sys = _system_prompt(runtime_ns) or req.system_prompt
+        if isinstance(sys, str) and sys.strip():
+            payload["system_prompt"] = sys
+        # Provider/model can be configured statically (create_react_workflow args)
+        # or injected dynamically through durable vars in `_runtime` (Visual Agent pins).
+        eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
+        eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
+        if isinstance(eff_provider, str) and eff_provider.strip():
+            payload["provider"] = eff_provider.strip()
+        if isinstance(eff_model, str) and eff_model.strip():
+            payload["model"] = eff_model.strip()
+        params: Dict[str, Any] = {}
         if req.max_tokens is not None:
-
+            params["max_tokens"] = req.max_tokens
+        # Tool calling is formatting-sensitive; bias toward deterministic output when tools are present.
+        params["temperature"] = 0.2 if tools_payload else 0.7
+        payload["params"] = params

         return StepPlan(
             node_id="reason",
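The net effect of these hunks is that each reasoning call ships the conversation as structured `messages` with a deliberately empty `prompt`, plus tool specs and deterministic sampling when tools are present. An illustrative payload (all values invented for the sketch; the keys match the hunks above):

```python
payload = {
    "prompt": "",                         # empty on purpose; see the IMPORTANT comment above
    "messages": [                         # sanitized active-context view
        {"role": "user", "content": "Summarize the latest release notes"},
        {"role": "tool", "content": "release notes text", "tool_call_id": "1"},
    ],
    "tools": [{"name": "web_search", "description": "Search the web"}],
    "system_prompt": "You are a ReAct agent.",
    "provider": "lmstudio",               # present only when pinned statically or via _runtime
    "model": "qwen2.5-7b-instruct",
    "params": {"max_tokens": 2048, "temperature": 0.2},  # 0.2 because tools are attached
}
```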
@@ -191,96 +496,525 @@ def create_react_workflow(
             next_node="parse",
         )

+    def tool_retry_minimal_node(run: RunState, ctx) -> StepPlan:
+        """Recovery path when the model fabricates `observation[...]` logs instead of calling tools.
+
+        This intentionally sends a minimal prompt (no History/Scratchpad) to reduce
+        long-context contamination and force either a real tool call or a direct answer.
+        """
+        context, scratchpad, runtime_ns, temp, _ = ensure_react_vars(run)
+        task = str(context.get("task", "") or "")
+
+        allow = _effective_allowlist(runtime_ns)
+        allowed_defs = _allowed_tool_defs(allow)
+        tool_specs = [t.to_dict() for t in allowed_defs]
+        include_examples = bool(runtime_ns.get("tool_prompt_examples", True))
+        if not include_examples:
+            tool_specs = [{k: v for k, v in spec.items() if k != "examples"} for spec in tool_specs if isinstance(spec, dict)]
+        runtime_ns["tool_specs"] = tool_specs
+        runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
+        runtime_ns.setdefault("allowed_tools", allow)
+        # Reuse the canonical agent rules from ReActLogic (but do not include history in prompt).
+        sys_req = logic.build_request(task=task, messages=[], guidance="", iteration=0, max_iterations=0, vars=run.vars)
+
+        bad_excerpt = str(temp.get("tool_retry_bad_content") or "").strip()
+        temp.pop("tool_retry_bad_content", None)
+        if len(bad_excerpt) > 240:
+            bad_excerpt = bad_excerpt[:240].rstrip() + "…"
+
+        prompt = (
+            "Task:\n"
+            f"{task}\n\n"
+            "Your previous message was invalid: it contained fabricated `observation[...]` tool logs, but no tool was called.\n\n"
+            "Now do ONE of the following:\n"
+            "1) If you need more information to answer correctly, CALL ONE OR MORE TOOLS now using the required tool call format.\n"
+            "2) If you can answer without tools, answer directly WITHOUT mentioning any tool calls or observations.\n\n"
+            "Rules:\n"
+            "- Do NOT write `observation[` anywhere.\n"
+            "- Do NOT fabricate tool results.\n"
+            "- If you call tools, output ONLY tool call block(s) (no extra text).\n"
+            "- You MAY batch multiple tool calls by repeating the tool-call block once per call (prefer independent calls).\n"
+        )
+        if bad_excerpt:
+            prompt += f"\nBad output excerpt (do not copy):\n{bad_excerpt}\n"
+
+        payload: Dict[str, Any] = {"prompt": prompt}
+        if tool_specs:
+            payload["tools"] = tool_specs
+        sys = _system_prompt(runtime_ns) or sys_req.system_prompt
+        if isinstance(sys, str) and sys.strip():
+            payload["system_prompt"] = sys
+
+        eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
+        eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
+        if isinstance(eff_provider, str) and eff_provider.strip():
+            payload["provider"] = eff_provider.strip()
+        if isinstance(eff_model, str) and eff_model.strip():
+            payload["model"] = eff_model.strip()
+
+        payload["params"] = {"temperature": 0.2}
+
+        emit("tool_retry_minimal", {"tools": allow, "has_excerpt": bool(bad_excerpt)})
+        return StepPlan(
+            node_id="tool_retry_minimal",
+            effect=Effect(
+                type=EffectType.LLM_CALL,
+                payload=payload,
+                result_key="_temp.llm_response",
+            ),
+            next_node="parse",
+        )
+
+    def empty_response_retry_node(run: RunState, ctx) -> StepPlan:
+        """Recovery path when the model returns an empty message (no content, no tool calls).
+
+        This is treated as an invalid agent step. We re-prompt with the original task plus
+        recent tool evidence and explicitly require either tool calls or a substantive answer.
+        """
+        context, scratchpad, runtime_ns, _, _ = ensure_react_vars(run)
+        task = str(context.get("task", "") or "")
+
+        allow = _effective_allowlist(runtime_ns)
+        allowed_defs = _allowed_tool_defs(allow)
+        tool_specs = [t.to_dict() for t in allowed_defs]
+        include_examples = bool(runtime_ns.get("tool_prompt_examples", True))
+        if not include_examples:
+            tool_specs = [{k: v for k, v in spec.items() if k != "examples"} for spec in tool_specs if isinstance(spec, dict)]
+        runtime_ns["tool_specs"] = tool_specs
+        runtime_ns["toolset_id"] = _compute_toolset_id(tool_specs)
+        runtime_ns.setdefault("allowed_tools", allow)
+
+        # Include recent tool outputs and user messages as evidence (bounded).
+        messages = list(context.get("messages") or [])
+        evidence_lines: list[str] = []
+        tool_count = 0
+        user_count = 0
+        for m in reversed(messages):
+            if not isinstance(m, dict):
+                continue
+            role = m.get("role")
+            content = m.get("content")
+            if role == "tool" and isinstance(content, str) and content.strip():
+                evidence_lines.append(content.strip())
+                tool_count += 1
+            elif role == "user" and isinstance(content, str) and content.strip():
+                # Avoid duplicating the original task.
+                if content.strip() != task.strip():
+                    evidence_lines.append(content.strip())
+                    user_count += 1
+            if tool_count >= 6 and user_count >= 2:
+                break
+        evidence_lines.reverse()
+        evidence = "\n\n".join(evidence_lines) if evidence_lines else "(no prior evidence captured)"
+
+        # Build a strong corrective prompt. Prefer tools; allow a direct answer if truly possible.
+        prompt = (
+            "The previous assistant message was EMPTY (no content and no tool calls). This is invalid.\n"
+            "Recover by continuing the task using the evidence below.\n\n"
+            f"Task:\n{task}\n\n"
+            f"Evidence (recent tool outputs + user messages):\n{evidence}\n\n"
+            "Now do EXACTLY ONE of the following:\n"
+            "1) CALL one or more tools to make progress (preferred).\n"
+            "2) If you already have enough evidence, provide a concise final answer.\n\n"
+            "Rules:\n"
+            "- Do not output an empty message.\n"
+            "- Do not ask the user a question in plain text; use the `ask_user` tool.\n"
+            "- If you call tools, include the tool call(s) directly (no preamble).\n"
+        )
+
+        payload: Dict[str, Any] = {"prompt": prompt}
+        if tool_specs:
+            payload["tools"] = list(tool_specs)
+        sys = _system_prompt(runtime_ns)
+        if isinstance(sys, str) and sys.strip():
+            payload["system_prompt"] = sys
+        eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
+        eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
+        if isinstance(eff_provider, str) and eff_provider.strip():
+            payload["provider"] = eff_provider.strip()
+        if isinstance(eff_model, str) and eff_model.strip():
+            payload["model"] = eff_model.strip()
+        payload["params"] = {"temperature": 0.2}
+
+        emit("empty_response_retry", {"tools": allow, "evidence": bool(evidence_lines)})
+        return StepPlan(
+            node_id="empty_response_retry",
+            effect=Effect(type=EffectType.LLM_CALL, payload=payload, result_key="_temp.llm_response"),
+            next_node="parse",
+        )
+
     def parse_node(run: RunState, ctx) -> StepPlan:
-        context,
+        context, scratchpad, runtime_ns, temp, _ = ensure_react_vars(run)
         response = temp.get("llm_response", {})
         content, tool_calls = logic.parse_response(response)

-
+        def _sanitize_tool_call_content(text: str) -> str:
+            """Remove tool-transcript markers from assistant content before persisting to history.
+
+            Some OSS models may include internal transcript artifacts (e.g. fabricated
+            `observation[...]` lines) or embed the tool call itself inside the message
+            (`Action:` blocks). We keep only the user-facing prose that appears *before*
+            such markers so the runtime doesn't persist fabricated logs into context.
+            """
+            if not isinstance(text, str) or not text.strip():
+                return ""
+            out_lines: list[str] = []
+            for line in text.splitlines():
+                lowered = line.lstrip().lower()
+                if lowered.startswith("observation["):
+                    break
+                if lowered.startswith("action:"):
+                    break
+                if lowered.startswith("<|tool_call|>") or lowered.startswith("<tool_call>"):
+                    break
+                if lowered.startswith("```tool_call") or lowered.startswith("```tool_code"):
+                    break
+                out_lines.append(line)
+            return "\n".join(out_lines).rstrip()
+
+        def _should_retry_for_missing_tool_call(text: str) -> bool:
+            if not isinstance(text, str) or not text.strip():
+                return False
+            # Some models echo our internal History formatting (e.g. `observation[web_search] (success): ...`)
+            # as transcript lines. Treat only *line-start* occurrences as suspicious (avoid false positives
+            # in JSON/code blocks), and only use this signal when no tools have actually run yet.
+            for line in text.splitlines():
+                if line.lstrip().lower().startswith("observation["):
+                    return True
+            return False
+
+        def _extract_final_answer(text: str) -> tuple[bool, str]:
+            """Return (is_explicit_final, stripped_answer)."""
+            if not isinstance(text, str) or not text.strip():
+                return False, ""
+            s = text.lstrip()
+            if s.upper().startswith("FINAL:"):
+                return True, s[len("FINAL:") :].lstrip()
+            return False, text

         emit(
             "parse",
             {
                 "has_tool_calls": bool(tool_calls),
-                "
+                "content": content,
+                "tool_calls": [{"name": tc.name, "arguments": tc.arguments, "call_id": tc.call_id} for tc in tool_calls],
             },
         )
         temp.pop("llm_response", None)

+        # Reset retry counter on any successful tool-call detection.
         if tool_calls:
+            scratchpad["tool_retry_count"] = 0
+            scratchpad["tool_retry_minimal_used"] = False
+
+        if tool_calls:
+            clean = _sanitize_tool_call_content(content)
+            if clean.strip():
+                context["messages"].append(_new_message(ctx, role="assistant", content=clean))
+            if _flag(runtime_ns, "plan_mode", default=False):
+                updated = _extract_plan_update(clean)
+                if isinstance(updated, str) and updated.strip():
+                    scratchpad["plan"] = updated.strip()
             temp["pending_tool_calls"] = [tc.__dict__ for tc in tool_calls]
             return StepPlan(node_id="parse", next_node="act")

-
-
+        # Empty response is an invalid step: recover with a bounded retry that carries evidence.
+        if not isinstance(content, str) or not content.strip():
+            try:
+                empty_retries = int(scratchpad.get("empty_response_retry_count") or 0)
+            except Exception:
+                empty_retries = 0
+
+            if empty_retries < 2:
+                scratchpad["empty_response_retry_count"] = empty_retries + 1
+                emit("parse_retry_empty_response", {"retries": empty_retries + 1})
+                return StepPlan(node_id="parse", next_node="empty_response_retry")
+
+            safe = (
+                "I can't proceed: the model repeatedly returned empty outputs (no content, no tool calls).\n"
+                "Please retry, reduce context, or switch models."
+            )
+            context["messages"].append(_new_message(ctx, role="assistant", content=safe, metadata={"kind": "error"}))
+            temp["final_answer"] = safe
+            temp["pending_tool_calls"] = []
+            scratchpad["empty_response_retry_count"] = 0
+            return StepPlan(node_id="parse", next_node="maybe_review")
+
+        # If the model appears to have produced a fake "observation[tool]" transcript instead of
+        # calling tools, give it one corrective retry before treating the message as final.
+        if not bool(scratchpad.get("used_tools")) and _should_retry_for_missing_tool_call(content):
+            try:
+                retries = int(scratchpad.get("tool_retry_count") or 0)
+            except Exception:
+                retries = 0
+            if retries < 2:
+                scratchpad["tool_retry_count"] = retries + 1
+                inbox = runtime_ns.get("inbox")
+                if not isinstance(inbox, list):
+                    inbox = []
+                    runtime_ns["inbox"] = inbox
+                inbox.append(
+                    {
+                        "role": "system",
+                        "content": (
+                            "You wrote an `observation[...]` line, but no tool was actually called.\n"
+                            "Do NOT fabricate tool outputs.\n"
+                            "If you need to search/fetch/read/write, CALL a tool now using the required tool call format.\n"
+                            "Never output `observation[...]` markers; those are context-only."
+                        ),
+                    }
+                )
+                emit("parse_retry_missing_tool_call", {"retries": retries + 1})
+                return StepPlan(node_id="parse", next_node="reason")
+
+            # If the model still fails after retries, attempt a single minimal-context recovery call
+            # instead of accepting a fabricated transcript as the final answer.
+            if not bool(scratchpad.get("tool_retry_minimal_used")):
+                scratchpad["tool_retry_minimal_used"] = True
+                scratchpad["tool_retry_count"] = 0
+                temp["tool_retry_bad_content"] = content
+                emit("parse_retry_minimal_context", {"retries": retries})
+                return StepPlan(node_id="parse", next_node="tool_retry_minimal")
+
+            safe = (
+                "I can't proceed safely: the model repeatedly produced fabricated `observation[...]` tool logs instead of calling tools.\n"
+                "Please retry, reduce context, or switch models."
+            )
+            context["messages"].append(_new_message(ctx, role="assistant", content=safe, metadata={"kind": "error"}))
+            temp["final_answer"] = safe
+            scratchpad["tool_retry_count"] = 0
+            return StepPlan(node_id="parse", next_node="maybe_review")
+
+        final_raw = _sanitize_tool_call_content(content)
+        if not final_raw.strip():
+            final_raw = str(content or "").strip()
+
+        is_final, final_text = _extract_final_answer(final_raw)
+        if is_final:
+            if final_text:
+                context["messages"].append(_new_message(ctx, role="assistant", content=final_text))
+            if _flag(runtime_ns, "plan_mode", default=False):
+                updated = _extract_plan_update(final_text)
+                if isinstance(updated, str) and updated.strip():
+                    scratchpad["plan"] = updated.strip()
+            temp["final_answer"] = final_text or "No answer provided"
+            temp["pending_tool_calls"] = []
+            scratchpad["tool_retry_count"] = 0
+            return StepPlan(node_id="parse", next_node="maybe_review")
+
+        # Default: treat as a normal final answer even if it lacks an explicit FINAL marker.
+        final = final_raw
+        if final:
+            context["messages"].append(_new_message(ctx, role="assistant", content=final))
+        if _flag(runtime_ns, "plan_mode", default=False):
+            updated = _extract_plan_update(final)
+            if isinstance(updated, str) and updated.strip():
+                scratchpad["plan"] = updated.strip()
+
+        temp["final_answer"] = final or "No answer provided"
+        temp["pending_tool_calls"] = []
+        scratchpad["tool_retry_count"] = 0
+        scratchpad["empty_response_retry_count"] = 0
+        return StepPlan(node_id="parse", next_node="maybe_review")

     def act_node(run: RunState, ctx) -> StepPlan:
-
-
-
-
-
-        if not
+        # Treat `_temp.pending_tool_calls` as a durable queue.
+        # This avoids dropping calls when schema-only tools (ask_user/memory/etc.) are interleaved
+        # with normal tools, and avoids re-asking the same question due to missing context.
+        context, scratchpad, runtime_ns, temp, _ = ensure_react_vars(run)
+        raw_queue = temp.get("pending_tool_calls", [])
+        if not isinstance(raw_queue, list) or not raw_queue:
+            temp["pending_tool_calls"] = []
             return StepPlan(node_id="act", next_node="reason")

-
-
-
-
-
+        allow = _effective_allowlist(runtime_ns)
+        builtin_effect_tools = {
+            "ask_user",
+            "recall_memory",
+            "inspect_vars",
+            "remember",
+            "remember_note",
+            "compact_memory",
+        }
+
+        # Normalize queue items and assign stable call_ids once so splitting into batches does not
+        # introduce duplicate ids.
+        tool_queue: List[Dict[str, Any]] = []
+        for idx, item in enumerate(raw_queue, start=1):
+            if isinstance(item, ToolCall):
+                d: Dict[str, Any] = {"name": item.name, "arguments": item.arguments, "call_id": item.call_id}
+            elif isinstance(item, dict):
+                d = dict(item)
+            else:
                 continue
+            call_id = str(d.get("call_id") or "").strip()
+            if not call_id:
+                d["call_id"] = str(idx)
+            tool_queue.append(d)
+
+        if not tool_queue:
+            temp["pending_tool_calls"] = []
+            return StepPlan(node_id="act", next_node="reason")
+
+        def _is_builtin(tc: Dict[str, Any]) -> bool:
+            name = tc.get("name")
+            return isinstance(name, str) and name in builtin_effect_tools
+
+        # Execute one schema-only builtin (if it is next), otherwise execute the longest contiguous
+        # prefix of normal tools. Leave the remainder queued for subsequent act/observe cycles.
+        if _is_builtin(tool_queue[0]):
+            tc = tool_queue[0]
+            name = str(tc.get("name") or "").strip()
             args = tc.get("arguments") or {}
-
-
-
-
-            temp["pending_tool_calls"] =
-
-
-
-
-
-
-
-
-
+            if not isinstance(args, dict):
+                args = {}
+
+            # Pop the builtin from the queue.
+            temp["pending_tool_calls"] = list(tool_queue[1:])
+
+            if name and name not in allow:
+                temp["tool_results"] = {
+                    "results": [
+                        {
+                            "call_id": str(tc.get("call_id") or ""),
+                            "name": name,
+                            "success": False,
+                            "output": None,
+                            "error": f"Tool '{name}' is not allowed for this agent",
+                        }
+                    ]
+                }
+                emit("act_blocked", {"tool": name})
+                return StepPlan(node_id="act", next_node="observe")
+
+            if name == "ask_user":
+                question = str(args.get("question") or "Please provide input:")
+                choices = args.get("choices")
+                choices = list(choices) if isinstance(choices, list) else None
+
+                # Persist the asked question in the durable message history so both the main model
+                # and the reviewer can see what was asked (and avoid re-asking).
+                msgs = context.get("messages")
+                if isinstance(msgs, list):
+                    content = f"[Agent question]: {question}"
+                    last = msgs[-1] if msgs else None
+                    last_role = last.get("role") if isinstance(last, dict) else None
+                    last_meta = last.get("metadata") if isinstance(last, dict) else None
+                    last_kind = last_meta.get("kind") if isinstance(last_meta, dict) else None
+                    last_content = last.get("content") if isinstance(last, dict) else None
+                    if not (last_role == "assistant" and last_kind == "ask_user_prompt" and str(last_content or "") == content):
+                        msgs.append(_new_message(ctx, role="assistant", content=content, metadata={"kind": "ask_user_prompt"}))
+
+                emit("ask_user", {"question": question, "choices": choices or []})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(
+                        type=EffectType.ASK_USER,
+                        payload={"prompt": question, "choices": choices, "allow_free_text": True},
+                        result_key="_temp.user_response",
+                    ),
+                    next_node="handle_user_response",
+                )

-
-
+            if name == "recall_memory":
+                payload = dict(args)
+                payload.setdefault("tool_name", "recall_memory")
+                payload.setdefault("call_id", tc.get("call_id") or "memory")
+                emit("memory_query", {"query": payload.get("query"), "span_id": payload.get("span_id")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_QUERY, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
+                )

-
-
-
-
-
-
-
-
-
+            if name == "inspect_vars":
+                payload = dict(args)
+                payload.setdefault("tool_name", "inspect_vars")
+                payload.setdefault("call_id", tc.get("call_id") or "vars")
+                emit("vars_query", {"path": payload.get("path")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.VARS_QUERY, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
+                )
+
+            if name == "remember":
+                payload = dict(args)
+                payload.setdefault("tool_name", "remember")
+                payload.setdefault("call_id", tc.get("call_id") or "memory")
+                emit("memory_tag", {"span_id": payload.get("span_id"), "tags": payload.get("tags")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_TAG, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
                 )
-
-
+
+            if name == "remember_note":
+                payload = dict(args)
+                payload.setdefault("tool_name", "remember_note")
+                payload.setdefault("call_id", tc.get("call_id") or "memory")
+                emit("memory_note", {"note": payload.get("note"), "tags": payload.get("tags")})
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_NOTE, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
+                )
+
+            if name == "compact_memory":
+                payload = dict(args)
+                payload.setdefault("tool_name", "compact_memory")
+                payload.setdefault("call_id", tc.get("call_id") or "compact")
+                emit(
+                    "memory_compact",
                     {
-                        "
-                        "
-                        "
-                    }
+                        "preserve_recent": payload.get("preserve_recent"),
+                        "mode": payload.get("compression_mode"),
+                        "focus": payload.get("focus"),
+                    },
+                )
+                return StepPlan(
+                    node_id="act",
+                    effect=Effect(type=EffectType.MEMORY_COMPACT, payload=payload, result_key="_temp.tool_results"),
+                    next_node="observe",
                 )

+            # Unknown builtin: continue with the queue (best-effort).
+            if temp.get("pending_tool_calls"):
+                return StepPlan(node_id="act", next_node="act")
+            return StepPlan(node_id="act", next_node="reason")
+
+        # Normal tools: execute contiguous prefix until the next builtin.
+        batch: List[Dict[str, Any]] = []
+        for tc in tool_queue:
+            if _is_builtin(tc):
+                break
+            batch.append(tc)
+
+        remaining = tool_queue[len(batch) :]
+        temp["pending_tool_calls"] = list(remaining)
+
+        # Emit observability events for the batch.
+        for tc in batch:
+            emit("act", {"tool": tc.get("name", ""), "args": tc.get("arguments", {}), "call_id": str(tc.get("call_id") or "")})
+
+        formatted_calls: List[Dict[str, Any]] = []
+        for tc in batch:
+            formatted_calls.append(
+                {"name": tc.get("name", ""), "arguments": tc.get("arguments", {}), "call_id": str(tc.get("call_id") or "")}
+            )
+
         return StepPlan(
             node_id="act",
             effect=Effect(
                 type=EffectType.TOOL_CALLS,
-                payload={"tool_calls": formatted_calls},
+                payload={"tool_calls": formatted_calls, "allowed_tools": list(allow)},
                 result_key="_temp.tool_results",
             ),
             next_node="observe",
         )

     def observe_node(run: RunState, ctx) -> StepPlan:
-        context,
+        context, scratchpad, _, temp, _ = ensure_react_vars(run)
         tool_results = temp.get("tool_results", {})
         if not isinstance(tool_results, dict):
             tool_results = {}
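The queueing rule in `act_node` is worth stating compactly: a schema-only builtin at the head of the queue runs alone through its dedicated effect, while any leading run of normal tools becomes one TOOL_CALLS batch; everything after the split stays queued. A standalone model of that split (tool names other than the builtins are invented for the example):

```python
BUILTINS = {"ask_user", "recall_memory", "inspect_vars", "remember", "remember_note", "compact_memory"}

def split_queue(queue: list[dict]) -> tuple[list[dict], list[dict]]:
    # Builtin next: execute it alone via its own effect type.
    if queue and queue[0].get("name") in BUILTINS:
        return [queue[0]], queue[1:]
    # Otherwise: longest contiguous prefix of normal tools as one batch.
    batch: list[dict] = []
    for tc in queue:
        if tc.get("name") in BUILTINS:
            break
        batch.append(tc)
    return batch, queue[len(batch):]

q = [{"name": "web_search"}, {"name": "fetch_url"}, {"name": "ask_user"}, {"name": "fetch_url"}]
batch, remaining = split_queue(q)
assert [t["name"] for t in batch] == ["web_search", "fetch_url"]
assert [t["name"] for t in remaining] == ["ask_user", "fetch_url"]
```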
@@ -288,6 +1022,16 @@ def create_react_workflow(
         results = tool_results.get("results", [])
         if not isinstance(results, list):
             results = []
+        if results:
+            scratchpad["used_tools"] = True
+
+        # Prefer a tool-supplied human/LLM-friendly rendering when present.
+        def _display(v: Any) -> str:
+            if isinstance(v, dict):
+                rendered = v.get("rendered")
+                if isinstance(rendered, str) and rendered.strip():
+                    return rendered.strip()
+            return "" if v is None else str(v)

         for r in results:
             if not isinstance(r, dict):
@@ -296,12 +1040,17 @@ def create_react_workflow(
             success = bool(r.get("success"))
             output = r.get("output", "")
             error = r.get("error", "")
+            display = _display(output)
+            if not success:
+                # Preserve structured outputs for provenance, but show a clean string to the LLM/UI.
+                display = _display(output) if isinstance(output, dict) else str(error or output)
             rendered = logic.format_observation(
                 name=name,
-                output=
+                output=display,
                 success=success,
             )
-            emit("observe", {"tool": name, "result": rendered
+            emit("observe", {"tool": name, "success": success, "result": rendered})
+
             context["messages"].append(
                 _new_message(
                     ctx,
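`_display` gives tools a way to control their own presentation: structured outputs that include a non-empty `rendered` string are shown as that string, while raw values fall back to `str()`. A runnable mirror:

```python
from typing import Any

def display(v: Any) -> str:
    # Prefer a tool-supplied "rendered" string over repr-style stringification.
    if isinstance(v, dict):
        rendered = v.get("rendered")
        if isinstance(rendered, str) and rendered.strip():
            return rendered.strip()
    return "" if v is None else str(v)

assert display({"rendered": "3 results found", "hits": [1, 2, 3]}) == "3 results found"
assert display({"hits": [1, 2, 3]}) == "{'hits': [1, 2, 3]}"
assert display(None) == ""
```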
@@ -316,9 +1065,291 @@ def create_react_workflow(
             )

         temp.pop("tool_results", None)
+        # Reset verifier/review rounds after executing tools. This enables repeated
+        # verify→act→observe cycles without immediately hitting review_max_rounds.
+        scratchpad["review_count"] = 0
+        pending = temp.get("pending_tool_calls", [])
+        if isinstance(pending, list) and pending:
+            return StepPlan(node_id="observe", next_node="act")
         temp["pending_tool_calls"] = []
         return StepPlan(node_id="observe", next_node="reason")

+    def maybe_review_node(run: RunState, ctx) -> StepPlan:
+        _, scratchpad, runtime_ns, _, _ = ensure_react_vars(run)
+
+        if not _flag(runtime_ns, "review_mode", default=False):
+            return StepPlan(node_id="maybe_review", next_node="done")
+
+        max_rounds = _int(runtime_ns, "review_max_rounds", default=1)
+        if max_rounds < 0:
+            max_rounds = 0
+        count = scratchpad.get("review_count")
+        try:
+            count_int = int(count or 0)
+        except Exception:
+            count_int = 0
+
+        if count_int >= max_rounds:
+            return StepPlan(node_id="maybe_review", next_node="done")
+
+        scratchpad["review_count"] = count_int + 1
+        return StepPlan(node_id="maybe_review", next_node="review")
+
+    def review_node(run: RunState, ctx) -> StepPlan:
+        context, scratchpad, runtime_ns, _, limits = ensure_react_vars(run)
+
+        task = str(context.get("task", "") or "")
+        plan = scratchpad.get("plan")
+        plan_text = str(plan).strip() if isinstance(plan, str) and plan.strip() else "(no plan)"
+
+        allow = _effective_allowlist(runtime_ns)
+
+        def _truncate_block(text: str, *, max_chars: int) -> str:
+            s = str(text or "")
+            if max_chars <= 0:
+                return s
+            if len(s) <= max_chars:
+                return s
+            suffix = f"\n… (truncated, {len(s):,} chars total)"
+            keep = max_chars - len(suffix)
+            if keep < 200:
+                keep = max_chars
+                suffix = ""
+            return s[:keep].rstrip() + suffix
+
+        def _format_allowed_tools() -> str:
+            # Prefer the already-computed tool_specs (created in reason_node) to avoid
+            # re-materializing tool definitions and to keep formatting stable.
+            specs = runtime_ns.get("tool_specs")
+            if not isinstance(specs, list) or not specs:
+                defs = _allowed_tool_defs(allow)
+                specs = [t.to_dict() for t in defs]
+            lines: list[str] = []
+            for spec in specs:
+                if not isinstance(spec, dict):
+                    continue
+                name = str(spec.get("name") or "").strip()
+                if not name:
+                    continue
+                params = spec.get("parameters")
+                props = params.get("properties", {}) if isinstance(params, dict) else {}
+                keys = sorted([k for k in props.keys() if isinstance(k, str)])
+                if keys:
+                    lines.append(f"- {name}({', '.join(keys)})")
+                else:
+                    lines.append(f"- {name}()")
+            return "\n".join(lines) if lines else "(no tools available)"
+
+        # Include recent tool outputs for evidence-based review.
+        messages = list(context.get("messages") or [])
+        tool_msgs: list[str] = []
+        try:
+            tool_limit = int(limits.get("review_max_tool_output_chars", -1))
+        except Exception:
+            tool_limit = -1
+        try:
+            answer_limit = int(limits.get("review_max_answer_chars", -1))
+        except Exception:
+            answer_limit = -1
+
+        for m in reversed(messages):
+            if not isinstance(m, dict) or m.get("role") != "tool":
+                continue
+            content = m.get("content")
+            if isinstance(content, str) and content.strip():
+                tool_msgs.append(_truncate_block(content.strip(), max_chars=tool_limit))
+            if len(tool_msgs) >= 8:
+                break
+        tool_msgs.reverse()
+        observations = "\n\n".join(tool_msgs) if tool_msgs else "(no tool outputs)"
+
+        # Include recent user messages (especially ask_user responses) so the reviewer can
+        # avoid re-asking questions the user already answered.
+        try:
+            user_limit = int(limits.get("review_max_user_message_chars", -1))
+        except Exception:
+            user_limit = -1
+
+        user_msgs: list[str] = []
+        ask_prompts: list[str] = []
+        for m in reversed(messages):
+            if not isinstance(m, dict):
+                continue
+            role = m.get("role")
+            content = m.get("content")
+            if role == "user" and isinstance(content, str) and content.strip():
+                if content.strip() != task.strip():
+                    user_msgs.append(_truncate_block(content.strip(), max_chars=user_limit))
+            if len(user_msgs) >= 4:
+                break
+        for m in reversed(messages):
+            if not isinstance(m, dict):
+                continue
+            if m.get("role") != "assistant":
+                continue
+            meta = m.get("metadata") if isinstance(m.get("metadata"), dict) else {}
+            if not isinstance(meta, dict) or meta.get("kind") != "ask_user_prompt":
+                continue
+            content = m.get("content")
+            if isinstance(content, str) and content.strip():
+                ask_prompts.append(_truncate_block(content.strip(), max_chars=user_limit))
+            if len(ask_prompts) >= 4:
+                break
+
+        user_msgs.reverse()
+        ask_prompts.reverse()
+        user_context = "\n\n".join(user_msgs) if user_msgs else "(no additional user messages)"
+        asked_context = "\n\n".join(ask_prompts) if ask_prompts else "(no ask_user prompts recorded)"
+
+        # The verifier should primarily judge based on tool outputs. Only include an answer
+        # excerpt when we have no tool evidence (pure Q&A runs).
+        answer_raw = str(run.vars.get("_temp", {}).get("final_answer") or "")
+        answer_excerpt = ""
+        if not tool_msgs and answer_raw.strip():
+            answer_excerpt = _truncate_block(answer_raw.strip(), max_chars=answer_limit)
+
+        prompt = (
+            "You are a verifier. Review whether the user's request has been fully satisfied.\n"
+            "Be strict: only count actions that are supported by the tool outputs.\n"
+            "If anything is missing, propose the NEXT ACTIONS.\n"
+            "Prefer returning `next_tool_calls` over `next_prompt`.\n"
+            "Return JSON ONLY.\n\n"
+            f"User request:\n{task}\n\n"
+            f"Plan:\n{plan_text}\n\n"
+            f"Recent ask_user prompts:\n{asked_context}\n\n"
+            f"Recent user messages:\n{user_context}\n\n"
+            + (f"Current answer (excerpt):\n{answer_excerpt}\n\n" if answer_excerpt else "")
+            + f"Tool outputs:\n{observations}\n\n"
+            f"Allowed tools:\n{_format_allowed_tools()}\n\n"
+        )
+
+        schema = {
+            "type": "object",
+            "properties": {
+                "complete": {"type": "boolean"},
+                "missing": {"type": "array", "items": {"type": "string"}},
+                "next_prompt": {"type": "string"},
+                "next_tool_calls": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "name": {"type": "string"},
+                            "arguments": {"type": "object"},
+                        },
+                        "required": ["name", "arguments"],
+                        "additionalProperties": False,
+                    },
+                },
+            },
+            "required": ["complete", "missing", "next_prompt", "next_tool_calls"],
+            "additionalProperties": False,
+        }
+
+        emit("review_request", {"tool_messages": len(tool_msgs)})
+
+        payload: Dict[str, Any] = {
+            "prompt": prompt,
+            "response_schema": schema,
+            "response_schema_name": "ReActVerifier",
+            "params": {"temperature": 0.2},
+        }
+        sys = _system_prompt(runtime_ns)
+        if sys is not None:
+            payload["system_prompt"] = sys
+        eff_provider = provider if isinstance(provider, str) and provider.strip() else runtime_ns.get("provider")
+        eff_model = model if isinstance(model, str) and model.strip() else runtime_ns.get("model")
+        if isinstance(eff_provider, str) and eff_provider.strip():
+            payload["provider"] = eff_provider.strip()
+        if isinstance(eff_model, str) and eff_model.strip():
+            payload["model"] = eff_model.strip()
+
+        return StepPlan(
+            node_id="review",
+            effect=Effect(
+                type=EffectType.LLM_CALL,
+                payload=payload,
+                result_key="_temp.review_llm_response",
+            ),
+            next_node="review_parse",
+        )
+
+    def review_parse_node(run: RunState, ctx) -> StepPlan:
+        _, _, runtime_ns, temp, _ = ensure_react_vars(run)
+        resp = temp.get("review_llm_response", {})
+        if not isinstance(resp, dict):
+            resp = {}
+
+        data = resp.get("data")
+        if data is None and isinstance(resp.get("content"), str):
+            try:
+                data = json.loads(resp["content"])
+            except Exception:
+                data = None
+        if not isinstance(data, dict):
+            data = {}
+
+        complete = bool(data.get("complete"))
+        missing = data.get("missing") if isinstance(data.get("missing"), list) else []
+        next_prompt = data.get("next_prompt")
+        next_prompt_text = str(next_prompt or "").strip()
+        next_tool_calls_raw = data.get("next_tool_calls")
+        next_tool_calls: list[dict[str, Any]] = []
+        if isinstance(next_tool_calls_raw, list):
+            for item in next_tool_calls_raw:
+                if not isinstance(item, dict):
+                    continue
+                name = str(item.get("name") or "").strip()
+                args = item.get("arguments")
+                if not isinstance(args, dict):
+                    args = {}
+                if name:
+                    next_tool_calls.append({"name": name, "arguments": args})
+
+        emit("review", {"complete": complete, "missing": missing})
+        temp.pop("review_llm_response", None)
+
+        if complete:
+            return StepPlan(node_id="review_parse", next_node="done")
+
+        if next_tool_calls:
+            temp["pending_tool_calls"] = next_tool_calls
+            emit("review_tool_calls", {"count": len(next_tool_calls)})
+            return StepPlan(node_id="review_parse", next_node="act")
+
+        # Behavioral validation: if incomplete but no tool calls, re-ask reviewer once with stricter rules.
+        if not complete and not next_tool_calls:
+            try:
+                retry_count = int(runtime_ns.get("review_retry_count") or 0)
+            except Exception:
+                retry_count = 0
+            if retry_count < 1:
+                runtime_ns["review_retry_count"] = retry_count + 1
+                inbox = runtime_ns.get("inbox")
+                if not isinstance(inbox, list):
+                    inbox = []
+                    runtime_ns["inbox"] = inbox
+                inbox.append(
+                    {
+                        "content": (
+                            "[Review] Your last review output was not actionable. "
+                            "If incomplete, you MUST return at least one `next_tool_call` "
+                            "(use `ask_user` if you need clarification). Return JSON only."
+                        )
+                    }
+                )
+                emit("review_retry_unactionable", {"retry": retry_count + 1})
+                return StepPlan(node_id="review_parse", next_node="review")
+
+        runtime_ns["review_retry_count"] = 0
+        if next_prompt_text:
+            inbox = runtime_ns.get("inbox")
+            if not isinstance(inbox, list):
+                inbox = []
+                runtime_ns["inbox"] = inbox
+            inbox.append({"content": f"[Review] {next_prompt_text}"})
+        return StepPlan(node_id="review_parse", next_node="reason")
+
     def handle_user_response_node(run: RunState, ctx) -> StepPlan:
         context, _, _, temp, _ = ensure_react_vars(run)
         user_response = temp.get("user_response", {})
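The verifier is pinned to the `ReActVerifier` schema above, so `review_parse_node` only ever has to route on three fields. A response that would send the run back through `act` might look like this (the tool name and arguments are invented for illustration):

```python
review = {
    "complete": False,
    "missing": ["The summary was never written to disk"],
    "next_prompt": "",
    "next_tool_calls": [
        {"name": "write_file", "arguments": {"path": "summary.md", "content": "..."}},
    ],
}
# review_parse_node queues these via _temp.pending_tool_calls and jumps to "act";
# with complete=True it goes to "done", and with neither it retries the reviewer once.
```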
@@ -344,6 +1375,16 @@ def create_react_workflow(
         # Prefer _limits.current_iteration, fall back to scratchpad
         iterations = int(limits.get("current_iteration", 0) or scratchpad.get("iteration", 0) or 0)

+        # Persist the final user-facing answer into the conversation history so it shows up
+        # in /history and becomes part of the next run's seed context.
+        messages = context.get("messages")
+        if isinstance(messages, list):
+            last = messages[-1] if messages else None
+            last_role = last.get("role") if isinstance(last, dict) else None
+            last_content = last.get("content") if isinstance(last, dict) else None
+            if last_role != "assistant" or str(last_content or "") != answer:
+                messages.append(_new_message(ctx, role="assistant", content=answer, metadata={"kind": "final_answer"}))
+
         return StepPlan(
             node_id="done",
             complete_output={
@@ -374,17 +1415,23 @@ def create_react_workflow(
         )

     return WorkflowSpec(
-        workflow_id="react_agent",
+        workflow_id=str(workflow_id or "react_agent"),
         entry_node="init",
         nodes={
             "init": init_node,
+            "plan": plan_node,
+            "plan_parse": plan_parse_node,
             "reason": reason_node,
+            "tool_retry_minimal": tool_retry_minimal_node,
+            "empty_response_retry": empty_response_retry_node,
             "parse": parse_node,
             "act": act_node,
             "observe": observe_node,
             "handle_user_response": handle_user_response_node,
+            "maybe_review": maybe_review_node,
+            "review": review_node,
+            "review_parse": review_parse_node,
             "done": done_node,
             "max_iterations": max_iterations_node,
         },
     )
-
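Read together, the new nodes form the control-flow graph below (successors collected from each node's `next_node` values in the hunks above; `handle_user_response`, `done`, and `max_iterations` keep their pre-0.3.0 bodies and are omitted):

```python
# Possible successors per node, as wired by this diff.
EDGES = {
    "init": {"plan", "reason"},
    "plan": {"plan_parse"},
    "plan_parse": {"reason"},
    "reason": {"parse"},
    "parse": {"act", "reason", "empty_response_retry", "tool_retry_minimal", "maybe_review"},
    "tool_retry_minimal": {"parse"},
    "empty_response_retry": {"parse"},
    "act": {"observe", "reason", "act", "handle_user_response"},
    "observe": {"act", "reason"},
    "maybe_review": {"done", "review"},
    "review": {"review_parse"},
    "review_parse": {"done", "act", "review", "reason"},
}
```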