abstractagent 0.2.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractagent/adapters/__init__.py +2 -1
- abstractagent/adapters/codeact_runtime.py +907 -60
- abstractagent/adapters/generation_params.py +82 -0
- abstractagent/adapters/media.py +45 -0
- abstractagent/adapters/memact_runtime.py +959 -0
- abstractagent/adapters/react_runtime.py +1357 -135
- abstractagent/agents/__init__.py +4 -0
- abstractagent/agents/base.py +89 -1
- abstractagent/agents/codeact.py +125 -18
- abstractagent/agents/memact.py +280 -0
- abstractagent/agents/react.py +129 -18
- abstractagent/logic/__init__.py +2 -0
- abstractagent/logic/builtins.py +270 -5
- abstractagent/logic/codeact.py +91 -81
- abstractagent/logic/memact.py +128 -0
- abstractagent/logic/react.py +91 -50
- abstractagent/repl.py +24 -447
- abstractagent/scripts/__init__.py +5 -0
- abstractagent/scripts/lmstudio_tool_eval.py +426 -0
- abstractagent/tools/__init__.py +9 -0
- abstractagent-0.3.1.dist-info/METADATA +112 -0
- abstractagent-0.3.1.dist-info/RECORD +33 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.1.dist-info}/WHEEL +1 -1
- abstractagent/ui/__init__.py +0 -5
- abstractagent/ui/question.py +0 -197
- abstractagent-0.2.0.dist-info/METADATA +0 -134
- abstractagent-0.2.0.dist-info/RECORD +0 -28
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.1.dist-info}/entry_points.txt +0 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
"""LMStudio tool-synergy evaluation harness (manual, real LLM).
|
|
2
|
+
|
|
3
|
+
Runs a few deterministic scenarios against a real provider/model (e.g. LMStudio
|
|
4
|
+
`qwen/qwen3-next-80b`) and prints a compact summary of tool usage patterns:
|
|
5
|
+
- whether the model uses analyze_code/search_files before bounded reads
|
|
6
|
+
- whether edits succeed without repeated identical retries
|
|
7
|
+
- how often it reads whole files vs slices
|
|
8
|
+
|
|
9
|
+
This is intentionally NOT a required CI test. It should be run manually.
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
ABSTRACTAGENT_TEST_PROVIDER=lmstudio \
|
|
13
|
+
ABSTRACTAGENT_TEST_MODEL=qwen/qwen3-next-80b \
|
|
14
|
+
ABSTRACTAGENT_TEST_BASE_URL=http://localhost:1234/v1 \
|
|
15
|
+
python -m abstractagent.scripts.lmstudio_tool_eval
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
import os
|
|
22
|
+
import tempfile
|
|
23
|
+
from datetime import datetime, timezone
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _llm_config() -> Tuple[str, str, Dict[str, Any]]:
    """Resolve the provider, model and LLM keyword arguments from the environment.

    Reads ``ABSTRACTAGENT_TEST_PROVIDER`` (default ``"lmstudio"``),
    ``ABSTRACTAGENT_TEST_MODEL`` (default ``"qwen/qwen3-next-80b"``) and the
    optional ``ABSTRACTAGENT_TEST_BASE_URL``.

    Returns:
        A ``(provider, model, llm_kwargs)`` tuple. ``llm_kwargs`` always
        carries ``temperature=0`` and ``seed=42``; ``base_url`` is added only
        when the environment variable holds a non-empty value.
    """
    env = os.environ
    settings: Dict[str, Any] = {"temperature": 0, "seed": 42}
    endpoint = env.get("ABSTRACTAGENT_TEST_BASE_URL")
    if endpoint:
        settings["base_url"] = endpoint
    return (
        env.get("ABSTRACTAGENT_TEST_PROVIDER", "lmstudio"),
        env.get("ABSTRACTAGENT_TEST_MODEL", "qwen/qwen3-next-80b"),
        settings,
    )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(frozen=True)
class ToolCallEvent:
    """One tool call extracted from a node trace, with its outcome when one was recorded."""

    # Stringified "ts" value of the trace step ("" when the step has none).
    ts: str
    # Tool name with surrounding whitespace stripped.
    name: str
    # Arguments of the call ({} when the trace did not hold a dict).
    arguments: Dict[str, Any]
    # Result's "success" flag coerced to bool; None when no result was matched.
    success: Optional[bool]
    # Result's "error" value as text; None when absent.
    error: Optional[str]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _iter_tool_calls(traces: Dict[str, Any]) -> List[ToolCallEvent]:
    """Collect every tool call recorded in ``traces`` as a list of events.

    Args:
        traces: Mapping of node id to a trace dict. Only steps whose
            ``effect`` has ``type == "tool_calls"`` are considered;
            malformed entries (wrong types, unnamed calls) are skipped.

    Returns:
        ``ToolCallEvent`` objects ordered by their ``ts`` string (stable for
        equal timestamps). Each call is paired with the entry at the same
        index of the step's ``result["results"]`` list when that entry is a
        dict; otherwise ``success`` and ``error`` stay ``None``.
    """
    collected: List[ToolCallEvent] = []

    for trace in (traces or {}).values():
        step_list = trace.get("steps") if isinstance(trace, dict) else None
        if not isinstance(step_list, list):
            continue

        for step in step_list:
            if not isinstance(step, dict):
                continue
            effect = step.get("effect")
            if not isinstance(effect, dict) or effect.get("type") != "tool_calls":
                continue

            payload = effect.get("payload")
            calls = payload.get("tool_calls") if isinstance(payload, dict) else None
            if not isinstance(calls, list):
                continue

            step_result = step.get("result")
            outcomes = step_result.get("results") if isinstance(step_result, dict) else None
            if not isinstance(outcomes, list):
                outcomes = []

            stamp = str(step.get("ts") or "")

            for position, call in enumerate(calls):
                if not isinstance(call, dict):
                    continue
                tool_name = str(call.get("name") or "").strip()
                if not tool_name:
                    continue

                call_args = call.get("arguments")
                if not isinstance(call_args, dict):
                    call_args = {}

                # Results are matched to calls purely by list position.
                outcome = outcomes[position] if position < len(outcomes) else None
                succeeded: Optional[bool] = None
                failure: Optional[str] = None
                if isinstance(outcome, dict):
                    raw_success = outcome.get("success")
                    raw_error = outcome.get("error")
                    if raw_success is not None:
                        succeeded = bool(raw_success)
                    if raw_error is not None:
                        failure = str(raw_error)

                collected.append(
                    ToolCallEvent(
                        ts=stamp,
                        name=tool_name,
                        arguments=dict(call_args),
                        success=succeeded,
                        error=failure,
                    )
                )

    return sorted(collected, key=lambda event: event.ts)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _json_key(value: Any) -> str:
    """Serialise ``value`` to a deterministic string.

    The value is dumped as JSON with sorted keys, non-ASCII characters kept
    as-is, and objects JSON cannot encode converted with ``str``. If the dump
    still fails, ``str(value)`` is returned instead.
    """
    try:
        encoded = json.dumps(value, default=str, ensure_ascii=False, sort_keys=True)
    except Exception:
        encoded = str(value)
    return encoded
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _summarize_tool_usage(events: List[ToolCallEvent]) -> Dict[str, Any]:
    """Aggregate tool-call events into a compact usage summary.

    Args:
        events: Tool calls to summarise (only ``name``, ``arguments`` and
            ``success`` are read).

    Returns:
        A dict with:

        - ``tool_counts``: calls per tool, most frequent first (ties by name).
        - ``failures``: calls per tool whose ``success`` is exactly ``False``.
        - ``read_file_full`` / ``read_file_sliced``: ``read_file`` calls
          without / with a requested range.
        - ``repeated_calls``: ``"name:json-args"`` keys seen more than once.
        - ``edit_pattern_sizes``: ``count``, ``max`` and nearest-rank ``p95``
          of the ``edit_file`` ``pattern`` lengths (zeros when there are none).
    """
    tool_counts: Dict[str, int] = {}
    failures: Dict[str, int] = {}
    seen_call_keys: Dict[str, int] = {}
    edit_pattern_sizes: List[int] = []
    full_reads = 0
    sliced_reads = 0

    for e in events:
        tool_counts[e.name] = tool_counts.get(e.name, 0) + 1
        if e.success is False:
            failures[e.name] = failures.get(e.name, 0) + 1

        # Identical name + arguments means an identical (possibly retried) call.
        key = f"{e.name}:{_json_key(e.arguments)}"
        seen_call_keys[key] = seen_call_keys.get(key, 0) + 1

        if e.name == "read_file":
            if _read_file_is_sliced(e.arguments):
                sliced_reads += 1
            else:
                full_reads += 1
        elif e.name == "edit_file":
            pattern = e.arguments.get("pattern")
            if isinstance(pattern, str):
                edit_pattern_sizes.append(len(pattern))

    repeated_calls = {k: v for k, v in seen_call_keys.items() if v > 1}

    def _by_count_then_name(item: Tuple[str, int]) -> Tuple[int, str]:
        return (-item[1], item[0])

    return {
        "tool_counts": dict(sorted(tool_counts.items(), key=_by_count_then_name)),
        "failures": dict(sorted(failures.items(), key=_by_count_then_name)),
        "read_file_full": full_reads,
        "read_file_sliced": sliced_reads,
        "repeated_calls": dict(sorted(repeated_calls.items(), key=_by_count_then_name)),
        "edit_pattern_sizes": {
            "count": len(edit_pattern_sizes),
            "max": max(edit_pattern_sizes) if edit_pattern_sizes else 0,
            "p95": _nearest_rank_p95(edit_pattern_sizes),
        },
    }


def _read_file_is_sliced(arguments: Dict[str, Any]) -> bool:
    """Return True when a ``read_file`` call asked for a range rather than the whole file."""
    start = arguments.get("start_line")
    if start is None:
        start = arguments.get("start_line_one_indexed", arguments.get("start", 1))
    end = arguments.get("end_line")
    if end is None:
        end = arguments.get("end_line_one_indexed_inclusive", arguments.get("end"))

    try:
        start_i = int(start or 1)
    except (TypeError, ValueError, OverflowError):
        # Unparseable start values are treated like the default first line.
        start_i = 1

    # If a range was requested (even via aliases), treat as a slice read.
    return end is not None or start_i != 1 or not arguments.get("should_read_entire_file", True)


def _nearest_rank_p95(sizes: List[int]) -> int:
    """Return the 95th percentile of ``sizes`` (nearest-rank method), or 0 when empty."""
    if not sizes:
        return 0
    # ceil(0.95 * n) in integer arithmetic; the rank is 1-based.
    rank = (95 * len(sizes) + 99) // 100
    return sorted(sizes)[rank - 1]
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@dataclass(frozen=True)
class Scenario:
    """A self-contained eval case: how to set it up, what to ask, and how to check the outcome."""

    # Scenario identifier; also used as the per-scenario artifact directory name.
    name: str
    # Creates the fixture files under the given directory and returns a context dict.
    build: Callable[[Path], Dict[str, Any]]
    # Renders the task prompt from the context returned by ``build``.
    prompt: Callable[[Dict[str, Any]], str]
    # Inspects the outcome using the same context; returns (ok, message).
    verify: Callable[[Dict[str, Any]], Tuple[bool, str]]
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _make_python_scenario() -> Scenario:
    """Create the Python scenario: a constructor/call-site mismatch in a long ``main.py``.

    ``Player.__init__`` takes no ``color`` argument while ``main()`` passes one.
    The agent is expected to add the parameter and store it on the instance.
    """

    def build(root: Path) -> Dict[str, Any]:
        """Write the buggy ``main.py`` under ``root`` and return its path as context."""
        path = root / "main.py"

        filler = "\n".join(f"# filler {i}" for i in range(1, 460))  # >400 lines to refuse full read_file
        path.write_text(
            "\n".join(
                [
                    "class Player:",
                    # The method body must be indented under its def so the fixture is valid Python.
                    "    def __init__(self):",
                    "        # BUG: this should store the provided color",
                    "        self.color = None",
                    "",
                    "def main():",
                    "    p = Player('blue')",
                    "    return p.color",
                    "",
                    filler,
                    "",
                    "if __name__ == '__main__':",
                    "    print(main())",
                    "",
                ]
            ),
            encoding="utf-8",
        )

        return {"file_path": str(path)}

    def prompt(ctx: Dict[str, Any]) -> str:
        """Render the task description for the file named in ``ctx['file_path']``."""
        return (
            "Fix the following runtime error:\n"
            "TypeError: Player.__init__() takes 1 positional argument but 2 were given\n\n"
            f"Target file: {ctx['file_path']}\n\n"
            "Constraints:\n"
            "- Do NOT try to read the entire file (it is >400 lines; read_file(full) will refuse).\n"
            "- Use analyze_code() first to locate the Player definition.\n"
            "- Then use read_file(start_line/end_line) around the relevant blocks.\n"
            "- Apply small, surgical edit_file() calls (short patterns; max_replacements=1).\n\n"
            "Expected fix:\n"
            "- Player.__init__ should accept an optional color parameter and set self.color\n"
            "- Replace the \"self.color = None\" line with \"self.color = color\".\n"
            "- Keep behavior otherwise unchanged.\n"
        )

    def verify(ctx: Dict[str, Any]) -> Tuple[bool, str]:
        """Check (best-effort, text-based) that ``Player.__init__`` takes and stores ``color``."""
        text = Path(ctx["file_path"]).read_text(encoding="utf-8")

        def _init_params(class_idx: int) -> Optional[str]:
            """Return the parameter list of the first ``__init__`` after ``class_idx``, or None."""
            def_idx = text.find("def __init__", class_idx)
            if def_idx == -1:
                return None
            open_idx = text.find("(", def_idx)
            if open_idx == -1:
                return None
            # Match parentheses so defaults like ``color=str()`` and return
            # annotations (``) -> None:``) do not confuse the signature end.
            depth = 0
            for pos in range(open_idx, len(text)):
                ch = text[pos]
                if ch == "(":
                    depth += 1
                elif ch == ")":
                    depth -= 1
                    if depth == 0:
                        return text[open_idx + 1 : pos]
            return None

        ok_player = False
        player_idx = text.find("class Player")
        if player_idx != -1:
            params = _init_params(player_idx)
            ok_player = params is not None and "color" in params
        if ok_player:
            # The assignment is expected close to the class header.
            window = text[player_idx : player_idx + 300]
            ok_player = "self.color = color" in window or "self.color=color" in window

        if ok_player:
            return True, "Player constructor updated with color parameter."
        return False, "Did not find expected __init__ signature/body update for Player."

    return Scenario(name="python_ctor_mismatch_large_file", build=build, prompt=prompt, verify=verify)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _make_js_scenario() -> Scenario:
    """Create the JavaScript scenario: a misspelled call (``greets``) in a long ``app.js``."""

    def build(root: Path) -> Dict[str, Any]:
        """Write the buggy ``app.js`` under ``root`` and return its path as context."""
        target = root / "app.js"
        head = [
            "export function greet(name) {",
            " return `Hello ${name}`;",
            "}",
            "",
            "export function run() {",
            " // BUG: wrong function name",
            " return greets('world');",
            "}",
            "",
        ]
        # >400 lines to discourage full read_file
        padding = [f"// filler {n}" for n in range(1, 460)]
        tail = ["", "console.log(run());", ""]
        target.write_text("\n".join(head + padding + tail), encoding="utf-8")
        return {"file_path": str(target)}

    def prompt(ctx: Dict[str, Any]) -> str:
        """Render the task description for the file named in ``ctx['file_path']``."""
        lines = [
            "Fix the following JavaScript error:",
            "ReferenceError: greets is not defined",
            "",
            f"Target file: {ctx['file_path']}",
            "",
            "Constraints:",
            "- Use analyze_code() first to locate greet/run.",
            "- Use read_file(start_line/end_line) around the run() function.",
            "- Make a minimal edit_file() change: call greet('world') instead of greets('world').",
            "",
        ]
        return "\n".join(lines)

    def verify(ctx: Dict[str, Any]) -> Tuple[bool, str]:
        """Report whether the file now contains the corrected ``greet('world')`` call."""
        contents = Path(ctx["file_path"]).read_text(encoding="utf-8")
        if "return greet('world');" not in contents:
            return False, "Did not find expected replacement greets(...) -> greet(...)."
        return True, "Fixed call to greet()."

    return Scenario(name="js_reference_error", build=build, prompt=prompt, verify=verify)
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def _print_heading(title: str) -> None:
    """Print ``title`` framed by two 80-character ``=`` rules, preceded by a blank line."""
    rule = "=" * 80
    print("\n" + rule, title, rule, sep="\n")
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _run_scenarios(*, scenarios: List[Scenario]) -> int:
    """Run each scenario against the configured LLM and report tool-usage patterns.

    For every scenario a fresh temporary workspace is built, a ReAct agent is
    run to completion on the scenario prompt, the tool calls found in the
    agent's node traces are summarised, and the scenario's own ``verify``
    decides pass/fail. Traces, the per-scenario summary and the final fixture
    file are saved under ``$AF_TOOL_EVAL_OUT_DIR`` (default
    ``test_results/tool_eval``) in a timestamped run directory.

    Args:
        scenarios: Scenarios to execute, in order.

    Returns:
        ``0`` when every scenario passed verification, ``1`` otherwise.
    """
    provider, model, llm_kwargs = _llm_config()
    _print_heading(f"LMStudio Tool Eval | provider={provider} | model={model} | base_url={llm_kwargs.get('base_url') or '(default)'}")

    # Imported lazily so the module can be imported without building an agent stack.
    from abstractagent.agents.react import create_react_agent

    run_stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%SZ")
    out_root = Path(os.getenv("AF_TOOL_EVAL_OUT_DIR", "test_results/tool_eval")).expanduser().absolute()
    out_root.mkdir(parents=True, exist_ok=True)
    run_dir = out_root / f"lmstudio_tool_eval_{run_stamp}"
    run_dir.mkdir(parents=True, exist_ok=True)

    aggregate: List[Dict[str, Any]] = []

    for s in scenarios:
        with tempfile.TemporaryDirectory(prefix="af_tool_eval_") as td:
            root = Path(td)
            ctx = s.build(root)
            scenario_dir = run_dir / s.name
            scenario_dir.mkdir(parents=True, exist_ok=True)

            agent = create_react_agent(
                provider=provider,
                model=model,
                llm_kwargs=llm_kwargs,
                max_iterations=20,
                max_tokens=8192,
            )

            agent.start(s.prompt(ctx))
            state = agent.run_to_completion()

            traces = agent.get_node_traces()
            events = _iter_tool_calls(traces)
            summary = _summarize_tool_usage(events)

            ok, msg = s.verify(ctx)
            answer = ""
            try:
                answer = str((state.output or {}).get("answer") or "")
            except Exception:
                # The final answer is informational only; tolerate unexpected output shapes.
                answer = ""

            record = {
                "scenario": s.name,
                "status": getattr(state.status, "value", str(state.status)),
                "verify_ok": ok,
                "verify_msg": msg,
                "tool_summary": summary,
                "answer": answer,
            }
            aggregate.append(record)

            _print_heading(f"Scenario: {s.name}")
            print(f"Run status: {record['status']} | verify_ok={ok}")
            print(f"Verify: {msg}")
            print("Tool counts:", summary["tool_counts"])
            if summary["failures"]:
                print("Tool failures:", summary["failures"])
            print(f"read_file(full)={summary['read_file_full']} | read_file(slice)={summary['read_file_sliced']}")
            if summary["repeated_calls"]:
                # Print only the top few repeated calls to keep output small.
                top = list(summary["repeated_calls"].items())[:3]
                print("Repeated identical calls (top 3):", dict(top))

            # Persist scenario traces + final file snapshot for deep inspection.
            # Must happen inside the ``with`` block: the fixture lives in the temp dir.
            try:
                (scenario_dir / "node_traces.json").write_text(_json_key(traces), encoding="utf-8")
                (scenario_dir / "summary.json").write_text(_json_key(record), encoding="utf-8")
                file_path = Path(str(ctx.get("file_path") or "")).expanduser()
                if file_path.exists() and file_path.is_file():
                    (scenario_dir / f"final_{file_path.name}").write_text(file_path.read_text(encoding="utf-8"), encoding="utf-8")
            except Exception as exc:
                # Saving artifacts stays best-effort, but the failure is surfaced.
                print(f"Warning: could not save artifacts for {s.name}: {exc}")

    # Aggregate: report issues that repeat across scenarios.
    _print_heading("Aggregate (repetitive-only)")
    repeats: Dict[str, int] = {}
    for r in aggregate:
        ts = r.get("tool_summary") or {}
        if not isinstance(ts, dict):
            continue
        if (ts.get("read_file_full") or 0) > 0:
            repeats["read_file_full_used"] = repeats.get("read_file_full_used", 0) + 1
        tool_counts = ts.get("tool_counts") or {}
        if isinstance(tool_counts, dict) and tool_counts.get("analyze_code", 0) == 0:
            repeats["analyze_code_not_used"] = repeats.get("analyze_code_not_used", 0) + 1
        failures = ts.get("failures") or {}
        if isinstance(failures, dict) and failures.get("edit_file", 0) > 0:
            repeats["edit_file_failures"] = repeats.get("edit_file_failures", 0) + 1

    # Only issues seen in more than one scenario count as repetitive.
    repetitive = {k: v for k, v in repeats.items() if v > 1}
    if not repetitive:
        print("No repetitive issues detected across scenarios (based on basic heuristics).")
    else:
        for k, v in sorted(repetitive.items(), key=lambda kv: (-kv[1], kv[0])):
            print(f"- {k}: occurred in {v}/{len(aggregate)} scenarios")

    print(f"\nSaved eval artifacts to: {run_dir}")

    # Exit code: non-zero if any scenario verification failed.
    failed = [r for r in aggregate if not r.get("verify_ok")]
    if failed:
        print(f"\nFailures: {len(failed)}/{len(aggregate)} scenarios did not meet verification checks.")
        for r in failed:
            print(f"- {r['scenario']}: status={r['status']} verify={r['verify_msg']}")
        return 1
    return 0
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def main() -> int:
    """Run the built-in Python and JavaScript scenarios and return the exit code."""
    return _run_scenarios(scenarios=[_make_python_scenario(), _make_js_scenario()])
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
# Script entry point: main()'s return value becomes the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
|
abstractagent/tools/__init__.py
CHANGED
|
@@ -7,7 +7,10 @@ Agent-specific tools (execute_python, self_improve) are defined locally.
|
|
|
7
7
|
# Import common tools from AbstractCore (canonical source)
|
|
8
8
|
from abstractcore.tools.common_tools import (
|
|
9
9
|
list_files,
|
|
10
|
+
skim_folders,
|
|
11
|
+
analyze_code,
|
|
10
12
|
read_file,
|
|
13
|
+
skim_files,
|
|
11
14
|
search_files,
|
|
12
15
|
write_file,
|
|
13
16
|
edit_file,
|
|
@@ -24,7 +27,10 @@ from .self_improve import self_improve
|
|
|
24
27
|
ALL_TOOLS = [
|
|
25
28
|
# File operations (from abstractcore)
|
|
26
29
|
list_files,
|
|
30
|
+
skim_folders,
|
|
31
|
+
analyze_code,
|
|
27
32
|
read_file,
|
|
33
|
+
skim_files,
|
|
28
34
|
search_files,
|
|
29
35
|
write_file,
|
|
30
36
|
edit_file,
|
|
@@ -41,7 +47,10 @@ ALL_TOOLS = [
|
|
|
41
47
|
__all__ = [
|
|
42
48
|
# File operations
|
|
43
49
|
"list_files",
|
|
50
|
+
"skim_folders",
|
|
51
|
+
"analyze_code",
|
|
44
52
|
"read_file",
|
|
53
|
+
"skim_files",
|
|
45
54
|
"search_files",
|
|
46
55
|
"write_file",
|
|
47
56
|
"edit_file",
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: abstractagent
|
|
3
|
+
Version: 0.3.1
|
|
4
|
+
Summary: Agent implementations using AbstractRuntime and AbstractCore
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Dist: abstractcore[tools]
|
|
9
|
+
Requires-Dist: abstractruntime
|
|
10
|
+
Provides-Extra: dev
|
|
11
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
12
|
+
Dynamic: license-file
|
|
13
|
+
|
|
14
|
+
# AbstractAgent
|
|
15
|
+
|
|
16
|
+
Agent patterns (ReAct / CodeAct / MemAct) built on **AbstractRuntime** (durable execution) and **AbstractCore** (tools + LLM integration).
|
|
17
|
+
|
|
18
|
+
Start here: [`docs/getting-started.md`](docs/getting-started.md) (then [`docs/README.md`](docs/README.md) for the full index)
|
|
19
|
+
|
|
20
|
+
## Documentation
|
|
21
|
+
|
|
22
|
+
- Getting started: [`docs/getting-started.md`](docs/getting-started.md)
|
|
23
|
+
- API reference: [`docs/api.md`](docs/api.md)
|
|
24
|
+
- FAQ / troubleshooting: [`docs/faq.md`](docs/faq.md)
|
|
25
|
+
- Architecture (diagrams): [`docs/architecture.md`](docs/architecture.md)
|
|
26
|
+
- Changelog: [`CHANGELOG.md`](CHANGELOG.md)
|
|
27
|
+
- Contributing: [`CONTRIBUTING.md`](CONTRIBUTING.md)
|
|
28
|
+
- Security: [`SECURITY.md`](SECURITY.md)
|
|
29
|
+
- Acknowledgements: [`ACKNOWLEDMENTS.md`](ACKNOWLEDMENTS.md)
|
|
30
|
+
|
|
31
|
+
## What you get
|
|
32
|
+
|
|
33
|
+
- **ReAct**: tool-first Reason → Act → Observe loop
|
|
34
|
+
- **CodeAct**: executes Python (tool call or fenced ` ```python``` ` blocks)
|
|
35
|
+
- **MemAct**: memory-enhanced agent using runtime-owned Active Memory
|
|
36
|
+
- **Durable runs**: pause/resume via `run_id` + runtime stores
|
|
37
|
+
- **Tool control**: explicit tool bundles + per-run allowlists
|
|
38
|
+
- **Observability**: durable ledger of LLM calls, tool calls, and waits
|
|
39
|
+
|
|
40
|
+
## Installation
|
|
41
|
+
|
|
42
|
+
From source (development):
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install -e .
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
With dev dependencies:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install -e ".[dev]"
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
From PyPI:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install abstractagent
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
Note: the repository may be ahead of the latest published PyPI release. To verify what you installed:
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
python -c "import importlib.metadata as md; print(md.version('abstractagent'))"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Quick start (ReAct)
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from abstractagent import create_react_agent
|
|
70
|
+
|
|
71
|
+
agent = create_react_agent(provider="ollama", model="qwen3:1.7b-q4_K_M")
|
|
72
|
+
agent.start("List the files in the current directory")
|
|
73
|
+
state = agent.run_to_completion()
|
|
74
|
+
print(state.output["answer"])
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Persistence (resume across restarts)
|
|
78
|
+
|
|
79
|
+
By default, the factory helpers use an in-memory runtime store. For resume across process restarts,
|
|
80
|
+
pass a persistent `RunStore`/`LedgerStore` (example below uses JSON files).
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
from abstractagent import create_react_agent
|
|
84
|
+
from abstractruntime.storage.json_files import JsonFileRunStore, JsonlLedgerStore
|
|
85
|
+
|
|
86
|
+
run_store = JsonFileRunStore(".runs")
|
|
87
|
+
ledger_store = JsonlLedgerStore(".runs")
|
|
88
|
+
|
|
89
|
+
agent = create_react_agent(run_store=run_store, ledger_store=ledger_store)
|
|
90
|
+
agent.start("Long running task")
|
|
91
|
+
agent.save_state("agent_state.json")
|
|
92
|
+
|
|
93
|
+
# ... later / after restart ...
|
|
94
|
+
|
|
95
|
+
agent2 = create_react_agent(run_store=run_store, ledger_store=ledger_store)
|
|
96
|
+
agent2.load_state("agent_state.json")
|
|
97
|
+
state = agent2.run_to_completion()
|
|
98
|
+
print(state.output["answer"])
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
More details: [`docs/persistence.md`](docs/persistence.md)
|
|
102
|
+
|
|
103
|
+
## CLI
|
|
104
|
+
|
|
105
|
+
This repository still installs a `react-agent` entrypoint, but it is **deprecated** and only prints a migration hint
|
|
106
|
+
(see `src/abstractagent/repl.py` and `pyproject.toml`).
|
|
107
|
+
|
|
108
|
+
Interactive UX lives in **AbstractCode**.
|
|
109
|
+
|
|
110
|
+
## License
|
|
111
|
+
|
|
112
|
+
MIT (see `LICENSE`).
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
abstractagent/__init__.py,sha256=-o1xKOyT3rhA67Iey2RCeZzU5SiFLB1mX4F2tAKH98Y,948
|
|
2
|
+
abstractagent/repl.py,sha256=6TWVxEYAAXLR5jjrc3FKmEy4Sm4xzdX5REQKx-lH35g,1089
|
|
3
|
+
abstractagent/adapters/__init__.py,sha256=k_7Lz8nOoMHmGeYrSbIMtLXIvmqJTzOo33g7v8C-qrM,284
|
|
4
|
+
abstractagent/adapters/codeact_runtime.py,sha256=wxFocTOIVAtHujnVH92hVjiPbsisq1AzQkyMzimzfao,52842
|
|
5
|
+
abstractagent/adapters/generation_params.py,sha256=hWAn13Nj30-hItL_3IjvoALX7q77TJ70U1qFj2a5Y7k,2958
|
|
6
|
+
abstractagent/adapters/media.py,sha256=BSNfLswkKIzy9C9d3FwfphHj3JsdAJlUVW3phC9mR44,1295
|
|
7
|
+
abstractagent/adapters/memact_runtime.py,sha256=g0yC4ZXU41lNxqju2OzVxMWz7PyvxdbRRuR0mazLqjc,40179
|
|
8
|
+
abstractagent/adapters/react_runtime.py,sha256=skt4jb9_LIFYUngWdWHItj-lpced5NCMOvWzfWDEx2w,67534
|
|
9
|
+
abstractagent/agents/__init__.py,sha256=9U15lIVgGBZVsmDCKtz4AyTJFTI3nUq4ej8Ugk8lvhM,548
|
|
10
|
+
abstractagent/agents/base.py,sha256=KnexFUCPBFNm7gBrlvDcCfCNigw4YdoQn5rowbk2ivs,18075
|
|
11
|
+
abstractagent/agents/codeact.py,sha256=knqr-ABRDrT4O9kJMJ_YjT36tqN7kWI6L8Fd3MUvT78,11112
|
|
12
|
+
abstractagent/agents/memact.py,sha256=6Z3h4tpFndInSci1_j0J7vo8LW_ENhc3Vmbg6WwPVpg,10082
|
|
13
|
+
abstractagent/agents/react.py,sha256=6sbVsRhXe696WhRXmQUQNou7f1zLBpg5b6PIXwDs9xI,11668
|
|
14
|
+
abstractagent/logic/__init__.py,sha256=QeUxFEyaoi4ymPVEwno1mirBpMP77EtWJa-pTHakY2g,524
|
|
15
|
+
abstractagent/logic/builtins.py,sha256=9gy6oiOjczCnmh5IHin-Oyeqm4D6egZWeFL-p0Ddbok,11296
|
|
16
|
+
abstractagent/logic/codeact.py,sha256=dnPCHXqxO1JajrBeq_AAC8IbrXKL-CrA4InETySTdNg,7075
|
|
17
|
+
abstractagent/logic/memact.py,sha256=xaPfbeVlEMTveDL5FU4HkBeehIGb3D_-a2SY_pXW2yI,5023
|
|
18
|
+
abstractagent/logic/react.py,sha256=AyNYCfingqlbzIXb8M5j6BBmajVHYO3yZqQepCORWpk,7805
|
|
19
|
+
abstractagent/logic/types.py,sha256=idLhxgBaN3LY7tDmwMNL9hgWCKM5P1BRNv5YYkXMy80,595
|
|
20
|
+
abstractagent/sandbox/__init__.py,sha256=Vay_BJzmfMcf-cAUoapDbckCyhHGQHFzObVR98x2j3g,186
|
|
21
|
+
abstractagent/sandbox/interface.py,sha256=DtL_RYaXgPojW-vUIgEPi_69sleSNQU-YCWrwZ_WSoI,462
|
|
22
|
+
abstractagent/sandbox/local.py,sha256=mFQzFgec0qgJ8VNbB0ZjeZi58BgrIE0frMFsasKe3iI,2143
|
|
23
|
+
abstractagent/scripts/__init__.py,sha256=paHuNvq6FzV1nVyGV-ixPW8K1rumoLpSvwcMIbP0y8U,130
|
|
24
|
+
abstractagent/scripts/lmstudio_tool_eval.py,sha256=FX_rvbLFY0KeT38i2e4p6htffzEiAR1mBXQhd6dK3zE,16568
|
|
25
|
+
abstractagent/tools/__init__.py,sha256=p0msxgQCB2p_eB5d5Jk0lnGyA2-BXmqozAY-Od0zDq4,1358
|
|
26
|
+
abstractagent/tools/code_execution.py,sha256=rQBJXfTGdCxVSr7SqTH17mQ9ylImNDcT5R7_bBIvygA,1417
|
|
27
|
+
abstractagent/tools/self_improve.py,sha256=yOtsa0iS5OxVPLpu3MN_fPxPTEMl3Qnm1GWYJHFsZe8,1732
|
|
28
|
+
abstractagent-0.3.1.dist-info/licenses/LICENSE,sha256=6rL4UIO5IdK59THf7fx0q6Hmxp5grSFi7-kWLcczseA,1083
|
|
29
|
+
abstractagent-0.3.1.dist-info/METADATA,sha256=O3LTdZj2V_xJm4PBoEoGPPEcexBezrI0OnQ5scX1kDo,3309
|
|
30
|
+
abstractagent-0.3.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
31
|
+
abstractagent-0.3.1.dist-info/entry_points.txt,sha256=tEaR0KtY-chcgRTd6ZVkvsqqaDpw2rvi-1RFktHGczU,56
|
|
32
|
+
abstractagent-0.3.1.dist-info/top_level.txt,sha256=cgtC3Vjz_piTAMmRkd73tbUxk2jPeG3IxMbo7JK3RTU,14
|
|
33
|
+
abstractagent-0.3.1.dist-info/RECORD,,
|