abstractagent 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractagent/adapters/__init__.py +2 -1
- abstractagent/adapters/codeact_runtime.py +823 -57
- abstractagent/adapters/memact_runtime.py +721 -0
- abstractagent/adapters/react_runtime.py +1114 -67
- abstractagent/agents/__init__.py +4 -0
- abstractagent/agents/base.py +58 -1
- abstractagent/agents/codeact.py +89 -18
- abstractagent/agents/memact.py +244 -0
- abstractagent/agents/react.py +91 -18
- abstractagent/logic/__init__.py +2 -0
- abstractagent/logic/builtins.py +212 -5
- abstractagent/logic/codeact.py +87 -80
- abstractagent/logic/memact.py +127 -0
- abstractagent/logic/react.py +108 -48
- abstractagent/repl.py +24 -447
- abstractagent/scripts/__init__.py +5 -0
- abstractagent/scripts/lmstudio_tool_eval.py +426 -0
- abstractagent/tools/__init__.py +3 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/METADATA +10 -11
- abstractagent-0.3.0.dist-info/RECORD +31 -0
- abstractagent/ui/__init__.py +0 -5
- abstractagent/ui/question.py +0 -197
- abstractagent-0.2.0.dist-info/RECORD +0 -28
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/WHEEL +0 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/entry_points.txt +0 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {abstractagent-0.2.0.dist-info → abstractagent-0.3.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
"""LMStudio tool-synergy evaluation harness (manual, real LLM).
|
|
2
|
+
|
|
3
|
+
Runs a few deterministic scenarios against a real provider/model (e.g. LMStudio
|
|
4
|
+
`qwen/qwen3-next-80b`) and prints a compact summary of tool usage patterns:
|
|
5
|
+
- whether the model uses analyze_code/search_files before bounded reads
|
|
6
|
+
- whether edits succeed without repeated identical retries
|
|
7
|
+
- how often it reads whole files vs slices
|
|
8
|
+
|
|
9
|
+
This is intentionally NOT a required CI test. It should be run manually.
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
ABSTRACTAGENT_TEST_PROVIDER=lmstudio \
|
|
13
|
+
ABSTRACTAGENT_TEST_MODEL=qwen/qwen3-next-80b \
|
|
14
|
+
ABSTRACTAGENT_TEST_BASE_URL=http://localhost:1234/v1 \
|
|
15
|
+
python -m abstractagent.scripts.lmstudio_tool_eval
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
import os
|
|
22
|
+
import tempfile
|
|
23
|
+
from datetime import datetime, timezone
|
|
24
|
+
from dataclasses import dataclass
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _llm_config() -> Tuple[str, str, Dict[str, Any]]:
    """Resolve the provider, model and LLM keyword arguments from the environment.

    Reads ``ABSTRACTAGENT_TEST_PROVIDER`` (default ``"lmstudio"``),
    ``ABSTRACTAGENT_TEST_MODEL`` (default ``"qwen/qwen3-next-80b"``) and the
    optional ``ABSTRACTAGENT_TEST_BASE_URL``.

    Returns:
        A ``(provider, model, llm_kwargs)`` tuple. ``llm_kwargs`` always holds
        ``temperature=0`` and ``seed=42``; ``base_url`` is added only when the
        environment variable is set to a non-empty value.
    """
    env = os.environ

    # Fixed sampling settings; the endpoint is optional.
    settings: Dict[str, Any] = {"temperature": 0, "seed": 42}
    endpoint = env.get("ABSTRACTAGENT_TEST_BASE_URL")
    if endpoint:
        settings["base_url"] = endpoint

    return (
        env.get("ABSTRACTAGENT_TEST_PROVIDER", "lmstudio"),
        env.get("ABSTRACTAGENT_TEST_MODEL", "qwen/qwen3-next-80b"),
        settings,
    )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(frozen=True)
class ToolCallEvent:
    """One tool invocation recovered from an agent node trace."""

    # Timestamp string copied from the enclosing trace step ("" when absent).
    ts: str
    # Tool name with surrounding whitespace stripped.
    name: str
    # Arguments recorded for the call ({} when the trace held no dict).
    arguments: Dict[str, Any]
    # Outcome flag from the positionally matching result; None when unavailable.
    success: Optional[bool]
    # Error text from the positionally matching result; None when absent.
    error: Optional[str]


def _iter_tool_calls(traces: Dict[str, Any]) -> List[ToolCallEvent]:
    """Flatten node traces into a list of tool-call events ordered by timestamp.

    Only steps whose ``effect`` is a dict of type ``"tool_calls"`` contribute.
    Malformed entries (non-dict traces, steps or calls, and calls without a
    name) are skipped silently. Each call is paired with the entry at the same
    index in the step's ``result["results"]`` list, when one exists.

    Args:
        traces: Mapping of node id to trace dict; ``None`` or empty yields ``[]``.

    Returns:
        Events sorted by their ``ts`` string (stable for equal timestamps).
    """

    def _dict_or_empty(value: Any) -> Dict[str, Any]:
        # Anything that is not a dict is treated as "no data".
        return value if isinstance(value, dict) else {}

    collected: List[ToolCallEvent] = []

    for trace in (traces or {}).values():
        step_list = trace.get("steps") if isinstance(trace, dict) else None
        if not isinstance(step_list, list):
            continue

        for step in step_list:
            if not isinstance(step, dict):
                continue
            effect = step.get("effect")
            if not (isinstance(effect, dict) and effect.get("type") == "tool_calls"):
                continue

            calls = _dict_or_empty(effect.get("payload")).get("tool_calls")
            if not isinstance(calls, list):
                continue

            outcomes = _dict_or_empty(step.get("result")).get("results")
            if not isinstance(outcomes, list):
                outcomes = []

            stamp = str(step.get("ts") or "")

            for position, call in enumerate(calls):
                if not isinstance(call, dict):
                    continue
                tool_name = str(call.get("name") or "").strip()
                if not tool_name:
                    continue

                raw_args = call.get("arguments")
                call_args = dict(raw_args) if isinstance(raw_args, dict) else {}

                # Results are matched to calls by position in the original list.
                outcome = outcomes[position] if position < len(outcomes) else None
                ok: Optional[bool] = None
                err: Optional[str] = None
                if isinstance(outcome, dict):
                    flag = outcome.get("success")
                    detail = outcome.get("error")
                    ok = None if flag is None else bool(flag)
                    err = None if detail is None else str(detail)

                collected.append(
                    ToolCallEvent(
                        ts=stamp,
                        name=tool_name,
                        arguments=call_args,
                        success=ok,
                        error=err,
                    )
                )

    return sorted(collected, key=lambda event: event.ts)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _json_key(value: Any) -> str:
    """Return a deterministic string form of *value*, suitable as a dict key.

    Serialises with sorted keys so that equal mappings produce equal strings;
    objects JSON cannot encode are stringified via ``default=str``.
    """
    try:
        return json.dumps(value, sort_keys=True, ensure_ascii=False, default=str)
    except Exception:
        # Best effort: fall back to str() when JSON serialisation itself fails.
        return str(value)


def _summarize_tool_usage(events: List["ToolCallEvent"]) -> Dict[str, Any]:
    """Aggregate tool-call events into a compact usage summary.

    Args:
        events: Tool-call events; each must expose ``name``, ``arguments`` and
            ``success`` attributes.

    Returns:
        A dict with these keys:

        - ``tool_counts``: calls per tool, most frequent first (ties by name).
        - ``failures``: calls per tool whose ``success`` is exactly ``False``.
        - ``read_file_full`` / ``read_file_sliced``: how many ``read_file``
          calls asked for the whole file versus a line range.
        - ``repeated_calls``: ``"name:json(arguments)"`` keys seen more than once.
        - ``edit_pattern_sizes``: ``count``, ``max`` and nearest-rank ``p95`` of
          ``edit_file`` pattern lengths (all ``0`` when there were no edits).
    """
    tool_counts: Dict[str, int] = {}
    failures: Dict[str, int] = {}
    call_counts: Dict[str, int] = {}
    edit_pattern_sizes: List[int] = []
    full_reads = 0
    sliced_reads = 0

    for e in events:
        tool_counts[e.name] = tool_counts.get(e.name, 0) + 1
        if e.success is False:
            failures[e.name] = failures.get(e.name, 0) + 1

        key = f"{e.name}:{_json_key(e.arguments)}"
        call_counts[key] = call_counts.get(key, 0) + 1

        if e.name == "read_file":
            args = e.arguments
            # Accept the canonical argument names first, then known aliases.
            start = args.get("start_line")
            if start is None:
                start = args.get("start_line_one_indexed", args.get("start", 1))
            end = args.get("end_line")
            if end is None:
                end = args.get("end_line_one_indexed_inclusive", args.get("end"))
            try:
                start_i = int(start or 1)
            except (TypeError, ValueError, OverflowError):
                start_i = 1

            # If a range was requested (even via aliases), treat as a slice read.
            if end is not None or start_i != 1 or not args.get("should_read_entire_file", True):
                sliced_reads += 1
            else:
                full_reads += 1
        elif e.name == "edit_file":
            pattern = e.arguments.get("pattern")
            if isinstance(pattern, str):
                edit_pattern_sizes.append(len(pattern))

    repeated_calls = {k: v for k, v in call_counts.items() if v > 1}

    def _most_frequent_first(kv: Tuple[str, int]) -> Tuple[int, str]:
        # Descending by count, then ascending by key for a stable order.
        return (-kv[1], kv[0])

    sizes = sorted(edit_pattern_sizes)
    if sizes:
        # Nearest-rank percentile: rank = ceil(0.95 * n), in integer arithmetic.
        rank = (95 * len(sizes) + 99) // 100
        p95 = int(sizes[rank - 1])
        largest = sizes[-1]
    else:
        p95 = 0
        largest = 0

    return {
        "tool_counts": dict(sorted(tool_counts.items(), key=_most_frequent_first)),
        "failures": dict(sorted(failures.items(), key=_most_frequent_first)),
        "read_file_full": full_reads,
        "read_file_sliced": sliced_reads,
        "repeated_calls": dict(sorted(repeated_calls.items(), key=_most_frequent_first)),
        "edit_pattern_sizes": {
            "count": len(sizes),
            "max": largest,
            "p95": p95,
        },
    }
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@dataclass(frozen=True)
class Scenario:
    """A self-contained evaluation scenario: fixture, prompt and checker."""

    # Short identifier for the scenario.
    name: str
    # Creates the fixture under the given root directory and returns the context dict.
    build: Callable[[Path], Dict[str, Any]]
    # Renders the task prompt from the context returned by ``build``.
    prompt: Callable[[Dict[str, Any]], str]
    # Inspects the fixture after the run; returns (passed, human-readable message).
    verify: Callable[[Dict[str, Any]], Tuple[bool, str]]


def _make_python_scenario() -> "Scenario":
    """Build the Python scenario: a constructor/call-site mismatch in a long file.

    The fixture is a ``main.py`` whose ``Player.__init__`` takes no ``color``
    argument although ``main()`` passes one, padded with filler comments so the
    file exceeds 400 lines.
    """

    def build(root: Path) -> Dict[str, Any]:
        """Write ``main.py`` under *root* and return ``{"file_path": ...}``."""
        path = root / "main.py"

        filler = "\n".join([f"# filler {i}" for i in range(1, 460)])  # >400 lines to refuse full read_file
        path.write_text(
            "\n".join(
                [
                    "class Player:",
                    "    def __init__(self):",
                    "        # BUG: this should store the provided color",
                    "        self.color = None",
                    "",
                    "def main():",
                    "    p = Player('blue')",
                    "    return p.color",
                    "",
                    filler,
                    "",
                    "if __name__ == '__main__':",
                    "    print(main())",
                    "",
                ]
            ),
            encoding="utf-8",
        )

        return {"file_path": str(path)}

    def prompt(ctx: Dict[str, Any]) -> str:
        """Return the task prompt, embedding the fixture path from *ctx*."""
        return (
            "Fix the following runtime error:\n"
            "TypeError: Player.__init__() takes 1 positional argument but 2 were given\n\n"
            f"Target file: {ctx['file_path']}\n\n"
            "Constraints:\n"
            "- Do NOT try to read the entire file (it is >400 lines; read_file(full) will refuse).\n"
            "- Use analyze_code() first to locate the Player definition.\n"
            "- Then use read_file(start_line/end_line) around the relevant blocks.\n"
            "- Apply small, surgical edit_file() calls (short patterns; max_replacements=1).\n\n"
            "Expected fix:\n"
            "- Player.__init__ should accept an optional color parameter and set self.color\n"
            "- Replace the \"self.color = None\" line with \"self.color = color\".\n"
            "- Keep behavior otherwise unchanged.\n"
        )

    def verify(ctx: Dict[str, Any]) -> Tuple[bool, str]:
        """Check that ``Player.__init__`` now accepts and stores ``color``."""
        text = Path(ctx["file_path"]).read_text(encoding="utf-8")
        failure = (False, "Did not find expected __init__ signature/body update for Player.")

        def _parameter_list(def_idx: int) -> Optional[str]:
            # Text between the parentheses of the ``def`` starting at def_idx.
            # Balanced-paren scanning keeps the check inside the signature even
            # with return annotations, multi-line signatures or nested parens.
            if def_idx < 0:
                return None
            open_idx = text.find("(", def_idx)
            if open_idx < 0:
                return None
            depth = 0
            for pos in range(open_idx, len(text)):
                ch = text[pos]
                if ch == "(":
                    depth += 1
                elif ch == ")":
                    depth -= 1
                    if depth == 0:
                        return text[open_idx + 1 : pos]
            return None

        player_idx = text.find("class Player")
        if player_idx == -1:
            return failure

        params = _parameter_list(text.find("def __init__", player_idx))
        if params is None or "color" not in params:
            return failure

        # The assignment must appear close to the class header (best-effort).
        window = text[player_idx : player_idx + 300]
        if "self.color = color" in window or "self.color=color" in window:
            return True, "Player constructor updated with color parameter."
        return failure

    return Scenario(name="python_ctor_mismatch_large_file", build=build, prompt=prompt, verify=verify)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _make_js_scenario() -> "Scenario":
    """Build the JavaScript scenario: a misspelled function call in a long file.

    The fixture is an ``app.js`` in which ``run()`` calls the undefined
    ``greets`` instead of ``greet``, padded with filler comments.
    """

    def build(root: Path) -> Dict[str, Any]:
        """Write ``app.js`` under *root* and return ``{"file_path": ...}``."""
        target = root / "app.js"
        # >400 lines to discourage full read_file
        padding = "\n".join(f"// filler {n}" for n in range(1, 460))
        source_lines = [
            "export function greet(name) {",
            " return `Hello ${name}`;",
            "}",
            "",
            "export function run() {",
            " // BUG: wrong function name",
            " return greets('world');",
            "}",
            "",
            padding,
            "",
            "console.log(run());",
            "",
        ]
        target.write_text("\n".join(source_lines), encoding="utf-8")
        return {"file_path": str(target)}

    def prompt(ctx: Dict[str, Any]) -> str:
        """Return the task prompt, embedding the fixture path from *ctx*."""
        pieces = [
            "Fix the following JavaScript error:\n",
            "ReferenceError: greets is not defined\n\n",
            f"Target file: {ctx['file_path']}\n\n",
            "Constraints:\n",
            "- Use analyze_code() first to locate greet/run.\n",
            "- Use read_file(start_line/end_line) around the run() function.\n",
            "- Make a minimal edit_file() change: call greet('world') instead of greets('world').\n",
        ]
        return "".join(pieces)

    def verify(ctx: Dict[str, Any]) -> Tuple[bool, str]:
        """Check that the file now contains the corrected ``greet('world')`` call."""
        contents = Path(ctx["file_path"]).read_text(encoding="utf-8")
        if "return greet('world');" not in contents:
            return False, "Did not find expected replacement greets(...) -> greet(...)."
        return True, "Fixed call to greet()."

    return Scenario(name="js_reference_error", build=build, prompt=prompt, verify=verify)
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def _print_heading(title: str) -> None:
    """Print *title* framed by two 80-character ``=`` rules, after a blank line."""
    rule = "=" * 80
    print(f"\n{rule}\n{title}\n{rule}")
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _run_scenarios(*, scenarios: List["Scenario"]) -> int:
    """Run every scenario against the configured LLM and report tool usage.

    For each scenario a fresh temporary directory is populated, a ReAct agent
    is run to completion on the scenario prompt, the tool calls found in the
    node traces are summarised and printed, and the scenario's ``verify``
    callback is applied. Traces, the summary record and a snapshot of the
    final fixture file are written under
    ``$AF_TOOL_EVAL_OUT_DIR/lmstudio_tool_eval_<UTC timestamp>/<scenario name>``
    (``test_results/tool_eval`` when the variable is unset).

    Args:
        scenarios: Scenarios to execute, in order.

    Returns:
        ``0`` when every scenario passed verification, ``1`` otherwise.
    """
    provider, model, llm_kwargs = _llm_config()
    _print_heading(
        f"LMStudio Tool Eval | provider={provider} | model={model} | base_url={llm_kwargs.get('base_url') or '(default)'}"
    )

    # Imported inside the function, so importing this module alone does not
    # import the agent implementation.
    from abstractagent.agents.react import create_react_agent

    run_stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%SZ")
    out_root = Path(os.getenv("AF_TOOL_EVAL_OUT_DIR", "test_results/tool_eval")).expanduser().absolute()
    out_root.mkdir(parents=True, exist_ok=True)
    run_dir = out_root / f"lmstudio_tool_eval_{run_stamp}"
    run_dir.mkdir(parents=True, exist_ok=True)

    aggregate: List[Dict[str, Any]] = []

    for s in scenarios:
        with tempfile.TemporaryDirectory(prefix="af_tool_eval_") as td:
            root = Path(td)
            ctx = s.build(root)
            scenario_dir = run_dir / s.name
            scenario_dir.mkdir(parents=True, exist_ok=True)

            agent = create_react_agent(
                provider=provider,
                model=model,
                llm_kwargs=llm_kwargs,
                max_iterations=20,
                max_tokens=8192,
            )

            agent.start(s.prompt(ctx))
            state = agent.run_to_completion()

            traces = agent.get_node_traces()
            events = _iter_tool_calls(traces)
            summary = _summarize_tool_usage(events)

            ok, msg = s.verify(ctx)
            answer = ""
            try:
                answer = str((state.output or {}).get("answer") or "")
            except Exception:
                # The final answer is informational only; keep going without it.
                answer = ""

            record = {
                "scenario": s.name,
                "status": getattr(state.status, "value", str(state.status)),
                "verify_ok": ok,
                "verify_msg": msg,
                "tool_summary": summary,
                "answer": answer,
            }
            aggregate.append(record)

            _print_heading(f"Scenario: {s.name}")
            print(f"Run status: {record['status']} | verify_ok={ok}")
            print(f"Verify: {msg}")
            print("Tool counts:", summary["tool_counts"])
            if summary["failures"]:
                print("Tool failures:", summary["failures"])
            print(f"read_file(full)={summary['read_file_full']} | read_file(slice)={summary['read_file_sliced']}")
            if summary["repeated_calls"]:
                # Print only the top few repeated calls to keep output small.
                top = list(summary["repeated_calls"].items())[:3]
                print("Repeated identical calls (top 3):", dict(top))

            # Persist scenario traces + final file snapshot for deep inspection.
            # Must happen inside the ``with`` block, while the fixture still exists.
            try:
                (scenario_dir / "node_traces.json").write_text(_json_key(traces), encoding="utf-8")
                (scenario_dir / "summary.json").write_text(_json_key(record), encoding="utf-8")
                file_path = Path(str(ctx.get("file_path") or "")).expanduser()
                if file_path.exists() and file_path.is_file():
                    (scenario_dir / f"final_{file_path.name}").write_text(
                        file_path.read_text(encoding="utf-8"), encoding="utf-8"
                    )
            except Exception as exc:
                # Artifacts are best-effort: report the problem but keep evaluating.
                print(f"Warning: could not save artifacts for {s.name}: {exc}")

    # Aggregate: report issues that repeat across scenarios.
    _print_heading("Aggregate (repetitive-only)")
    repeats: Dict[str, int] = {}
    for r in aggregate:
        ts = r.get("tool_summary") or {}
        if isinstance(ts, dict):
            if (ts.get("read_file_full") or 0) > 0:
                repeats["read_file_full_used"] = repeats.get("read_file_full_used", 0) + 1
            tool_counts = ts.get("tool_counts") or {}
            if isinstance(tool_counts, dict) and tool_counts.get("analyze_code", 0) == 0:
                repeats["analyze_code_not_used"] = repeats.get("analyze_code_not_used", 0) + 1
            failures = ts.get("failures") or {}
            if isinstance(failures, dict) and failures.get("edit_file", 0) > 0:
                repeats["edit_file_failures"] = repeats.get("edit_file_failures", 0) + 1

    # Only issues seen in more than one scenario count as repetitive.
    recurring = {k: v for k, v in repeats.items() if v > 1}
    if not recurring:
        print("No repetitive issues detected across scenarios (based on basic heuristics).")
    else:
        for k, v in sorted(recurring.items(), key=lambda kv: (-kv[1], kv[0])):
            print(f"- {k}: occurred in {v}/{len(aggregate)} scenarios")

    print(f"\nSaved eval artifacts to: {run_dir}")

    # Exit code: non-zero if any scenario verification failed.
    failed = [r for r in aggregate if not r.get("verify_ok")]
    if failed:
        print(f"\nFailures: {len(failed)}/{len(aggregate)} scenarios did not meet verification checks.")
        for r in failed:
            print(f"- {r['scenario']}: status={r['status']} verify={r['verify_msg']}")
        return 1
    return 0
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def main() -> int:
    """Run the built-in Python and JavaScript scenarios.

    Returns:
        The exit code produced by ``_run_scenarios``.
    """
    return _run_scenarios(scenarios=[_make_python_scenario(), _make_js_scenario()])
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
if __name__ == "__main__":
|
|
426
|
+
raise SystemExit(main())
|
abstractagent/tools/__init__.py
CHANGED
|
@@ -7,6 +7,7 @@ Agent-specific tools (execute_python, self_improve) are defined locally.
|
|
|
7
7
|
# Import common tools from AbstractCore (canonical source)
|
|
8
8
|
from abstractcore.tools.common_tools import (
|
|
9
9
|
list_files,
|
|
10
|
+
analyze_code,
|
|
10
11
|
read_file,
|
|
11
12
|
search_files,
|
|
12
13
|
write_file,
|
|
@@ -24,6 +25,7 @@ from .self_improve import self_improve
|
|
|
24
25
|
ALL_TOOLS = [
|
|
25
26
|
# File operations (from abstractcore)
|
|
26
27
|
list_files,
|
|
28
|
+
analyze_code,
|
|
27
29
|
read_file,
|
|
28
30
|
search_files,
|
|
29
31
|
write_file,
|
|
@@ -41,6 +43,7 @@ ALL_TOOLS = [
|
|
|
41
43
|
__all__ = [
|
|
42
44
|
# File operations
|
|
43
45
|
"list_files",
|
|
46
|
+
"analyze_code",
|
|
44
47
|
"read_file",
|
|
45
48
|
"search_files",
|
|
46
49
|
"write_file",
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: abstractagent
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Agent implementations using AbstractRuntime and AbstractCore
|
|
5
5
|
Requires-Python: >=3.10
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
7
7
|
License-File: LICENSE
|
|
8
|
-
Requires-Dist: abstractcore
|
|
8
|
+
Requires-Dist: abstractcore[tools]
|
|
9
9
|
Requires-Dist: abstractruntime
|
|
10
10
|
Provides-Extra: dev
|
|
11
11
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
@@ -18,7 +18,7 @@ Agent implementations using AbstractRuntime and AbstractCore.
|
|
|
18
18
|
## Features
|
|
19
19
|
|
|
20
20
|
- **ReAct Agent**: Reason-Act-Observe loop with tool calling
|
|
21
|
-
- **
|
|
21
|
+
- **Host UX in AbstractCode**: the interactive terminal shell lives in **AbstractCode**; AbstractAgent stays focused on agent patterns/workflows
|
|
22
22
|
- **Pause/Resume**: Durable agent state with interrupt/resume capability
|
|
23
23
|
- **Ask User**: Agent can ask questions with multiple choice + free text
|
|
24
24
|
- **Ledger Recording**: All tool calls recorded for auditability
|
|
@@ -102,11 +102,11 @@ state = agent.run_to_completion()
|
|
|
102
102
|
agent.clear_state("agent_state.json")
|
|
103
103
|
```
|
|
104
104
|
|
|
105
|
-
##
|
|
105
|
+
## Interactive Shell (AbstractCode)
|
|
106
106
|
|
|
107
107
|
```bash
|
|
108
|
-
#
|
|
109
|
-
|
|
108
|
+
# The interactive REPL moved to AbstractCode (host UX).
|
|
109
|
+
abstractcode --agent react --provider ollama --model qwen3:4b-instruct-2507-q4_K_M
|
|
110
110
|
```
|
|
111
111
|
|
|
112
112
|
## Architecture
|
|
@@ -127,8 +127,7 @@ AbstractAgent
|
|
|
127
127
|
|
|
128
128
|
## Available Tools
|
|
129
129
|
|
|
130
|
-
- `
|
|
131
|
-
- `
|
|
132
|
-
- `
|
|
133
|
-
-
|
|
134
|
-
- `ask_user(question, choices)` - Ask the user a question (built-in)
|
|
130
|
+
- Default tool callables are re-exported from AbstractCore in `abstractagent.tools` (file ops, web tools, `execute_command`), plus:
|
|
131
|
+
- `execute_python(code, timeout_s=...)`
|
|
132
|
+
- `self_improve(suggestion, ...)`
|
|
133
|
+
- The agent also exposes schema-only built-ins (`ask_user`, `recall_memory`, `inspect_vars`, `remember`, `compact_memory`) which are translated into **Runtime effects** by the workflow adapters (durable; no callable persistence).
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
abstractagent/__init__.py,sha256=-o1xKOyT3rhA67Iey2RCeZzU5SiFLB1mX4F2tAKH98Y,948
|
|
2
|
+
abstractagent/repl.py,sha256=6TWVxEYAAXLR5jjrc3FKmEy4Sm4xzdX5REQKx-lH35g,1089
|
|
3
|
+
abstractagent/adapters/__init__.py,sha256=k_7Lz8nOoMHmGeYrSbIMtLXIvmqJTzOo33g7v8C-qrM,284
|
|
4
|
+
abstractagent/adapters/codeact_runtime.py,sha256=VkmU1LO-5kr6GtKl4S7IeOorbOtE1GUtc6wGmbPzVEk,48774
|
|
5
|
+
abstractagent/adapters/memact_runtime.py,sha256=o_IuAIZTK_Nafp0hNwWBiPBSuyfRzU7SqL9rCuKCekg,29567
|
|
6
|
+
abstractagent/adapters/react_runtime.py,sha256=XEJlAoh3E9cgLfF8D2bDVgdp2Ya0l8RxxM6AGJ4gS4E,63871
|
|
7
|
+
abstractagent/agents/__init__.py,sha256=9U15lIVgGBZVsmDCKtz4AyTJFTI3nUq4ej8Ugk8lvhM,548
|
|
8
|
+
abstractagent/agents/base.py,sha256=HfGvkD24lBCEDbhwjDf4vtmrxti_fyrhMW3AjP12eu0,16745
|
|
9
|
+
abstractagent/agents/codeact.py,sha256=lvt_kgT3DuqOHeOPEyKTzm7H3mbBEk6tiMwMzGcQjIg,9680
|
|
10
|
+
abstractagent/agents/memact.py,sha256=tHemdUSC7vZUFFUznZ0FEJMOnjRZQU9oIx-FiR7FGN8,8648
|
|
11
|
+
abstractagent/agents/react.py,sha256=vxS2vna_gANvGhrEzAUOIfWJljbHeFAZt5tE846Lvp4,10107
|
|
12
|
+
abstractagent/logic/__init__.py,sha256=QeUxFEyaoi4ymPVEwno1mirBpMP77EtWJa-pTHakY2g,524
|
|
13
|
+
abstractagent/logic/builtins.py,sha256=I2IMrTIZmy0j9xOJkuOxgpu4fUMy6cwvdyy545a_HRI,8605
|
|
14
|
+
abstractagent/logic/codeact.py,sha256=BphuRhGL4T0QCoD6fcVDhgx3TQG5Vl4c-L6iKfeI-9o,6480
|
|
15
|
+
abstractagent/logic/memact.py,sha256=v6v5vV79nJvALpvl8vXJJhRY_j0GaM22t6sJAGN661A,4753
|
|
16
|
+
abstractagent/logic/react.py,sha256=SHU1YUE8uT_uuvq7CH2zY51rozAtHtVutS4NICJvep0,8154
|
|
17
|
+
abstractagent/logic/types.py,sha256=idLhxgBaN3LY7tDmwMNL9hgWCKM5P1BRNv5YYkXMy80,595
|
|
18
|
+
abstractagent/sandbox/__init__.py,sha256=Vay_BJzmfMcf-cAUoapDbckCyhHGQHFzObVR98x2j3g,186
|
|
19
|
+
abstractagent/sandbox/interface.py,sha256=DtL_RYaXgPojW-vUIgEPi_69sleSNQU-YCWrwZ_WSoI,462
|
|
20
|
+
abstractagent/sandbox/local.py,sha256=mFQzFgec0qgJ8VNbB0ZjeZi58BgrIE0frMFsasKe3iI,2143
|
|
21
|
+
abstractagent/scripts/__init__.py,sha256=paHuNvq6FzV1nVyGV-ixPW8K1rumoLpSvwcMIbP0y8U,130
|
|
22
|
+
abstractagent/scripts/lmstudio_tool_eval.py,sha256=FX_rvbLFY0KeT38i2e4p6htffzEiAR1mBXQhd6dK3zE,16568
|
|
23
|
+
abstractagent/tools/__init__.py,sha256=wDoWaQ2XqvLChMuXhQspXrdb-RyFWtBp6HQbUM3gGMc,1252
|
|
24
|
+
abstractagent/tools/code_execution.py,sha256=rQBJXfTGdCxVSr7SqTH17mQ9ylImNDcT5R7_bBIvygA,1417
|
|
25
|
+
abstractagent/tools/self_improve.py,sha256=yOtsa0iS5OxVPLpu3MN_fPxPTEMl3Qnm1GWYJHFsZe8,1732
|
|
26
|
+
abstractagent-0.3.0.dist-info/licenses/LICENSE,sha256=6rL4UIO5IdK59THf7fx0q6Hmxp5grSFi7-kWLcczseA,1083
|
|
27
|
+
abstractagent-0.3.0.dist-info/METADATA,sha256=mgdP9LfNgeQ5HOqUVLx7RijgHBWNUzc2kjtHrrRDI8A,3513
|
|
28
|
+
abstractagent-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
29
|
+
abstractagent-0.3.0.dist-info/entry_points.txt,sha256=tEaR0KtY-chcgRTd6ZVkvsqqaDpw2rvi-1RFktHGczU,56
|
|
30
|
+
abstractagent-0.3.0.dist-info/top_level.txt,sha256=cgtC3Vjz_piTAMmRkd73tbUxk2jPeG3IxMbo7JK3RTU,14
|
|
31
|
+
abstractagent-0.3.0.dist-info/RECORD,,
|