devagent-cli 3.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devagent/__init__.py +1 -0
- devagent/app/__init__.py +1 -0
- devagent/app/agent.py +717 -0
- devagent/app/llm.py +83 -0
- devagent/app/memory.py +309 -0
- devagent/app/patcher.py +83 -0
- devagent/app/planner.py +76 -0
- devagent/app/reviewer.py +65 -0
- devagent/app/sandbox.py +105 -0
- devagent/app/state.py +113 -0
- devagent/cli.py +282 -0
- devagent/tools/__init__.py +1 -0
- devagent/tools/benchmark_runner.py +184 -0
- devagent/tools/file_ops.py +52 -0
- devagent/tools/git_tools.py +91 -0
- devagent/tools/linter.py +55 -0
- devagent/tools/search.py +65 -0
- devagent/tools/semantic_search.py +60 -0
- devagent/tools/surgical_patcher.py +39 -0
- devagent/tools/test_runner.py +143 -0
- devagent/utils/__init__.py +1 -0
- devagent/utils/config.py +116 -0
- devagent/utils/logger.py +94 -0
- devagent/utils/metrics.py +130 -0
- devagent_cli-3.2.1.dist-info/METADATA +480 -0
- devagent_cli-3.2.1.dist-info/RECORD +30 -0
- devagent_cli-3.2.1.dist-info/WHEEL +5 -0
- devagent_cli-3.2.1.dist-info/entry_points.txt +2 -0
- devagent_cli-3.2.1.dist-info/licenses/LICENSE +21 -0
- devagent_cli-3.2.1.dist-info/top_level.txt +1 -0
devagent/app/agent.py
ADDED
|
@@ -0,0 +1,717 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core ReAct Agent — the main execution engine.
|
|
3
|
+
|
|
4
|
+
Implements the full production loop:
|
|
5
|
+
PLAN → THOUGHT → ACTION → OBSERVATION → GENERATE FIX →
|
|
6
|
+
SELF-REVIEW → PATCH → TEST → RETRY
|
|
7
|
+
|
|
8
|
+
Integrates:
|
|
9
|
+
- Planner layer
|
|
10
|
+
- Retrieval layer (semantic search + memory)
|
|
11
|
+
- Tool execution layer
|
|
12
|
+
- Self-review layer
|
|
13
|
+
- Patch engine
|
|
14
|
+
- Sandbox execution
|
|
15
|
+
- Metrics tracking
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import re
|
|
21
|
+
import os
|
|
22
|
+
import time
|
|
23
|
+
from typing import Any
|
|
24
|
+
|
|
25
|
+
from devagent.app.llm import query, query_with_context
|
|
26
|
+
from devagent.app.reviewer import review_code, revise_code
|
|
27
|
+
from devagent.app.state import AgentState
|
|
28
|
+
from devagent.app.planner import generate_plan
|
|
29
|
+
from devagent.app.patcher import generate_diff, apply_patch, format_diff_summary
|
|
30
|
+
from devagent.app.memory import WorkingMemory, chunk_project, SemanticIndex
|
|
31
|
+
from devagent.tools.search import search_code
|
|
32
|
+
from devagent.tools.file_ops import read_file, write_file, list_files
|
|
33
|
+
from devagent.tools.test_runner import run_tests
|
|
34
|
+
from devagent.tools.linter import lint_code
|
|
35
|
+
from devagent.tools.git_tools import git_diff, git_status
|
|
36
|
+
from devagent.tools.semantic_search import semantic_search, build_index, get_relevant_chunks
|
|
37
|
+
from devagent.tools.surgical_patcher import apply_surgical_patch
|
|
38
|
+
from devagent.utils.logger import AgentLogger
|
|
39
|
+
from devagent.utils.metrics import RunMetrics, Timer
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ── Prompt templates (kept SHORT for small models) ───────────────────────────
|
|
43
|
+
|
|
44
|
+
THOUGHT_PROMPT = """\
|
|
45
|
+
You are a coding agent. Your task:
|
|
46
|
+
{task}
|
|
47
|
+
|
|
48
|
+
Project root: {project_root}
|
|
49
|
+
Current step: {step}/{max_steps}
|
|
50
|
+
Previous attempts: {attempts}
|
|
51
|
+
|
|
52
|
+
{plan_context}
|
|
53
|
+
|
|
54
|
+
{history_summary}
|
|
55
|
+
|
|
56
|
+
{project_structure}
|
|
57
|
+
|
|
58
|
+
{retrieval_context}
|
|
59
|
+
|
|
60
|
+
Decide the SINGLE next action. Choose ONE:
|
|
61
|
+
- list_files: <relative_path>
|
|
62
|
+
- search_code: <keyword>
|
|
63
|
+
- semantic_search: <query>
|
|
64
|
+
- read_file: <relative_path>
|
|
65
|
+
- write_file: <relative_path>
|
|
66
|
+
- surgical_patch: <file> | <SEARCH> | <REPLACE>
|
|
67
|
+
- run_tests: <optional_path>
|
|
68
|
+
- lint_code: <relative_path>
|
|
69
|
+
- git_diff
|
|
70
|
+
|
|
71
|
+
STRATEGY:
|
|
72
|
+
1. START by using 'run_tests' to identify the exact failure location.
|
|
73
|
+
2. USE 'read_file' on the failing file before attempting a fix.
|
|
74
|
+
3. PREFER 'surgical_patch' for logic fixes. Format: file.py | <SEARCH> | <REPLACE>
|
|
75
|
+
4. USE 'write_file' ONLY for creating brand new files.
|
|
76
|
+
5. ALWAYS use full relative paths (e.g. benchmarks/math/calc.py).
|
|
77
|
+
6. If tests pass but system errors remain, verify the file path is correct.
|
|
78
|
+
|
|
79
|
+
Reply in this EXACT format (two lines only):
|
|
80
|
+
THOUGHT: <your reasoning>
|
|
81
|
+
ACTION: <tool_name>: <argument>
|
|
82
|
+
"""
|
|
83
|
+
|
|
84
|
+
# Prompt used by Agent._generate_fix() to request a complete replacement
# file. BUGFIX: the original template repeated the TASK/FILE/error/retrieval
# sections twice verbatim, inflating the prompt and confusing small models;
# each section now appears exactly once.
FIX_PROMPT = """\
TASK: {task}

FILE: {file_path}

{error_context}

{retrieval_context}

CURRENT CODE:
{file_content}

Fix the bug. Output ONLY the COMPLETE Python code.
"""
|
|
107
|
+
|
|
108
|
+
# Matches the "ACTION: <tool>: <arg>" line of a model reply.
# Group 1 = tool name (restricted to the known tool whitelist),
# group 2 = raw argument text (may be empty, e.g. for git_diff).
EXTRACT_ACTION_PATTERN = re.compile(
    r"ACTION:\s*(search_code|semantic_search|read_file|write_file|surgical_patch|run_tests|lint_code|list_files|git_diff)\s*:?\s*(.*)",
    re.IGNORECASE,
)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class Agent:
    """ReAct agent with planner, retrieval, self-review, and sandbox support."""

    def __init__(self, task: str, project_root: str = ".", max_steps: int = 5):
        """Set up state, logging, metrics and working memory.

        Args:
            task: Natural-language description of what to accomplish.
            project_root: Directory the agent operates on (made absolute).
            max_steps: Upper bound on ReAct iterations before giving up.
        """
        self.state = AgentState(
            task=task,
            project_root=os.path.abspath(project_root),
            max_steps=max_steps,
            # working_root starts equal to project_root; sandbox runs may
            # later point it elsewhere — TODO confirm against sandbox.py.
            working_root=os.path.abspath(project_root),
        )
        self.logger = AgentLogger(log_dir=os.path.join(project_root, "logs"))
        self.metrics = RunMetrics(task=task)
        self.memory = WorkingMemory()
        # Lazily-populated semantic index; stays None until built.
        self._semantic_index: SemanticIndex | None = None
|
|
128
|
+
|
|
129
|
+
# ── Public entry point ───────────────────────────────────────────────
|
|
130
|
+
|
|
131
|
+
    def run(self) -> AgentState:
        """Execute the full agent loop. Returns final state.

        Phases: initial scan → retrieval index → plan, then up to
        ``max_steps`` ReAct iterations; returns early on the first
        iteration whose tests pass. Sets ``state.status`` to
        'success' or 'fail' and computes a confidence score either way.
        """
        self.state.status = "running"
        self.metrics.model = self._get_model()
        self.logger.log_event("agent_start", {
            "task": self.state.task,
            "model": self.metrics.model,
        })

        # Phase 0: Initial scan + plan
        self._initial_scan()
        self._build_retrieval_index()
        self._run_planner()

        print("\n" + "=" * 60)
        print(" DEVELOPER CODE INTELLIGENCE AGENT")
        print(f" Task: {self.state.task}")
        print(f" Project: {self.state.project_root}")
        print(f" Max iterations: {self.state.max_steps}")
        print("=" * 60)

        if self.state.plan:
            print(f"\n [PLAN] {self.state.plan.get('raw_plan', '')[:200]}")

        for step in range(1, self.state.max_steps + 1):
            self.state.current_step = step
            self.state.attempts += 1

            print(f"\n{'-' * 40}")
            print(f" ITERATION {step}/{self.state.max_steps}")
            print(f"{'-' * 40}")

            # Time the whole iteration so per-step latency lands in metrics.
            with Timer() as t:
                step_result = self._run_iteration(step)

            self.metrics.record_step(
                step=step,
                action=self.state.last_action,
                latency_s=t.elapsed,
                # llm.query stashes sizes on itself after each call —
                # presumably set by the llm module; getattr guards absence.
                prompt_chars=getattr(query, '_last_prompt_chars', 0),
                response_chars=getattr(query, '_last_response_chars', 0),
                status=step_result,
            )

            if step_result == "success":
                self.state.status = "success"
                self._calculate_confidence()
                self.metrics.finalize()
                self.logger.log_event("agent_complete", {
                    "status": "success", "steps": step,
                    **self.metrics.summary(),
                })
                print("\n[OK] AGENT COMPLETED SUCCESSFULLY")
                return self.state
        # Exhausted all iterations
        self.state.status = "fail"
        self._calculate_confidence()
        self.metrics.finalize()
        self.logger.log_event("agent_complete", {
            "status": "fail", "steps": self.state.max_steps,
            **self.metrics.summary(),
        })
        print("\n[FAIL] AGENT FAILED -- max iterations reached")
        return self.state
|
|
195
|
+
|
|
196
|
+
def _calculate_confidence(self) -> float:
|
|
197
|
+
"""Calculate a trust score (0.0 - 1.0) for the final result."""
|
|
198
|
+
score = 0.0
|
|
199
|
+
reasons = []
|
|
200
|
+
|
|
201
|
+
# 1. Test Success (+0.50)
|
|
202
|
+
if self.state.test_exit_code == 0 and "collected 0 items" not in self.state.test_output:
|
|
203
|
+
score += 0.50
|
|
204
|
+
reasons.append("Tests passed successfully")
|
|
205
|
+
elif self.state.test_exit_code == 0:
|
|
206
|
+
reasons.append("No tests found to validate")
|
|
207
|
+
else:
|
|
208
|
+
reasons.append("Tests failed or not fully resolved")
|
|
209
|
+
|
|
210
|
+
# 2. Surgical Precision (+0.20)
|
|
211
|
+
# Check if any patch was a surgical_patch (which is more precise)
|
|
212
|
+
surgical_used = any("surgical_patch" in h.get("action", "") for h in self.state.history)
|
|
213
|
+
if surgical_used:
|
|
214
|
+
score += 0.20
|
|
215
|
+
reasons.append("Used precise surgical patching")
|
|
216
|
+
else:
|
|
217
|
+
reasons.append("Used full-file replacement (less precise)")
|
|
218
|
+
|
|
219
|
+
# 3. Self-Review Reliability (+0.15)
|
|
220
|
+
# If last review was approved on first try
|
|
221
|
+
if self.state.last_review and "APPROVED" in self.state.last_review.upper():
|
|
222
|
+
score += 0.15
|
|
223
|
+
reasons.append("Fix passed internal self-review")
|
|
224
|
+
|
|
225
|
+
# 4. Step Efficiency (+0.15)
|
|
226
|
+
# Fewer steps means higher confidence in the solution's clarity
|
|
227
|
+
if self.state.current_step <= self.state.max_steps // 2:
|
|
228
|
+
score += 0.15
|
|
229
|
+
reasons.append("Solution reached efficiently")
|
|
230
|
+
|
|
231
|
+
self.state.confidence_score = min(1.0, score)
|
|
232
|
+
self.state.confidence_reasons = reasons
|
|
233
|
+
return self.state.confidence_score
|
|
234
|
+
|
|
235
|
+
# ── Phase 0: Initialization ──────────────────────────────────────────
|
|
236
|
+
|
|
237
|
+
def _initial_scan(self) -> None:
|
|
238
|
+
"""Identify key files to give the agent immediate context."""
|
|
239
|
+
files = list_files(self.state.project_root, extension=".py")
|
|
240
|
+
if files:
|
|
241
|
+
rel_files = [os.path.relpath(f, self.state.project_root) for f in files]
|
|
242
|
+
self.state.history.append({
|
|
243
|
+
"step": 0, "thought": "Initializing project scan.",
|
|
244
|
+
"action": "system_scan",
|
|
245
|
+
"observation": f"Found {len(rel_files)} Python files: {', '.join(rel_files[:15])}",
|
|
246
|
+
"review": "", "test_result": "", "status": "info",
|
|
247
|
+
})
|
|
248
|
+
|
|
249
|
+
def _build_retrieval_index(self) -> None:
|
|
250
|
+
"""Build semantic retrieval index for the project."""
|
|
251
|
+
try:
|
|
252
|
+
build_index(self.state.project_root)
|
|
253
|
+
except Exception as exc:
|
|
254
|
+
print(f"[RETRIEVAL] Index build skipped: {exc}")
|
|
255
|
+
|
|
256
|
+
def _run_planner(self) -> None:
|
|
257
|
+
"""Run the planner to generate an action plan."""
|
|
258
|
+
files = list_files(self.state.project_root, extension=".py")
|
|
259
|
+
rel_files = [os.path.relpath(f, self.state.project_root) for f in files[:30]]
|
|
260
|
+
plan = generate_plan(self.state.task, rel_files)
|
|
261
|
+
self.state.plan = plan
|
|
262
|
+
|
|
263
|
+
# ── Single iteration ─────────────────────────────────────────────────
|
|
264
|
+
|
|
265
|
+
    def _run_iteration(self, step: int) -> str:
        """Run one full ReAct iteration. Returns 'success' or 'fail'.

        Sequence: retrieve context → think → execute action → (maybe)
        generate a fix → self-review → apply patch → run tests. Status is
        'success' only when a non-empty test run exits cleanly.
        """

        # Retrieve relevant context for this step
        self._retrieve_context()

        # STEP 1 — THOUGHT + ACTION
        thought, action_name, action_arg = self._think(step)
        self.state.last_thought = thought
        self.state.last_action = f"{action_name}: {action_arg}"
        self.state.thoughts.append(thought)
        self.state.actions.append(f"{action_name}: {action_arg}")

        # STEP 2 — EXECUTE ACTION → OBSERVATION
        observation = self._execute_action(action_name, action_arg)
        self.state.last_observation = observation
        # Cap the stored observation so state/prompt sizes stay bounded.
        self.state.observations.append(observation[:2000])

        # STEP 3 — GENERATE FIX (if we have a file in context)
        code_fix = ""
        review_text = ""
        patch_summary = ""

        if self.state.current_file and action_name in ("read_file", "search_code", "semantic_search", "write_file"):
            code_fix = self._generate_fix()
            self.state.last_code_fix = code_fix

        # STEP 4 — SELF-REVIEW
        if code_fix:
            code_fix, review_text = self._self_review(code_fix)
            self.state.last_review = review_text

            # STEP 5 — APPLY PATCH
            original = self.state.current_file_content or ""
            patch_result = apply_patch(self.state.current_file, original, code_fix)
            patch_summary = format_diff_summary(patch_result)
            self.state.patches_applied.append(patch_result)
            observation += f"\n{patch_summary}"

        # STEP 6 — RUN TESTS
        test_exit, test_output, failing = run_tests(self.state.working_root or self.state.project_root)
        self.state.test_exit_code = test_exit
        self.state.test_output = test_output
        self.state.failing_functions = failing

        # Determine success
        # "collected 0 items" guards against a vacuous pass with no tests.
        if test_exit == 0 and "collected 0 items" not in test_output:
            status = "success"
        else:
            status = "fail"

        # Log this step
        self.logger.log_step(
            step=step,
            thought=thought,
            action=f"{action_name}: {action_arg}",
            observation=observation,
            review=review_text,
            test_result=test_output,
            status=status,
            # llm.query stashes its last latency on itself — TODO confirm.
            latency=getattr(query, '_last_latency', 0),
            model=self._get_model(),
            patch_summary=patch_summary,
        )

        # Store in history (observation truncated harder than in state).
        self.state.history.append({
            "step": step, "thought": thought,
            "action": action_name, "action_arg": action_arg,
            "observation": observation[:500],
            "review": review_text, "test_status": status,
        })

        return status
|
|
339
|
+
|
|
340
|
+
# ── Retrieval ────────────────────────────────────────────────────────
|
|
341
|
+
|
|
342
|
+
def _retrieve_context(self) -> None:
|
|
343
|
+
"""Retrieve relevant code chunks for the current task, prioritizing failures."""
|
|
344
|
+
query_text = self.state.task
|
|
345
|
+
if self.state.failing_functions:
|
|
346
|
+
query_text += " " + " ".join(self.state.failing_functions)
|
|
347
|
+
|
|
348
|
+
chunks = get_relevant_chunks(query_text, top_k=5)
|
|
349
|
+
|
|
350
|
+
# Prioritize chunks that mention failing functions explicitly
|
|
351
|
+
if self.state.failing_functions:
|
|
352
|
+
prioritized = []
|
|
353
|
+
others = []
|
|
354
|
+
for chunk in chunks:
|
|
355
|
+
if any(func in chunk.content for func in self.state.failing_functions):
|
|
356
|
+
prioritized.append(chunk)
|
|
357
|
+
else:
|
|
358
|
+
others.append(chunk)
|
|
359
|
+
chunks = prioritized + others
|
|
360
|
+
|
|
361
|
+
self.state.retrieved_chunks = chunks[:3]
|
|
362
|
+
for chunk in chunks[:3]:
|
|
363
|
+
self.memory.add_chunk(chunk)
|
|
364
|
+
|
|
365
|
+
def _get_retrieval_context(self) -> str:
|
|
366
|
+
"""Format retrieved context for prompts."""
|
|
367
|
+
ctx = self.memory.get_context(max_chars=1500)
|
|
368
|
+
if ctx:
|
|
369
|
+
return f"RETRIEVED CONTEXT:\n{ctx}"
|
|
370
|
+
return ""
|
|
371
|
+
|
|
372
|
+
# ── THOUGHT phase ────────────────────────────────────────────────────
|
|
373
|
+
|
|
374
|
+
def _think(self, step: int) -> tuple[str, str, str]:
|
|
375
|
+
"""Ask the LLM to decide the next action."""
|
|
376
|
+
history_summary = self._format_history()
|
|
377
|
+
retrieval_context = self._get_retrieval_context()
|
|
378
|
+
plan_context = ""
|
|
379
|
+
if self.state.plan:
|
|
380
|
+
plan_context = f"CURRENT PLAN:\n{self.state.plan.get('raw_plan', 'No plan available')}"
|
|
381
|
+
|
|
382
|
+
# Get project structure summary
|
|
383
|
+
structure = ""
|
|
384
|
+
scan_step = next((h for h in self.state.history if h["action"] == "system_scan"), None)
|
|
385
|
+
if scan_step:
|
|
386
|
+
structure = f"FILE SYSTEM STRUCTURE:\n{scan_step['observation']}"
|
|
387
|
+
|
|
388
|
+
prompt = THOUGHT_PROMPT.format(
|
|
389
|
+
task=self.state.task,
|
|
390
|
+
project_root=self.state.project_root,
|
|
391
|
+
step=step,
|
|
392
|
+
max_steps=self.state.max_steps,
|
|
393
|
+
attempts=self.state.attempts,
|
|
394
|
+
history_summary=history_summary,
|
|
395
|
+
retrieval_context=retrieval_context,
|
|
396
|
+
plan_context=plan_context,
|
|
397
|
+
project_structure=structure,
|
|
398
|
+
)
|
|
399
|
+
|
|
400
|
+
response = query(prompt)
|
|
401
|
+
thought, action_name, action_arg = self._parse_thought_response(response)
|
|
402
|
+
|
|
403
|
+
# Fallback logic
|
|
404
|
+
if not action_name:
|
|
405
|
+
if not self.state.current_file:
|
|
406
|
+
file_hint = self._extract_filename_from_task()
|
|
407
|
+
if file_hint and step > 1:
|
|
408
|
+
action_name, action_arg = "read_file", file_hint
|
|
409
|
+
thought = f"Falling back to reading {file_hint} directly."
|
|
410
|
+
else:
|
|
411
|
+
action_name = "search_code"
|
|
412
|
+
action_arg = self._extract_search_keyword()
|
|
413
|
+
thought = "Falling back to keyword search."
|
|
414
|
+
else:
|
|
415
|
+
action_name, action_arg = "run_tests", ""
|
|
416
|
+
thought = "Falling back to run_tests."
|
|
417
|
+
|
|
418
|
+
action_arg = action_arg.strip('"').strip("'")
|
|
419
|
+
|
|
420
|
+
# Dedup: if same action tried before, try alternate
|
|
421
|
+
if self._already_tried(action_name, action_arg):
|
|
422
|
+
# If we keep trying run_tests/search_code, force read_file if possible
|
|
423
|
+
if action_name in ("run_tests", "search_code", "semantic_search"):
|
|
424
|
+
file_hint = self._extract_filename_from_task() or self._extract_file_from_plan()
|
|
425
|
+
if file_hint:
|
|
426
|
+
action_name, action_arg = "read_file", file_hint
|
|
427
|
+
thought = f"Action {action_name} already tried. Forcing read_file on {file_hint}."
|
|
428
|
+
elif self.state.current_file:
|
|
429
|
+
action_name, action_arg = "read_file", self.state.current_file
|
|
430
|
+
thought = f"Action {action_name} already tried. Reading current file again."
|
|
431
|
+
else:
|
|
432
|
+
action_name, action_arg = "list_files", "."
|
|
433
|
+
thought = f"Action {action_name} already tried. Listing files to find something new."
|
|
434
|
+
|
|
435
|
+
return thought, action_name, action_arg
|
|
436
|
+
|
|
437
|
+
def _parse_thought_response(self, response: str) -> tuple[str, str, str]:
|
|
438
|
+
"""Extract thought and action from LLM response."""
|
|
439
|
+
thought = ""
|
|
440
|
+
for line in response.splitlines():
|
|
441
|
+
if line.strip().upper().startswith("THOUGHT:"):
|
|
442
|
+
thought = line.split(":", 1)[1].strip()
|
|
443
|
+
break
|
|
444
|
+
|
|
445
|
+
match = EXTRACT_ACTION_PATTERN.search(response)
|
|
446
|
+
action_name = match.group(1).lower().strip() if match else ""
|
|
447
|
+
action_arg = match.group(2).strip() if match else ""
|
|
448
|
+
return thought, action_name, action_arg
|
|
449
|
+
|
|
450
|
+
# ── ACTION execution ─────────────────────────────────────────────────
|
|
451
|
+
|
|
452
|
+
    def _execute_action(self, action_name: str, action_arg: str) -> str:
        """Execute a tool and return the observation string.

        Side effects vary by tool: file/search tools update
        ``state.current_file``; run_tests updates test output and the
        stagnation counter. Unknown tools return an [ERROR] string.
        """
        print(f" [TOOL] Executing: {action_name}({action_arg})")
        root = self.state.working_root or self.state.project_root

        if action_name == "search_code":
            result = search_code(action_arg, root)
            # Opportunistically latch onto a file mentioned in the results.
            self._extract_file_from_search(result)
            return result

        elif action_name == "semantic_search":
            result = semantic_search(action_arg, root)
            # Try to extract file from results
            self._extract_file_from_search(result)
            return result

        elif action_name == "read_file":
            path = self._resolve_path(action_arg)
            content = read_file(path)
            self.state.current_file = path
            # Don't cache an error string as if it were file content.
            self.state.current_file_content = "" if content.startswith("[ERROR]") else content
            return content

        elif action_name == "write_file":
            # Note: this only *targets* the file — the actual write happens
            # later via the patch step once a fix has been generated.
            path = self._resolve_path(action_arg)
            self.state.current_file = path
            if not self.state.current_file_content:
                self.state.current_file_content = ""
            return f"[OK] Targeting {path} for writing."

        elif action_name == "run_tests":
            code, out, failing = run_tests(root, action_arg)
            self.state.test_output = out
            self.state.failing_functions = failing

            # Detect stagnation
            # Identical consecutive test output means our fixes changed nothing.
            if out == self.state.last_test_output and out:
                self.state.stagnant_steps += 1
            else:
                self.state.stagnant_steps = 0
            self.state.last_test_output = out

            return out

        elif action_name == "surgical_patch":
            # Expected arg: "file.py | SEARCH_TEXT | REPLACE_TEXT"
            parts = action_arg.split("|")
            if len(parts) < 3:
                return "[ERROR] surgical_patch requires format: file | SEARCH | REPLACE"
            path = self._resolve_path(parts[0].strip())
            search = parts[1].strip()
            replace = parts[2].strip()
            return apply_surgical_patch(path, search, replace)

        elif action_name == "lint_code":
            path = self._resolve_path(action_arg)
            _, output = lint_code(path)
            return output

        elif action_name == "list_files":
            path = self._resolve_path(action_arg)
            files = list_files(path)
            if not files:
                return f"[INFO] No .py files found in {action_arg}"
            rel_files = [os.path.relpath(f, root) for f in files]
            return f"Found {len(rel_files)} files: " + ", ".join(rel_files[:20])

        elif action_name == "git_diff":
            return git_diff(root)

        else:
            return f"[ERROR] Unknown action: {action_name}"
|
|
524
|
+
|
|
525
|
+
# ── FIX generation ───────────────────────────────────────────────────
|
|
526
|
+
|
|
527
|
+
def _generate_fix(self) -> str:
|
|
528
|
+
"""Ask the LLM to generate a code fix for the current file."""
|
|
529
|
+
error_context = ""
|
|
530
|
+
if self.state.test_output and self.state.test_exit_code != 0:
|
|
531
|
+
error_context = f"TEST ERRORS (FIX THESE):\n{self.state.test_output[:2000]}"
|
|
532
|
+
|
|
533
|
+
retrieval_context = self._get_retrieval_context()
|
|
534
|
+
|
|
535
|
+
prompt = FIX_PROMPT.format(
|
|
536
|
+
task=self.state.task,
|
|
537
|
+
file_path=self.state.current_file,
|
|
538
|
+
file_content=self.state.current_file_content[:2000],
|
|
539
|
+
error_context=error_context,
|
|
540
|
+
retrieval_context=retrieval_context,
|
|
541
|
+
)
|
|
542
|
+
|
|
543
|
+
fix = query(prompt)
|
|
544
|
+
return self._strip_code_fences(fix)
|
|
545
|
+
|
|
546
|
+
# ── SELF-REVIEW loop ─────────────────────────────────────────────────
|
|
547
|
+
|
|
548
|
+
def _self_review(self, code_fix: str, max_revisions: int = 2) -> tuple[str, str]:
|
|
549
|
+
"""Self-review loop: critique → revise until APPROVED."""
|
|
550
|
+
for revision in range(max_revisions):
|
|
551
|
+
approved, review_text = review_code(
|
|
552
|
+
self.state.current_file_content, code_fix, self.state.task,
|
|
553
|
+
)
|
|
554
|
+
print(f" [REVIEW] #{revision + 1}: {review_text[:100]}")
|
|
555
|
+
|
|
556
|
+
if approved:
|
|
557
|
+
return code_fix, review_text
|
|
558
|
+
|
|
559
|
+
print(f" [REVISE] Revising code (attempt {revision + 1})...")
|
|
560
|
+
code_fix = revise_code(code_fix, review_text, self.state.task)
|
|
561
|
+
code_fix = self._strip_code_fences(code_fix)
|
|
562
|
+
|
|
563
|
+
return code_fix, f"Auto-approved after {max_revisions} revisions"
|
|
564
|
+
|
|
565
|
+
# ── Helpers ───────────────────────────────────────────────────────────
|
|
566
|
+
|
|
567
|
+
def _format_history(self) -> str:
|
|
568
|
+
"""Format recent history for the prompt."""
|
|
569
|
+
if not self.state.history:
|
|
570
|
+
return "No previous actions."
|
|
571
|
+
|
|
572
|
+
lines = ["PROJECT CONTEXT:"]
|
|
573
|
+
scan_step = next((h for h in self.state.history if h["action"] == "system_scan"), None)
|
|
574
|
+
if scan_step:
|
|
575
|
+
lines.append(f" {scan_step['observation']}")
|
|
576
|
+
|
|
577
|
+
lines.append("\nPREVIOUS ACTIONS:")
|
|
578
|
+
real_steps = [h for h in self.state.history if h["action"] != "system_scan"]
|
|
579
|
+
for h in real_steps[-3:]:
|
|
580
|
+
lines.append(
|
|
581
|
+
f" Step {h['step']}: {h['action']}({h.get('action_arg', '')}) "
|
|
582
|
+
f"-> {h.get('status', h.get('test_status', 'info'))}"
|
|
583
|
+
)
|
|
584
|
+
|
|
585
|
+
if self.state.failing_functions:
|
|
586
|
+
lines.append(f"\nFAILING FUNCTIONS DETECTED: {', '.join(self.state.failing_functions)}")
|
|
587
|
+
lines.append("Look for these functions in the codebase. They are likely where the bug is.")
|
|
588
|
+
|
|
589
|
+
if self.state.stagnant_steps >= 1:
|
|
590
|
+
lines.append("\n[CRITICAL PIVOT] YOUR PREVIOUS FIXES ARE NOT WORKING.")
|
|
591
|
+
if self.state.failing_functions:
|
|
592
|
+
lines.append(f"STOP focusing on the original task name. FOCUS EXCLUSIVELY ON THESE FAILURES: {', '.join(self.state.failing_functions)}")
|
|
593
|
+
lines.append("Read the code of the FAILING functions. The bug is there, not where you are currently looking.")
|
|
594
|
+
|
|
595
|
+
if self.state.test_output:
|
|
596
|
+
lines.append(f"\nLAST TEST ERROR:\n{self.state.test_output[:1500]}")
|
|
597
|
+
|
|
598
|
+
return "\n".join(lines)
|
|
599
|
+
|
|
600
|
+
def _extract_file_from_search(self, search_output: str) -> None:
|
|
601
|
+
"""Try to find a file path in search output and set it as current."""
|
|
602
|
+
root = self.state.working_root or self.state.project_root
|
|
603
|
+
for line in search_output.splitlines():
|
|
604
|
+
if ":" in line:
|
|
605
|
+
candidate = line.split(":")[0].strip()
|
|
606
|
+
if candidate.endswith(".py"):
|
|
607
|
+
full = os.path.join(root, candidate) if not os.path.isabs(candidate) else candidate
|
|
608
|
+
if os.path.isfile(full):
|
|
609
|
+
self.state.current_file = full
|
|
610
|
+
self.state.current_file_content = read_file(full)
|
|
611
|
+
return
|
|
612
|
+
|
|
613
|
+
def _resolve_path(self, path: str) -> str:
|
|
614
|
+
"""Resolve a relative path against the working root.
|
|
615
|
+
|
|
616
|
+
Includes 'Path Anchoring' — if the file isn't at root, search for it
|
|
617
|
+
in subdirectories to prevent hallucination errors.
|
|
618
|
+
"""
|
|
619
|
+
root = self.state.working_root or self.state.project_root
|
|
620
|
+
if os.path.isabs(path):
|
|
621
|
+
return path
|
|
622
|
+
|
|
623
|
+
target = os.path.join(root, path)
|
|
624
|
+
if os.path.exists(target):
|
|
625
|
+
return target
|
|
626
|
+
|
|
627
|
+
# Path Anchoring: Look for the file elsewhere
|
|
628
|
+
filename = os.path.basename(path)
|
|
629
|
+
for r, _, files in os.walk(root):
|
|
630
|
+
if filename in files:
|
|
631
|
+
found_path = os.path.join(r, filename)
|
|
632
|
+
print(f" [PATH] Auto-anchored '{path}' to '{os.path.relpath(found_path, root)}'")
|
|
633
|
+
return found_path
|
|
634
|
+
|
|
635
|
+
return target
|
|
636
|
+
|
|
637
|
+
@staticmethod
|
|
638
|
+
def _strip_code_fences(text: str) -> str:
|
|
639
|
+
"""Extract Python code from LLM response, stripping markdown and commentary."""
|
|
640
|
+
if not text:
|
|
641
|
+
return ""
|
|
642
|
+
|
|
643
|
+
# 1. Look for ```python ... ```
|
|
644
|
+
match = re.search(r"```python\s+(.*?)\s+```", text, re.DOTALL | re.IGNORECASE)
|
|
645
|
+
if match:
|
|
646
|
+
return match.group(1).strip()
|
|
647
|
+
|
|
648
|
+
# 2. Look for ``` ... ```
|
|
649
|
+
match = re.search(r"```\s+(.*?)\s+```", text, re.DOTALL)
|
|
650
|
+
if match:
|
|
651
|
+
return match.group(1).strip()
|
|
652
|
+
|
|
653
|
+
# 3. Aggressive extraction: find first import/def/class and last line of code
|
|
654
|
+
lines = text.splitlines()
|
|
655
|
+
start_idx = 0
|
|
656
|
+
found_start = False
|
|
657
|
+
for i, line in enumerate(lines):
|
|
658
|
+
if re.match(r"^\s*(import|from|def|class|#|@)", line):
|
|
659
|
+
start_idx = i
|
|
660
|
+
found_start = True
|
|
661
|
+
break
|
|
662
|
+
|
|
663
|
+
if not found_start:
|
|
664
|
+
return text.strip()
|
|
665
|
+
|
|
666
|
+
# Strip trailing non-code (explanations)
|
|
667
|
+
end_idx = len(lines)
|
|
668
|
+
for i in range(len(lines) - 1, start_idx, -1):
|
|
669
|
+
line = lines[i].strip()
|
|
670
|
+
# If line ends with typical code markers, it's likely code
|
|
671
|
+
if line and (line.endswith(":") or line.endswith(")") or line.endswith("]") or
|
|
672
|
+
line.endswith("}") or line.endswith("'") or line.endswith('"') or
|
|
673
|
+
line in {"True", "False", "None"} or re.match(r"^\s*#", line)):
|
|
674
|
+
end_idx = i + 1
|
|
675
|
+
break
|
|
676
|
+
|
|
677
|
+
return "\n".join(lines[start_idx:end_idx]).strip()
|
|
678
|
+
|
|
679
|
+
def _extract_filename_from_task(self) -> str:
|
|
680
|
+
"""Extract a .py filename mentioned in the task description."""
|
|
681
|
+
match = re.search(r"(\w+\.py)", self.state.task)
|
|
682
|
+
return match.group(1) if match else ""
|
|
683
|
+
|
|
684
|
+
def _extract_search_keyword(self) -> str:
|
|
685
|
+
"""Extract a short keyword from the task for searching."""
|
|
686
|
+
task = self.state.task.lower()
|
|
687
|
+
for keyword in ["divide", "add", "subtract", "multiply", "error",
|
|
688
|
+
"bug", "fix", "test", "function", "class", "import",
|
|
689
|
+
"validate", "parse", "process", "batch", "register"]:
|
|
690
|
+
if keyword in task:
|
|
691
|
+
return keyword
|
|
692
|
+
words = [w for w in task.split() if len(w) > 3 and w not in
|
|
693
|
+
("the", "that", "this", "with", "from", "should", "when")]
|
|
694
|
+
return words[0] if words else "def"
|
|
695
|
+
|
|
696
|
+
def _extract_file_from_plan(self) -> str:
|
|
697
|
+
"""Extract the first likely file from the plan."""
|
|
698
|
+
if self.state.plan and self.state.plan.get("likely_files"):
|
|
699
|
+
files = self.state.plan.get("likely_files")
|
|
700
|
+
if isinstance(files, list) and files:
|
|
701
|
+
return files[0]
|
|
702
|
+
if isinstance(files, str):
|
|
703
|
+
return files.split(",")[0].strip()
|
|
704
|
+
return ""
|
|
705
|
+
|
|
706
|
+
def _already_tried(self, action_name: str, action_arg: str) -> bool:
|
|
707
|
+
"""Check if we already tried this exact action in the last 2 steps."""
|
|
708
|
+
# Look back at last 2 steps to avoid immediate loops
|
|
709
|
+
for h in self.state.history[-2:]:
|
|
710
|
+
if h.get("action") == action_name and h.get("action_arg") == action_arg:
|
|
711
|
+
return True
|
|
712
|
+
return False
|
|
713
|
+
|
|
714
|
+
def _get_model(self) -> str:
|
|
715
|
+
"""Get the current model name."""
|
|
716
|
+
from devagent.app.llm import MODEL
|
|
717
|
+
return MODEL
|