devagent-cli 3.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
devagent/app/agent.py ADDED
@@ -0,0 +1,717 @@
1
+ """
2
+ Core ReAct Agent — the main execution engine.
3
+
4
+ Implements the full production loop:
5
+ PLAN → THOUGHT → ACTION → OBSERVATION → GENERATE FIX →
6
+ SELF-REVIEW → PATCH → TEST → RETRY
7
+
8
+ Integrates:
9
+ - Planner layer
10
+ - Retrieval layer (semantic search + memory)
11
+ - Tool execution layer
12
+ - Self-review layer
13
+ - Patch engine
14
+ - Sandbox execution
15
+ - Metrics tracking
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ import os
22
+ import time
23
+ from typing import Any
24
+
25
+ from devagent.app.llm import query, query_with_context
26
+ from devagent.app.reviewer import review_code, revise_code
27
+ from devagent.app.state import AgentState
28
+ from devagent.app.planner import generate_plan
29
+ from devagent.app.patcher import generate_diff, apply_patch, format_diff_summary
30
+ from devagent.app.memory import WorkingMemory, chunk_project, SemanticIndex
31
+ from devagent.tools.search import search_code
32
+ from devagent.tools.file_ops import read_file, write_file, list_files
33
+ from devagent.tools.test_runner import run_tests
34
+ from devagent.tools.linter import lint_code
35
+ from devagent.tools.git_tools import git_diff, git_status
36
+ from devagent.tools.semantic_search import semantic_search, build_index, get_relevant_chunks
37
+ from devagent.tools.surgical_patcher import apply_surgical_patch
38
+ from devagent.utils.logger import AgentLogger
39
+ from devagent.utils.metrics import RunMetrics, Timer
40
+
41
+
42
# ── Prompt templates (kept SHORT for small models) ───────────────────────────

THOUGHT_PROMPT = """\
You are a coding agent. Your task:
{task}

Project root: {project_root}
Current step: {step}/{max_steps}
Previous attempts: {attempts}

{plan_context}

{history_summary}

{project_structure}

{retrieval_context}

Decide the SINGLE next action. Choose ONE:
- list_files: <relative_path>
- search_code: <keyword>
- semantic_search: <query>
- read_file: <relative_path>
- write_file: <relative_path>
- surgical_patch: <file> | <SEARCH> | <REPLACE>
- run_tests: <optional_path>
- lint_code: <relative_path>
- git_diff

STRATEGY:
1. START by using 'run_tests' to identify the exact failure location.
2. USE 'read_file' on the failing file before attempting a fix.
3. PREFER 'surgical_patch' for logic fixes. Format: file.py | <SEARCH> | <REPLACE>
4. USE 'write_file' ONLY for creating brand new files.
5. ALWAYS use full relative paths (e.g. benchmarks/math/calc.py).
6. If tests pass but system errors remain, verify the file path is correct.

Reply in this EXACT format (two lines only):
THOUGHT: <your reasoning>
ACTION: <tool_name>: <argument>
"""

# BUG FIX: the original template repeated TASK/FILE/{error_context}/
# {retrieval_context}/CURRENT CODE twice, doubling prompt length (and token
# cost) with no added information — each section now appears exactly once.
FIX_PROMPT = """\
TASK: {task}

FILE: {file_path}

{error_context}

{retrieval_context}

CURRENT CODE:
{file_content}

Fix the bug. Output ONLY the COMPLETE Python code.
"""

# Matches "ACTION: <tool>: <arg>" lines emitted by the model; group(1) is the
# tool name, group(2) the (possibly empty) argument.
EXTRACT_ACTION_PATTERN = re.compile(
    r"ACTION:\s*(search_code|semantic_search|read_file|write_file|surgical_patch|run_tests|lint_code|list_files|git_diff)\s*:?\s*(.*)",
    re.IGNORECASE,
)
112
+
113
+
114
+ class Agent:
115
+ """ReAct agent with planner, retrieval, self-review, and sandbox support."""
116
+
117
def __init__(self, task: str, project_root: str = ".", max_steps: int = 5):
    """Set up agent state, structured logging, metrics, and working memory.

    Args:
        task: Natural-language description of what the agent must do.
        project_root: Directory the agent operates on (absolutized here).
        max_steps: Iteration budget for the ReAct loop.
    """
    root = os.path.abspath(project_root)
    self.state = AgentState(
        task=task,
        project_root=root,
        max_steps=max_steps,
        working_root=root,
    )
    self.logger = AgentLogger(log_dir=os.path.join(project_root, "logs"))
    self.metrics = RunMetrics(task=task)
    self.memory = WorkingMemory()
    # Semantic index is built lazily; None until first use.
    self._semantic_index: SemanticIndex | None = None
128
+
129
+ # ── Public entry point ───────────────────────────────────────────────
130
+
131
def run(self) -> AgentState:
    """Execute the full agent loop. Returns final state.

    Phase 0 scans the project, builds the retrieval index, and plans.
    Then up to ``max_steps`` ReAct iterations run; the loop exits early
    on the first iteration whose tests pass.
    """
    self.state.status = "running"
    self.metrics.model = self._get_model()
    self.logger.log_event("agent_start", {
        "task": self.state.task,
        "model": self.metrics.model,
    })

    # Phase 0: initial scan + retrieval index + plan.
    self._initial_scan()
    self._build_retrieval_index()
    self._run_planner()

    banner = "=" * 60
    print("\n" + banner)
    print(" DEVELOPER CODE INTELLIGENCE AGENT")
    print(f" Task: {self.state.task}")
    print(f" Project: {self.state.project_root}")
    print(f" Max iterations: {self.state.max_steps}")
    print(banner)

    if self.state.plan:
        print(f"\n [PLAN] {self.state.plan.get('raw_plan', '')[:200]}")

    for step in range(1, self.state.max_steps + 1):
        self.state.current_step = step
        self.state.attempts += 1

        divider = "-" * 40
        print(f"\n{divider}")
        print(f" ITERATION {step}/{self.state.max_steps}")
        print(f"{divider}")

        with Timer() as t:
            outcome = self._run_iteration(step)

        # query() stashes prompt/response sizes on itself as attributes;
        # default to 0 if the LLM layer didn't record them.
        self.metrics.record_step(
            step=step,
            action=self.state.last_action,
            latency_s=t.elapsed,
            prompt_chars=getattr(query, '_last_prompt_chars', 0),
            response_chars=getattr(query, '_last_response_chars', 0),
            status=outcome,
        )

        if outcome == "success":
            self.state.status = "success"
            self._calculate_confidence()
            self.metrics.finalize()
            self.logger.log_event("agent_complete", {
                "status": "success", "steps": step,
                **self.metrics.summary(),
            })
            print("\n[OK] AGENT COMPLETED SUCCESSFULLY")
            return self.state

    # Budget exhausted without a passing test run.
    self.state.status = "fail"
    self._calculate_confidence()
    self.metrics.finalize()
    self.logger.log_event("agent_complete", {
        "status": "fail", "steps": self.state.max_steps,
        **self.metrics.summary(),
    })
    print("\n[FAIL] AGENT FAILED -- max iterations reached")
    return self.state
195
+
196
+ def _calculate_confidence(self) -> float:
197
+ """Calculate a trust score (0.0 - 1.0) for the final result."""
198
+ score = 0.0
199
+ reasons = []
200
+
201
+ # 1. Test Success (+0.50)
202
+ if self.state.test_exit_code == 0 and "collected 0 items" not in self.state.test_output:
203
+ score += 0.50
204
+ reasons.append("Tests passed successfully")
205
+ elif self.state.test_exit_code == 0:
206
+ reasons.append("No tests found to validate")
207
+ else:
208
+ reasons.append("Tests failed or not fully resolved")
209
+
210
+ # 2. Surgical Precision (+0.20)
211
+ # Check if any patch was a surgical_patch (which is more precise)
212
+ surgical_used = any("surgical_patch" in h.get("action", "") for h in self.state.history)
213
+ if surgical_used:
214
+ score += 0.20
215
+ reasons.append("Used precise surgical patching")
216
+ else:
217
+ reasons.append("Used full-file replacement (less precise)")
218
+
219
+ # 3. Self-Review Reliability (+0.15)
220
+ # If last review was approved on first try
221
+ if self.state.last_review and "APPROVED" in self.state.last_review.upper():
222
+ score += 0.15
223
+ reasons.append("Fix passed internal self-review")
224
+
225
+ # 4. Step Efficiency (+0.15)
226
+ # Fewer steps means higher confidence in the solution's clarity
227
+ if self.state.current_step <= self.state.max_steps // 2:
228
+ score += 0.15
229
+ reasons.append("Solution reached efficiently")
230
+
231
+ self.state.confidence_score = min(1.0, score)
232
+ self.state.confidence_reasons = reasons
233
+ return self.state.confidence_score
234
+
235
+ # ── Phase 0: Initialization ──────────────────────────────────────────
236
+
237
def _initial_scan(self) -> None:
    """Seed history with a project file listing so step 1 has context."""
    files = list_files(self.state.project_root, extension=".py")
    if not files:
        return
    rel_files = [os.path.relpath(f, self.state.project_root) for f in files]
    # Recorded as a pseudo-step 0 so _format_history can surface it later.
    self.state.history.append({
        "step": 0, "thought": "Initializing project scan.",
        "action": "system_scan",
        "observation": f"Found {len(rel_files)} Python files: {', '.join(rel_files[:15])}",
        "review": "", "test_result": "", "status": "info",
    })
248
+
249
def _build_retrieval_index(self) -> None:
    """Build the semantic retrieval index for the project.

    Retrieval is an enhancement, not a requirement — any failure is
    reported and swallowed so the run can continue without it.
    """
    try:
        build_index(self.state.project_root)
    except Exception as exc:
        print(f"[RETRIEVAL] Index build skipped: {exc}")
255
+
256
def _run_planner(self) -> None:
    """Generate an action plan from the task and a sample of project files."""
    files = list_files(self.state.project_root, extension=".py")
    # Cap at 30 files to keep the planner prompt small.
    rel_files = [os.path.relpath(f, self.state.project_root) for f in files[:30]]
    self.state.plan = generate_plan(self.state.task, rel_files)
262
+
263
+ # ── Single iteration ─────────────────────────────────────────────────
264
+
265
def _run_iteration(self, step: int) -> str:
    """Run one full ReAct iteration.

    Pipeline: retrieve context → think → execute action → (if a file is in
    context) generate fix → self-review → apply patch → run tests.

    Returns:
        "success" when the test suite passed AND pytest actually collected
        tests; otherwise "fail". (The original docstring said 'continue',
        but run() keys off the literal string "success" and everything
        else means another iteration — corrected here.)
    """
    # Refresh retrieval context for this step.
    self._retrieve_context()

    # STEP 1 — THOUGHT + ACTION
    thought, action_name, action_arg = self._think(step)
    self.state.last_thought = thought
    self.state.last_action = f"{action_name}: {action_arg}"
    self.state.thoughts.append(thought)
    self.state.actions.append(f"{action_name}: {action_arg}")

    # STEP 2 — EXECUTE ACTION → OBSERVATION
    observation = self._execute_action(action_name, action_arg)
    self.state.last_observation = observation
    self.state.observations.append(observation[:2000])

    code_fix = ""
    review_text = ""
    patch_summary = ""

    # STEP 3 — GENERATE FIX, only when an action surfaced code context.
    if self.state.current_file and action_name in ("read_file", "search_code", "semantic_search", "write_file"):
        code_fix = self._generate_fix()
        self.state.last_code_fix = code_fix

        # STEP 4 — SELF-REVIEW (skip if generation produced nothing).
        if code_fix:
            code_fix, review_text = self._self_review(code_fix)
            self.state.last_review = review_text

            # STEP 5 — APPLY PATCH
            original = self.state.current_file_content or ""
            patch_result = apply_patch(self.state.current_file, original, code_fix)
            patch_summary = format_diff_summary(patch_result)
            self.state.patches_applied.append(patch_result)
            observation += f"\n{patch_summary}"

    # STEP 6 — RUN TESTS
    test_exit, test_output, failing = run_tests(self.state.working_root or self.state.project_root)
    self.state.test_exit_code = test_exit
    self.state.test_output = test_output
    self.state.failing_functions = failing

    # Success requires a clean exit AND that pytest collected real tests.
    if test_exit == 0 and "collected 0 items" not in test_output:
        status = "success"
    else:
        status = "fail"

    # Log this step.
    self.logger.log_step(
        step=step,
        thought=thought,
        action=f"{action_name}: {action_arg}",
        observation=observation,
        review=review_text,
        test_result=test_output,
        status=status,
        latency=getattr(query, '_last_latency', 0),
        model=self._get_model(),
        patch_summary=patch_summary,
    )

    # Store in history (observation truncated to keep prompts small).
    self.state.history.append({
        "step": step, "thought": thought,
        "action": action_name, "action_arg": action_arg,
        "observation": observation[:500],
        "review": review_text, "test_status": status,
    })

    return status
339
+
340
+ # ── Retrieval ────────────────────────────────────────────────────────
341
+
342
def _retrieve_context(self) -> None:
    """Pull relevant code chunks into working memory, failures first."""
    query_text = self.state.task
    if self.state.failing_functions:
        query_text += " " + " ".join(self.state.failing_functions)

    chunks = get_relevant_chunks(query_text, top_k=5)

    # Stable partition: chunks mentioning a failing function move to the
    # front, everything else keeps its relative order behind them.
    if self.state.failing_functions:
        def _mentions_failure(chunk) -> bool:
            return any(fn in chunk.content for fn in self.state.failing_functions)
        hits = [c for c in chunks if _mentions_failure(c)]
        misses = [c for c in chunks if not _mentions_failure(c)]
        chunks = hits + misses

    top = chunks[:3]
    self.state.retrieved_chunks = top
    for chunk in top:
        self.memory.add_chunk(chunk)
364
+
365
+ def _get_retrieval_context(self) -> str:
366
+ """Format retrieved context for prompts."""
367
+ ctx = self.memory.get_context(max_chars=1500)
368
+ if ctx:
369
+ return f"RETRIEVED CONTEXT:\n{ctx}"
370
+ return ""
371
+
372
+ # ── THOUGHT phase ────────────────────────────────────────────────────
373
+
374
def _think(self, step: int) -> tuple[str, str, str]:
    """Ask the LLM to decide the next action.

    Applies two safety nets: a fallback when the reply has no parseable
    ACTION line, and a dedup pivot when the chosen action was already
    tried in the last two steps.

    Returns:
        (thought, action_name, action_arg)
    """
    history_summary = self._format_history()
    retrieval_context = self._get_retrieval_context()
    plan_context = ""
    if self.state.plan:
        plan_context = f"CURRENT PLAN:\n{self.state.plan.get('raw_plan', 'No plan available')}"

    # Project structure comes from the step-0 system scan, if present.
    structure = ""
    scan_step = next((h for h in self.state.history if h["action"] == "system_scan"), None)
    if scan_step:
        structure = f"FILE SYSTEM STRUCTURE:\n{scan_step['observation']}"

    prompt = THOUGHT_PROMPT.format(
        task=self.state.task,
        project_root=self.state.project_root,
        step=step,
        max_steps=self.state.max_steps,
        attempts=self.state.attempts,
        history_summary=history_summary,
        retrieval_context=retrieval_context,
        plan_context=plan_context,
        project_structure=structure,
    )

    response = query(prompt)
    thought, action_name, action_arg = self._parse_thought_response(response)

    # Fallback logic when the model produced no parseable action.
    if not action_name:
        if not self.state.current_file:
            file_hint = self._extract_filename_from_task()
            if file_hint and step > 1:
                action_name, action_arg = "read_file", file_hint
                thought = f"Falling back to reading {file_hint} directly."
            else:
                action_name = "search_code"
                action_arg = self._extract_search_keyword()
                thought = "Falling back to keyword search."
        else:
            action_name, action_arg = "run_tests", ""
            thought = "Falling back to run_tests."

    action_arg = action_arg.strip('"').strip("'")

    # Dedup: if the same action was tried recently, pivot to an alternate.
    if self._already_tried(action_name, action_arg):
        # BUG FIX: remember the repeated action BEFORE overwriting it, so the
        # pivot message names the action that was actually duplicated instead
        # of the replacement action ("read_file"/"list_files").
        repeated = action_name
        # If we keep trying run_tests/search_code, force read_file if possible.
        if action_name in ("run_tests", "search_code", "semantic_search"):
            file_hint = self._extract_filename_from_task() or self._extract_file_from_plan()
            if file_hint:
                action_name, action_arg = "read_file", file_hint
                thought = f"Action {repeated} already tried. Forcing read_file on {file_hint}."
            elif self.state.current_file:
                action_name, action_arg = "read_file", self.state.current_file
                thought = f"Action {repeated} already tried. Reading current file again."
            else:
                action_name, action_arg = "list_files", "."
                thought = f"Action {repeated} already tried. Listing files to find something new."

    return thought, action_name, action_arg
436
+
437
def _parse_thought_response(self, response: str) -> tuple[str, str, str]:
    """Split an LLM reply into (thought, action_name, action_arg).

    Missing pieces come back as empty strings — callers handle fallback.
    """
    # First line starting with "THOUGHT:" (case-insensitive), else "".
    thought = next(
        (ln.split(":", 1)[1].strip()
         for ln in response.splitlines()
         if ln.strip().upper().startswith("THOUGHT:")),
        "",
    )

    match = EXTRACT_ACTION_PATTERN.search(response)
    if match:
        return thought, match.group(1).lower().strip(), match.group(2).strip()
    return thought, "", ""
449
+
450
+ # ── ACTION execution ─────────────────────────────────────────────────
451
+
452
def _execute_action(self, action_name: str, action_arg: str) -> str:
    """Dispatch a tool call and return its observation text."""
    print(f" [TOOL] Executing: {action_name}({action_arg})")
    root = self.state.working_root or self.state.project_root

    if action_name == "search_code":
        result = search_code(action_arg, root)
        self._extract_file_from_search(result)
        return result

    if action_name == "semantic_search":
        result = semantic_search(action_arg, root)
        # Search hits may name a file — promote it to current context.
        self._extract_file_from_search(result)
        return result

    if action_name == "read_file":
        path = self._resolve_path(action_arg)
        content = read_file(path)
        self.state.current_file = path
        # Never cache an error string as if it were real file content.
        self.state.current_file_content = "" if content.startswith("[ERROR]") else content
        return content

    if action_name == "write_file":
        # Only marks the target; the fix phase produces the actual content.
        path = self._resolve_path(action_arg)
        self.state.current_file = path
        if not self.state.current_file_content:
            self.state.current_file_content = ""
        return f"[OK] Targeting {path} for writing."

    if action_name == "run_tests":
        code, out, failing = run_tests(root, action_arg)
        self.state.test_output = out
        self.state.failing_functions = failing

        # Stagnation detection: identical non-empty output twice in a row
        # means the latest fixes changed nothing.
        if out == self.state.last_test_output and out:
            self.state.stagnant_steps += 1
        else:
            self.state.stagnant_steps = 0
        self.state.last_test_output = out

        return out

    if action_name == "surgical_patch":
        # Expected arg: "file.py | SEARCH_TEXT | REPLACE_TEXT"
        parts = action_arg.split("|")
        if len(parts) < 3:
            return "[ERROR] surgical_patch requires format: file | SEARCH | REPLACE"
        path = self._resolve_path(parts[0].strip())
        return apply_surgical_patch(path, parts[1].strip(), parts[2].strip())

    if action_name == "lint_code":
        _, output = lint_code(self._resolve_path(action_arg))
        return output

    if action_name == "list_files":
        path = self._resolve_path(action_arg)
        files = list_files(path)
        if not files:
            return f"[INFO] No .py files found in {action_arg}"
        rel_files = [os.path.relpath(f, root) for f in files]
        return f"Found {len(rel_files)} files: " + ", ".join(rel_files[:20])

    if action_name == "git_diff":
        return git_diff(root)

    return f"[ERROR] Unknown action: {action_name}"
524
+
525
+ # ── FIX generation ───────────────────────────────────────────────────
526
+
527
def _generate_fix(self) -> str:
    """Ask the LLM for a complete fixed version of the current file."""
    error_context = ""
    if self.state.test_output and self.state.test_exit_code != 0:
        # Truncate to keep the prompt small for local models.
        error_context = f"TEST ERRORS (FIX THESE):\n{self.state.test_output[:2000]}"

    prompt = FIX_PROMPT.format(
        task=self.state.task,
        file_path=self.state.current_file,
        file_content=self.state.current_file_content[:2000],
        error_context=error_context,
        retrieval_context=self._get_retrieval_context(),
    )

    return self._strip_code_fences(query(prompt))
545
+
546
+ # ── SELF-REVIEW loop ─────────────────────────────────────────────────
547
+
548
def _self_review(self, code_fix: str, max_revisions: int = 2) -> tuple[str, str]:
    """Critique-and-revise loop: returns (final_code, review_text).

    Revises up to ``max_revisions`` times; if the reviewer never approves,
    the last revision is shipped anyway rather than stalling the run.
    """
    for revision in range(max_revisions):
        approved, review_text = review_code(
            self.state.current_file_content, code_fix, self.state.task,
        )
        print(f" [REVIEW] #{revision + 1}: {review_text[:100]}")

        if approved:
            return code_fix, review_text

        print(f" [REVISE] Revising code (attempt {revision + 1})...")
        code_fix = self._strip_code_fences(
            revise_code(code_fix, review_text, self.state.task)
        )

    return code_fix, f"Auto-approved after {max_revisions} revisions"
564
+
565
+ # ── Helpers ───────────────────────────────────────────────────────────
566
+
567
+ def _format_history(self) -> str:
568
+ """Format recent history for the prompt."""
569
+ if not self.state.history:
570
+ return "No previous actions."
571
+
572
+ lines = ["PROJECT CONTEXT:"]
573
+ scan_step = next((h for h in self.state.history if h["action"] == "system_scan"), None)
574
+ if scan_step:
575
+ lines.append(f" {scan_step['observation']}")
576
+
577
+ lines.append("\nPREVIOUS ACTIONS:")
578
+ real_steps = [h for h in self.state.history if h["action"] != "system_scan"]
579
+ for h in real_steps[-3:]:
580
+ lines.append(
581
+ f" Step {h['step']}: {h['action']}({h.get('action_arg', '')}) "
582
+ f"-> {h.get('status', h.get('test_status', 'info'))}"
583
+ )
584
+
585
+ if self.state.failing_functions:
586
+ lines.append(f"\nFAILING FUNCTIONS DETECTED: {', '.join(self.state.failing_functions)}")
587
+ lines.append("Look for these functions in the codebase. They are likely where the bug is.")
588
+
589
+ if self.state.stagnant_steps >= 1:
590
+ lines.append("\n[CRITICAL PIVOT] YOUR PREVIOUS FIXES ARE NOT WORKING.")
591
+ if self.state.failing_functions:
592
+ lines.append(f"STOP focusing on the original task name. FOCUS EXCLUSIVELY ON THESE FAILURES: {', '.join(self.state.failing_functions)}")
593
+ lines.append("Read the code of the FAILING functions. The bug is there, not where you are currently looking.")
594
+
595
+ if self.state.test_output:
596
+ lines.append(f"\nLAST TEST ERROR:\n{self.state.test_output[:1500]}")
597
+
598
+ return "\n".join(lines)
599
+
600
+ def _extract_file_from_search(self, search_output: str) -> None:
601
+ """Try to find a file path in search output and set it as current."""
602
+ root = self.state.working_root or self.state.project_root
603
+ for line in search_output.splitlines():
604
+ if ":" in line:
605
+ candidate = line.split(":")[0].strip()
606
+ if candidate.endswith(".py"):
607
+ full = os.path.join(root, candidate) if not os.path.isabs(candidate) else candidate
608
+ if os.path.isfile(full):
609
+ self.state.current_file = full
610
+ self.state.current_file_content = read_file(full)
611
+ return
612
+
613
+ def _resolve_path(self, path: str) -> str:
614
+ """Resolve a relative path against the working root.
615
+
616
+ Includes 'Path Anchoring' — if the file isn't at root, search for it
617
+ in subdirectories to prevent hallucination errors.
618
+ """
619
+ root = self.state.working_root or self.state.project_root
620
+ if os.path.isabs(path):
621
+ return path
622
+
623
+ target = os.path.join(root, path)
624
+ if os.path.exists(target):
625
+ return target
626
+
627
+ # Path Anchoring: Look for the file elsewhere
628
+ filename = os.path.basename(path)
629
+ for r, _, files in os.walk(root):
630
+ if filename in files:
631
+ found_path = os.path.join(r, filename)
632
+ print(f" [PATH] Auto-anchored '{path}' to '{os.path.relpath(found_path, root)}'")
633
+ return found_path
634
+
635
+ return target
636
+
637
+ @staticmethod
638
+ def _strip_code_fences(text: str) -> str:
639
+ """Extract Python code from LLM response, stripping markdown and commentary."""
640
+ if not text:
641
+ return ""
642
+
643
+ # 1. Look for ```python ... ```
644
+ match = re.search(r"```python\s+(.*?)\s+```", text, re.DOTALL | re.IGNORECASE)
645
+ if match:
646
+ return match.group(1).strip()
647
+
648
+ # 2. Look for ``` ... ```
649
+ match = re.search(r"```\s+(.*?)\s+```", text, re.DOTALL)
650
+ if match:
651
+ return match.group(1).strip()
652
+
653
+ # 3. Aggressive extraction: find first import/def/class and last line of code
654
+ lines = text.splitlines()
655
+ start_idx = 0
656
+ found_start = False
657
+ for i, line in enumerate(lines):
658
+ if re.match(r"^\s*(import|from|def|class|#|@)", line):
659
+ start_idx = i
660
+ found_start = True
661
+ break
662
+
663
+ if not found_start:
664
+ return text.strip()
665
+
666
+ # Strip trailing non-code (explanations)
667
+ end_idx = len(lines)
668
+ for i in range(len(lines) - 1, start_idx, -1):
669
+ line = lines[i].strip()
670
+ # If line ends with typical code markers, it's likely code
671
+ if line and (line.endswith(":") or line.endswith(")") or line.endswith("]") or
672
+ line.endswith("}") or line.endswith("'") or line.endswith('"') or
673
+ line in {"True", "False", "None"} or re.match(r"^\s*#", line)):
674
+ end_idx = i + 1
675
+ break
676
+
677
+ return "\n".join(lines[start_idx:end_idx]).strip()
678
+
679
+ def _extract_filename_from_task(self) -> str:
680
+ """Extract a .py filename mentioned in the task description."""
681
+ match = re.search(r"(\w+\.py)", self.state.task)
682
+ return match.group(1) if match else ""
683
+
684
+ def _extract_search_keyword(self) -> str:
685
+ """Extract a short keyword from the task for searching."""
686
+ task = self.state.task.lower()
687
+ for keyword in ["divide", "add", "subtract", "multiply", "error",
688
+ "bug", "fix", "test", "function", "class", "import",
689
+ "validate", "parse", "process", "batch", "register"]:
690
+ if keyword in task:
691
+ return keyword
692
+ words = [w for w in task.split() if len(w) > 3 and w not in
693
+ ("the", "that", "this", "with", "from", "should", "when")]
694
+ return words[0] if words else "def"
695
+
696
+ def _extract_file_from_plan(self) -> str:
697
+ """Extract the first likely file from the plan."""
698
+ if self.state.plan and self.state.plan.get("likely_files"):
699
+ files = self.state.plan.get("likely_files")
700
+ if isinstance(files, list) and files:
701
+ return files[0]
702
+ if isinstance(files, str):
703
+ return files.split(",")[0].strip()
704
+ return ""
705
+
706
+ def _already_tried(self, action_name: str, action_arg: str) -> bool:
707
+ """Check if we already tried this exact action in the last 2 steps."""
708
+ # Look back at last 2 steps to avoid immediate loops
709
+ for h in self.state.history[-2:]:
710
+ if h.get("action") == action_name and h.get("action_arg") == action_arg:
711
+ return True
712
+ return False
713
+
714
def _get_model(self) -> str:
    """Return the configured LLM model name.

    Imported inside the method so module import order stays flexible
    (avoids a potential circular import with devagent.app.llm).
    """
    from devagent.app.llm import MODEL
    return MODEL
+ return MODEL