devagent_cli-3.2.1-py3-none-any.whl
- devagent/__init__.py +1 -0
- devagent/app/__init__.py +1 -0
- devagent/app/agent.py +717 -0
- devagent/app/llm.py +83 -0
- devagent/app/memory.py +309 -0
- devagent/app/patcher.py +83 -0
- devagent/app/planner.py +76 -0
- devagent/app/reviewer.py +65 -0
- devagent/app/sandbox.py +105 -0
- devagent/app/state.py +113 -0
- devagent/cli.py +282 -0
- devagent/tools/__init__.py +1 -0
- devagent/tools/benchmark_runner.py +184 -0
- devagent/tools/file_ops.py +52 -0
- devagent/tools/git_tools.py +91 -0
- devagent/tools/linter.py +55 -0
- devagent/tools/search.py +65 -0
- devagent/tools/semantic_search.py +60 -0
- devagent/tools/surgical_patcher.py +39 -0
- devagent/tools/test_runner.py +143 -0
- devagent/utils/__init__.py +1 -0
- devagent/utils/config.py +116 -0
- devagent/utils/logger.py +94 -0
- devagent/utils/metrics.py +130 -0
- devagent_cli-3.2.1.dist-info/METADATA +480 -0
- devagent_cli-3.2.1.dist-info/RECORD +30 -0
- devagent_cli-3.2.1.dist-info/WHEEL +5 -0
- devagent_cli-3.2.1.dist-info/entry_points.txt +2 -0
- devagent_cli-3.2.1.dist-info/licenses/LICENSE +21 -0
- devagent_cli-3.2.1.dist-info/top_level.txt +1 -0
devagent/app/llm.py
ADDED
@@ -0,0 +1,83 @@
+"""
+LLM integration layer — talks to Ollama via the Python SDK.
+All prompts are kept SHORT to respect low-VRAM constraints.
+
+Supports:
+- Configurable model + inference options
+- Latency tracking
+- Graceful fallback on errors
+"""
+
+from __future__ import annotations
+
+import time
+import ollama
+
+from devagent.utils.config import MODELS, DEFAULT_INFERENCE_OPTIONS
+
+# ── Configuration (mutable at runtime via CLI) ───────────────────────────────
+MODEL = MODELS["primary"]
+OPTIONS = DEFAULT_INFERENCE_OPTIONS.copy()
+
+
+def set_model(model: str) -> None:
+    """Override the active model."""
+    global MODEL
+    MODEL = model
+
+
+def set_options(options: dict) -> None:
+    """Override inference options."""
+    global OPTIONS
+    OPTIONS = {**DEFAULT_INFERENCE_OPTIONS, **options}
+
+
+def query(prompt: str, *, system: str = "", model: str | None = None) -> str:
+    """Send a prompt to Ollama and return the response text.
+
+    Falls back to empty string on error so the agent loop can continue.
+    Returns the response text.
+    """
+    use_model = model or MODEL
+    messages: list[dict[str, str]] = []
+    if system:
+        messages.append({"role": "system", "content": system})
+    messages.append({"role": "user", "content": prompt})
+
+    try:
+        t0 = time.time()
+        response = ollama.chat(
+            model=use_model,
+            messages=messages,
+            options=OPTIONS,
+        )
+        elapsed = time.time() - t0
+        text = response["message"]["content"].strip()
+
+        # Track latency for metrics (stored globally for metrics collector)
+        query._last_latency = elapsed
+        query._last_prompt_chars = len(prompt)
+        query._last_response_chars = len(text)
+
+        return text
+    except Exception as exc:  # noqa: BLE001
+        print(f"[LLM ERROR] {exc}")
+        query._last_latency = 0
+        query._last_prompt_chars = len(prompt)
+        query._last_response_chars = 0
+        return ""
+
+# Initialize tracking attributes
+query._last_latency = 0.0
+query._last_prompt_chars = 0
+query._last_response_chars = 0
+
+
+def query_with_context(prompt: str, context: str, *, model: str | None = None) -> str:
+    """Query with additional context injected into the system message."""
+    system = (
+        "You are a senior Python developer. "
+        "Be concise. Output ONLY what is asked — no markdown fences unless requested.\n\n"
+        f"CONTEXT:\n{context[:2000]}"
+    )
+    return query(prompt, system=system, model=model)
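
The layer in use (a minimal sketch of mine, not shipped in the wheel): it assumes a local Ollama daemon is running and that the tag passed to set_model is already pulled. Since query() returns an empty string on failure, callers branch on truthiness.

from devagent.app import llm

llm.set_model("llama3.2:3b")  # hypothetical model tag; substitute one you have pulled
answer = llm.query("Explain what a unified diff is in one sentence.")
if answer:
    print(answer)
    print(f"latency: {llm.query._last_latency:.2f}s")  # attributes set by query()
else:
    print("LLM unavailable; agent loop continues with empty response.")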
devagent/app/memory.py
ADDED
@@ -0,0 +1,309 @@
+"""
+Memory Layer — Short-term runtime state + Long-term FAISS-backed semantic retrieval.
+
+Implements:
+- Semantic code chunking
+- Sentence-transformer embeddings
+- FAISS vector index for Top-K retrieval
+- Compressed memory summaries
+- Context pruning for low-VRAM operation
+
+Falls back gracefully if sentence-transformers or faiss are not installed.
+"""
+
+from __future__ import annotations
+
+import os
+import hashlib
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any
+
+from devagent.utils.config import (
+    SUPPORTED_EXTENSIONS,
+    IGNORE_DIRS,
+    MAX_FILE_SIZE_BYTES,
+    MAX_CHUNK_CHARS,
+    TOP_K_RETRIEVAL,
+)
+
+# ── Lazy imports for optional heavy deps ──────────────────────────────────────
+
+_FAISS_AVAILABLE = False
+_TRANSFORMERS_AVAILABLE = False
+_np = None
+_faiss = None
+_SentenceTransformer = None
+
+
+def _load_deps() -> None:
+    """Lazy-load heavy dependencies only when needed."""
+    global _FAISS_AVAILABLE, _TRANSFORMERS_AVAILABLE, _np, _faiss, _SentenceTransformer
+    try:
+        import numpy as np
+        _np = np
+    except ImportError:
+        return
+
+    try:
+        import faiss
+        _faiss = faiss
+        _FAISS_AVAILABLE = True
+    except ImportError:
+        pass
+
+    try:
+        from sentence_transformers import SentenceTransformer
+        _SentenceTransformer = SentenceTransformer
+        _TRANSFORMERS_AVAILABLE = True
+    except ImportError:
+        pass
+
+
+# ── Code Chunking ─────────────────────────────────────────────────────────────
+
+@dataclass
+class CodeChunk:
+    """A chunk of source code with metadata."""
+    file_path: str
+    start_line: int
+    end_line: int
+    content: str
+    language: str = "python"
+    chunk_hash: str = ""
+
+    def __post_init__(self) -> None:
+        if not self.chunk_hash:
+            self.chunk_hash = hashlib.md5(
+                self.content.encode("utf-8", errors="replace")
+            ).hexdigest()[:12]
+
+
+def chunk_file(file_path: str, max_chars: int = MAX_CHUNK_CHARS) -> list[CodeChunk]:
+    """Split a source file into semantic chunks.
+
+    Chunks on function/class boundaries when possible, falls back to
+    fixed-size splitting.
+    """
+    try:
+        content = Path(file_path).read_text(encoding="utf-8", errors="replace")
+    except Exception:
+        return []
+
+    if len(content) > MAX_FILE_SIZE_BYTES:
+        return []  # Skip very large files
+
+    ext = Path(file_path).suffix
+    lines = content.splitlines(keepends=True)
+    chunks: list[CodeChunk] = []
+    current_lines: list[str] = []
+    current_start = 1
+
+    def _flush(end: int) -> None:
+        text = "".join(current_lines).strip()
+        if text:
+            chunks.append(CodeChunk(
+                file_path=file_path,
+                start_line=current_start,
+                end_line=end,
+                content=text[:max_chars],
+                language=ext.lstrip("."),
+            ))
+
+    for i, line in enumerate(lines, start=1):
+        stripped = line.strip()
+
+        # Split on function/class boundaries (Python-aware)
+        is_boundary = (
+            stripped.startswith("def ") or
+            stripped.startswith("class ") or
+            stripped.startswith("async def ")
+        )
+
+        if is_boundary and current_lines:
+            _flush(i - 1)
+            current_lines = [line]
+            current_start = i
+        else:
+            current_lines.append(line)
+
+        # Force split if chunk gets too large
+        if len("".join(current_lines)) > max_chars:
+            _flush(i)
+            current_lines = []
+            current_start = i + 1
+
+    # Flush remaining
+    if current_lines:
+        _flush(len(lines))
+
+    return chunks
+
+
+def chunk_project(project_root: str) -> list[CodeChunk]:
+    """Recursively chunk all source files in a project."""
+    all_chunks: list[CodeChunk] = []
+    for root, dirs, files in os.walk(project_root):
+        # Skip ignored directories
+        dirs[:] = [d for d in dirs if d not in IGNORE_DIRS]
+        for f in files:
+            if Path(f).suffix in SUPPORTED_EXTENSIONS:
+                full_path = os.path.join(root, f)
+                all_chunks.extend(chunk_file(full_path))
+    return all_chunks
+
+
+# ── FAISS Vector Store ────────────────────────────────────────────────────────
+
+class SemanticIndex:
+    """FAISS-backed semantic search index for code chunks.
+
+    Falls back to keyword matching if FAISS or sentence-transformers
+    are not available.
+    """
+
+    EMBED_MODEL = "all-MiniLM-L6-v2"  # 80 MB, fast, good quality
+
+    def __init__(self) -> None:
+        _load_deps()
+        self.chunks: list[CodeChunk] = []
+        self._index = None
+        self._embedder = None
+        self._dimension = 0
+        self._ready = False
+
+    def build(self, chunks: list[CodeChunk]) -> bool:
+        """Build the FAISS index from code chunks.
+
+        Returns True if semantic index was built, False if falling back.
+        """
+        self.chunks = chunks
+        if not chunks:
+            return False
+
+        if not (_FAISS_AVAILABLE and _TRANSFORMERS_AVAILABLE):
+            print("[MEMORY] FAISS/sentence-transformers not available — using keyword fallback")
+            return False
+
+        try:
+            print(f"[MEMORY] Building semantic index over {len(chunks)} chunks...")
+            self._embedder = _SentenceTransformer(self.EMBED_MODEL)
+            texts = [c.content for c in chunks]
+            embeddings = self._embedder.encode(texts, show_progress_bar=False)
+            embeddings = _np.array(embeddings, dtype="float32")
+
+            self._dimension = embeddings.shape[1]
+            self._index = _faiss.IndexFlatIP(self._dimension)  # Inner product
+            _faiss.normalize_L2(embeddings)
+            self._index.add(embeddings)
+
+            self._ready = True
+            print(f"[MEMORY] Semantic index ready ({self._dimension}d, {len(chunks)} chunks)")
+            return True
+        except Exception as exc:
+            print(f"[MEMORY] Index build failed: {exc}")
+            self._ready = False
+            return False
+
+    def search(self, query: str, top_k: int = TOP_K_RETRIEVAL) -> list[CodeChunk]:
+        """Retrieve the most relevant chunks for a query."""
+        if not self.chunks:
+            return []
+
+        if self._ready and self._embedder and self._index:
+            return self._semantic_search(query, top_k)
+        else:
+            return self._keyword_search(query, top_k)
+
+    def _semantic_search(self, query: str, top_k: int) -> list[CodeChunk]:
+        """FAISS-powered semantic search."""
+        try:
+            q_embed = self._embedder.encode([query])
+            q_embed = _np.array(q_embed, dtype="float32")
+            _faiss.normalize_L2(q_embed)
+
+            k = min(top_k, len(self.chunks))
+            scores, indices = self._index.search(q_embed, k)
+
+            results = []
+            for idx in indices[0]:
+                if 0 <= idx < len(self.chunks):
+                    results.append(self.chunks[idx])
+            return results
+        except Exception as exc:
+            print(f"[MEMORY] Semantic search failed: {exc}")
+            return self._keyword_search(query, top_k)
+
+    def _keyword_search(self, query: str, top_k: int) -> list[CodeChunk]:
+        """Fallback keyword-based search."""
+        query_lower = query.lower()
+        scored = []
+        for chunk in self.chunks:
+            content_lower = chunk.content.lower()
+            score = content_lower.count(query_lower)
+            # Boost exact matches in function/class names
+            for line in chunk.content.splitlines()[:3]:
+                if query_lower in line.lower():
+                    score += 5
+            if score > 0:
+                scored.append((score, chunk))
+
+        scored.sort(key=lambda x: x[0], reverse=True)
+        return [c for _, c in scored[:top_k]]
+
+
+# ── Working Memory (Short-Term) ──────────────────────────────────────────────
+
+@dataclass
+class WorkingMemory:
+    """Short-term memory for the current agent run.
+
+    Stores retrieved chunks, compressed summaries, and recent
+    observations for context assembly.
+    """
+
+    retrieved_chunks: list[CodeChunk] = field(default_factory=list)
+    summaries: list[str] = field(default_factory=list)
+    observations: list[str] = field(default_factory=list)
+    thoughts: list[str] = field(default_factory=list)
+
+    def add_retrieval(self, chunks: list[CodeChunk]) -> None:
+        """Add retrieved chunks, deduplicating by hash."""
+        seen = {c.chunk_hash for c in self.retrieved_chunks}
+        for c in chunks:
+            if c.chunk_hash not in seen:
+                self.retrieved_chunks.append(c)
+                seen.add(c.chunk_hash)
+
+    def add_chunk(self, chunk: CodeChunk) -> None:
+        """Add a single chunk to memory."""
+        self.add_retrieval([chunk])
+
+    def get_context(self, max_chars: int = 3000) -> str:
+        """Assemble compressed context from retrieved chunks."""
+        parts: list[str] = []
+        total = 0
+
+        for chunk in self.retrieved_chunks:
+            header = f"# {chunk.file_path} (L{chunk.start_line}-{chunk.end_line})"
+            block = f"{header}\n{chunk.content}\n"
+            if total + len(block) > max_chars:
+                break
+            parts.append(block)
+            total += len(block)
+
+        return "\n".join(parts)
+
+    def add_summary(self, summary: str) -> None:
+        """Store a compressed summary."""
+        self.summaries.append(summary[:500])
+        # Keep only last 10 summaries
+        if len(self.summaries) > 10:
+            self.summaries = self.summaries[-10:]
+
+    def clear(self) -> None:
+        """Reset working memory."""
+        self.retrieved_chunks.clear()
+        self.summaries.clear()
+        self.observations.clear()
+        self.thoughts.clear()
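
How the pieces compose (an illustrative sketch, not part of the package): chunk a project, build the index, retrieve, and assemble prompt context. build() returning False only means the keyword fallback is active; the same search() call works either way.

from devagent.app.memory import SemanticIndex, WorkingMemory, chunk_project

chunks = chunk_project(".")             # CodeChunk list; IGNORE_DIRS are skipped
index = SemanticIndex()
index.build(chunks)                     # False => keyword fallback, still usable
memory = WorkingMemory()
memory.add_retrieval(index.search("load configuration"))
print(memory.get_context(max_chars=1500))  # deduped, size-capped context block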
devagent/app/patcher.py
ADDED
@@ -0,0 +1,83 @@
+"""
+Patch Engine — generates and applies line-level diffs instead of rewriting full files.
+
+Supports:
+- Unified diff generation
+- Patch application with context validation
+- Minimal change preservation
+"""
+
+from __future__ import annotations
+
+import difflib
+import os
+from pathlib import Path
+from typing import Any
+
+
+def generate_diff(original: str, modified: str, file_path: str = "file.py") -> str:
+    """Generate a unified diff between original and modified content."""
+    orig_lines = original.splitlines(keepends=True)
+    mod_lines = modified.splitlines(keepends=True)
+
+    diff = difflib.unified_diff(
+        orig_lines, mod_lines,
+        fromfile=f"a/{file_path}",
+        tofile=f"b/{file_path}",
+        lineterm="\n",
+    )
+    return "".join(diff)
+
+
+def apply_patch(file_path: str, original_content: str, patched_content: str) -> dict[str, Any]:
+    """Apply a patch to a file with validation.
+
+    Returns a result dict with status, diff, and stats.
+    """
+    result: dict[str, Any] = {
+        "status": "error",
+        "file": file_path,
+        "diff": "",
+        "lines_changed": 0,
+        "lines_added": 0,
+        "lines_removed": 0,
+    }
+
+    if not patched_content.strip():
+        result["error"] = "Empty patch content"
+        return result
+
+    # Generate diff for logging
+    diff_text = generate_diff(original_content, patched_content, file_path)
+    result["diff"] = diff_text
+
+    # Count changes
+    for line in diff_text.splitlines():
+        if line.startswith("+") and not line.startswith("+++"):
+            result["lines_added"] += 1
+        elif line.startswith("-") and not line.startswith("---"):
+            result["lines_removed"] += 1
+    result["lines_changed"] = result["lines_added"] + result["lines_removed"]
+
+    # Write the patched file
+    try:
+        p = Path(file_path)
+        p.parent.mkdir(parents=True, exist_ok=True)
+        p.write_text(patched_content, encoding="utf-8")
+        result["status"] = "success"
+    except Exception as exc:
+        result["status"] = "error"
+        result["error"] = str(exc)
+
+    return result
+
+
+def format_diff_summary(patch_result: dict[str, Any]) -> str:
+    """Format a human-readable patch summary."""
+    if patch_result["status"] != "success":
+        return f"[PATCH ERROR] {patch_result.get('error', 'Unknown error')}"
+
+    return (
+        f"[PATCH] {patch_result['file']}: "
+        f"+{patch_result['lines_added']} / -{patch_result['lines_removed']} lines"
+    )
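
A round trip through the engine, for illustration (apply_patch writes to disk, so this targets a throwaway path):

from devagent.app.patcher import apply_patch, format_diff_summary

original = "def add(a, b):\n    return a - b\n"   # deliberate bug
fixed = "def add(a, b):\n    return a + b\n"
result = apply_patch("/tmp/demo_add.py", original, fixed)
print(format_diff_summary(result))  # [PATCH] /tmp/demo_add.py: +1 / -1 lines
print(result["diff"])               # full unified diff, useful for logging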
devagent/app/planner.py
ADDED
@@ -0,0 +1,76 @@
+"""
+Planner Layer — interprets the task and generates a short action plan.
+
+The planner is NOT the coder. It decides:
+1. Which files are likely relevant
+2. Which tools to use first
+3. A short execution strategy
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Any
+
+from devagent.app.llm import query
+
+
+PLAN_PROMPT = """\
+You are a coding task planner. Given a task and project files, create a SHORT plan.
+
+TASK: {task}
+
+PROJECT FILES:
+{file_list}
+
+Create a plan with exactly 3-5 steps. Each step should be ONE action.
+Available actions: search_code, semantic_search, read_file, write_patch, run_tests, lint_code, git_diff
+
+Reply in this EXACT format:
+LIKELY_FILES: file1.py, file2.py
+PLAN:
+1. <action>: <target>
+2. <action>: <target>
+3. <action>: <target>
+"""
+
+
+def generate_plan(task: str, file_list: list[str]) -> dict[str, Any]:
+    """Generate an execution plan for the given task."""
+    files_str = "\n".join(f"  - {f}" for f in file_list[:30])
+    prompt = PLAN_PROMPT.format(task=task, file_list=files_str)
+    response = query(prompt)
+
+    if not response:
+        return _fallback_plan(task, file_list)
+    return _parse_plan(response, task, file_list)
+
+
+def _parse_plan(response: str, task: str, file_list: list[str]) -> dict[str, Any]:
+    """Parse the LLM's plan response."""
+    result: dict[str, Any] = {"likely_files": [], "steps": [], "raw_plan": response}
+
+    files_match = re.search(r"LIKELY_FILES:\s*(.+)", response, re.IGNORECASE)
+    if files_match:
+        result["likely_files"] = [f.strip() for f in files_match.group(1).split(",") if f.strip()]
+
+    step_pattern = re.compile(r"\d+\.\s*(\w+):\s*(.+)")
+    for match in step_pattern.finditer(response):
+        result["steps"].append((match.group(1).strip().lower(), match.group(2).strip()))
+
+    if not result["steps"]:
+        return _fallback_plan(task, file_list)
+    return result
+
+
+def _fallback_plan(task: str, file_list: list[str]) -> dict[str, Any]:
+    """Generate a sensible fallback plan from the task description."""
+    likely_files = [m.group(1) for m in re.finditer(r"(\w+\.py)", task)]
+    steps = []
+    if likely_files:
+        steps.append(("read_file", likely_files[0]))
+    else:
+        keywords = [w for w in task.lower().split() if len(w) > 3 and w not in {"the", "that", "this", "with", "from"}]
+        steps.append(("search_code", keywords[0] if keywords else "def"))
+    steps.append(("run_tests", "."))
+    return {"likely_files": likely_files, "steps": steps, "raw_plan": "Fallback plan."}
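
The planner's contract, sketched (my example, not from the package): generate_plan always returns a dict with likely_files, steps, and raw_plan, degrading to the heuristic fallback when the LLM is unreachable or its output does not parse.

from devagent.app.planner import generate_plan

plan = generate_plan(
    "Fix the off-by-one bug in paginator.py",                 # hypothetical task
    ["paginator.py", "views.py", "tests/test_paginator.py"],  # hypothetical files
)
for action, target in plan["steps"]:  # e.g. ("read_file", "paginator.py")
    print(f"{action}: {target}")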
devagent/app/reviewer.py
ADDED
@@ -0,0 +1,65 @@
+"""
+Self-review module.
+The LLM critiques its own code fix and returns APPROVED or REVISE + reason.
+"""
+
+from __future__ import annotations
+
+from devagent.app.llm import query
+
+REVIEW_SYSTEM = (
+    "You are a strict code reviewer. "
+    "Review the proposed code fix for correctness, edge cases, and syntax. "
+    "CRITICAL RULES:\n"
+    "1. If the code has a SyntaxError or unclosed blocks, return REVISE.\n"
+    "2. If the proposed fix adds unnecessary tests, example usages, or 'if __name__ == \"__main__\":' blocks, return REVISE.\n"
+    "3. If the fix is correct and minimal, return APPROVED.\n\n"
+    "Respond with EXACTLY one line:\n"
+    " APPROVED\n"
+    "or\n"
+    " REVISE: <one-line reason>\n"
+    "Do NOT output anything else."
+)
+
+REVISE_SYSTEM = (
+    "You are a senior Python developer. "
+    "Fix the code based on the review feedback. "
+    "Output ONLY the corrected Python code — no explanations, no markdown fences."
+)
+
+
+def review_code(original_code: str, fixed_code: str, task: str) -> tuple[bool, str]:
+    """Review a code fix.
+
+    Returns:
+        (approved: bool, review_text: str)
+    """
+    prompt = (
+        f"TASK: {task}\n\n"
+        f"ORIGINAL CODE:\n{original_code[:1500]}\n\n"
+        f"PROPOSED FIX:\n{fixed_code[:1500]}\n\n"
+        "Is the fix correct? Reply APPROVED or REVISE: <reason>"
+    )
+    response = query(prompt, system=REVIEW_SYSTEM)
+
+    if not response:
+        # LLM failure → cautiously approve to avoid infinite loop
+        return True, "APPROVED (LLM unavailable — auto-approved)"
+
+    approved = response.strip().upper().startswith("APPROVED")
+    return approved, response.strip()
+
+
+def revise_code(code: str, review_feedback: str, task: str) -> str:
+    """Ask the LLM to revise code based on review feedback.
+
+    Returns the revised code string.
+    """
+    prompt = (
+        f"TASK: {task}\n\n"
+        f"CODE:\n{code[:1500]}\n\n"
+        f"REVIEW FEEDBACK: {review_feedback}\n\n"
+        "Fix the code. Output ONLY the corrected Python code."
+    )
+    response = query(prompt, system=REVISE_SYSTEM)
+    return response if response else code  # fallback to original on failure