devagent-cli 3.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,94 @@
1
+ """
2
+ Structured JSON logger — writes every agent step to logs/run.json.
3
+
4
+ Tracks: thoughts, actions, observations, reviews, test results,
5
+ latency, model info, and benchmark performance.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import os
12
+ from datetime import datetime, timezone
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+
17
class AgentLogger:
    """JSON logger that records agent steps and events in ``run.json``.

    Entries are accumulated in memory and the complete array is rewritten to
    ``<log_dir>/run.json`` after every new entry.  An existing ``run.json`` in
    the directory is replaced on the first write of a new logger instance.
    """

    # Maximum number of characters of observation / test output kept per step.
    _MAX_FIELD_CHARS = 2000

    def __init__(self, log_dir: str = "logs"):
        """Create *log_dir* if it does not exist and start with no entries."""
        self._log_dir = Path(log_dir)
        self._log_dir.mkdir(parents=True, exist_ok=True)
        self._log_file = self._log_dir / "run.json"
        self._entries: list[dict[str, Any]] = []

    def log_step(
        self,
        step: int,
        thought: str,
        action: str,
        observation: str,
        review: str,
        test_result: str,
        status: str,
        *,
        latency: float = 0.0,
        model: str = "",
        patch_summary: str = "",
    ) -> None:
        """Log a complete agent iteration step.

        The entry is appended to the in-memory list, written to disk and
        pretty-printed to the console.  ``observation`` and ``test_result``
        are truncated to 2000 characters; ``None`` is stored as an empty
        string.  ``latency`` is stored as ``"<seconds>s"`` with two decimals,
        or as an empty string when it is zero.
        """
        # A tool that produced no output may hand over None; slicing None
        # would raise TypeError and lose the whole step.
        observation = "" if observation is None else observation
        test_result = "" if test_result is None else test_result

        entry = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "step": step,
            "thought": thought,
            "action": action,
            "observation": observation[: self._MAX_FIELD_CHARS],
            "review": review,
            "test_result": test_result[: self._MAX_FIELD_CHARS],
            "latency": f"{latency:.2f}s" if latency else "",
            "model": model,
            "patch": patch_summary,
            "status": status,
        }
        self._entries.append(entry)
        self._flush()
        self._print_step(entry)

    def log_event(self, event: str, data: dict[str, Any] | None = None) -> None:
        """Log a freeform event (startup, shutdown, error, etc.).

        Keys of *data* are merged into the entry after ``timestamp`` and
        ``event``, so a key with either of those names overrides the default.
        Values that JSON cannot represent are written as ``str(value)``.
        """
        entry = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "event": event,
            **(data or {}),
        }
        self._entries.append(entry)
        self._flush()

    def _flush(self) -> None:
        """Write the full log array to disk.

        Best effort: any failure is reported on stdout and never raised, so
        logging cannot abort the agent run.
        """
        tmp_file = self._log_file.with_name(self._log_file.name + ".tmp")
        try:
            # default=str keeps a single non-JSON value (Path, set, exception,
            # ...) from making this and every later flush fail.
            payload = json.dumps(
                self._entries, indent=2, ensure_ascii=False, default=str
            )
            # Write to a sibling file and rename it over the target so an
            # interrupted write cannot leave a truncated run.json behind.
            tmp_file.write_text(payload, encoding="utf-8")
            tmp_file.replace(self._log_file)
        except Exception as exc:  # noqa: BLE001 - logging must never crash the run
            print(f"[LOGGER ERROR] {exc}")

    @staticmethod
    def _print_step(entry: dict[str, Any]) -> None:
        """Pretty-print a step entry (as built by ``log_step``) to the console."""
        print("\n" + "=" * 60)
        print(f"  STEP {entry['step']}  |  STATUS: {entry['status']}")
        print("=" * 60)
        print(f"  THOUGHT:     {entry['thought']}")
        print(f"  ACTION:      {entry['action']}")
        print(f"  OBSERVATION: {entry['observation'][:1500]}")
        # Optional fields are only shown when they carry a value.
        if entry.get("review"):
            print(f"  REVIEW:      {entry['review']}")
        if entry.get("patch"):
            print(f"  PATCH:       {entry['patch'][:200]}")
        if entry.get("latency"):
            print(f"  LATENCY:     {entry['latency']}")
        print(f"  TEST RESULT: {entry['test_result'][:1500]}")
        print("=" * 60)
@@ -0,0 +1,130 @@
1
+ """
2
+ Metrics collector — tracks latency, retries, token estimates, and benchmark results.
3
+
4
+ All metrics are stored in-memory and flushed to disk on demand.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import time
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime, timezone
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+
17
+ @dataclass
18
+ class StepMetrics:
19
+ """Metrics for a single agent step."""
20
+ step: int = 0
21
+ action: str = ""
22
+ latency_s: float = 0.0
23
+ prompt_chars: int = 0
24
+ response_chars: int = 0
25
+ estimated_tokens: int = 0 # rough: chars / 4
26
+ status: str = ""
27
+
28
+
29
+ @dataclass
30
+ class RunMetrics:
31
+ """Aggregated metrics for an entire agent run."""
32
+
33
+ model: str = ""
34
+ task: str = ""
35
+ start_time: float = field(default_factory=time.time)
36
+ end_time: float = 0.0
37
+ total_steps: int = 0
38
+ retries: int = 0
39
+ successes: int = 0
40
+ failures: int = 0
41
+ total_latency_s: float = 0.0
42
+ total_estimated_tokens: int = 0
43
+ steps: list[StepMetrics] = field(default_factory=list)
44
+
45
+ # ── Recording ─────────────────────────────────────────────────────────
46
+
47
+ def record_step(
48
+ self,
49
+ step: int,
50
+ action: str,
51
+ latency_s: float,
52
+ prompt_chars: int,
53
+ response_chars: int,
54
+ status: str,
55
+ ) -> StepMetrics:
56
+ """Record metrics for a single step."""
57
+ estimated_tokens = (prompt_chars + response_chars) // 4
58
+ sm = StepMetrics(
59
+ step=step,
60
+ action=action,
61
+ latency_s=round(latency_s, 3),
62
+ prompt_chars=prompt_chars,
63
+ response_chars=response_chars,
64
+ estimated_tokens=estimated_tokens,
65
+ status=status,
66
+ )
67
+ self.steps.append(sm)
68
+ self.total_steps += 1
69
+ self.total_latency_s += latency_s
70
+ self.total_estimated_tokens += estimated_tokens
71
+ if status == "success":
72
+ self.successes += 1
73
+ elif status == "fail":
74
+ self.failures += 1
75
+ return sm
76
+
77
+ def finalize(self) -> None:
78
+ """Mark the run as complete."""
79
+ self.end_time = time.time()
80
+
81
+ # ── Reporting ─────────────────────────────────────────────────────────
82
+
83
+ def summary(self) -> dict[str, Any]:
84
+ """Return a JSON-serialisable summary."""
85
+ elapsed = (self.end_time or time.time()) - self.start_time
86
+ return {
87
+ "model": self.model,
88
+ "task": self.task[:100],
89
+ "total_steps": self.total_steps,
90
+ "retries": self.retries,
91
+ "successes": self.successes,
92
+ "failures": self.failures,
93
+ "total_latency_s": round(self.total_latency_s, 2),
94
+ "wall_time_s": round(elapsed, 2),
95
+ "total_estimated_tokens": self.total_estimated_tokens,
96
+ "avg_step_latency_s": round(
97
+ self.total_latency_s / max(self.total_steps, 1), 2
98
+ ),
99
+ "timestamp": datetime.now(timezone.utc).isoformat(),
100
+ }
101
+
102
+ def save(self, log_dir: str) -> str:
103
+ """Save metrics to disk."""
104
+ path = Path(log_dir)
105
+ path.mkdir(parents=True, exist_ok=True)
106
+ out_file = path / "metrics.json"
107
+ try:
108
+ out_file.write_text(
109
+ json.dumps(self.summary(), indent=2, ensure_ascii=False),
110
+ encoding="utf-8",
111
+ )
112
+ return str(out_file)
113
+ except Exception as exc:
114
+ print(f"[METRICS ERROR] {exc}")
115
+ return ""
116
+
117
+
118
+ class Timer:
119
+ """Simple context-manager timer."""
120
+
121
+ def __init__(self) -> None:
122
+ self.start = 0.0
123
+ self.elapsed = 0.0
124
+
125
+ def __enter__(self) -> "Timer":
126
+ self.start = time.time()
127
+ return self
128
+
129
+ def __exit__(self, *_: Any) -> None:
130
+ self.elapsed = time.time() - self.start
@@ -0,0 +1,480 @@
1
+ Metadata-Version: 2.4
2
+ Name: devagent-cli
3
+ Version: 3.2.1
4
+ Summary: Professional Local autonomous coding agent powered by Ollama
5
+ Author: Vedant Jadhav
6
+ License: MIT
7
+ Keywords: ai,agent,coding,ollama,local,devagent,devagent-cli
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Topic :: Software Development :: Interpreters
13
+ Requires-Python: >=3.11
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Requires-Dist: rich
17
+ Requires-Dist: pytest
18
+ Requires-Dist: requests
19
+ Requires-Dist: ollama
20
+ Requires-Dist: faiss-cpu
21
+ Provides-Extra: semantic
22
+ Requires-Dist: sentence-transformers; extra == "semantic"
23
+ Provides-Extra: lint
24
+ Requires-Dist: flake8; extra == "lint"
25
+ Dynamic: license-file
26
+
27
+ <div align="center">
28
+
29
+ # 🧠 DevAgent
30
+
31
+ ### A Lightweight Local Open-Source Miniature of Claude Code CLI
32
+
33
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
34
+ [![Python 3.11+](https://img.shields.io/badge/Python-3.11+-3776AB.svg?logo=python&logoColor=white)](https://www.python.org/)
35
+ [![Ollama](https://img.shields.io/badge/Ollama-Local%20LLM-black.svg?logo=ollama)](https://ollama.ai)
36
+ [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](CONTRIBUTING.md)
37
+ [![GitHub stars](https://img.shields.io/github/stars/VedantJadhav701/Developer-Code-Intelligence-Agent?style=social)](https://github.com/VedantJadhav701/Developer-Code-Intelligence-Agent)
38
+
39
+ **A production-grade local coding agent that finds bugs, writes patches, reviews its own code, and validates with tests — all offline, all local, zero API costs.**
40
+
41
+ [Quick Start](#-quick-start) •
42
+ [Architecture](#-architecture) •
43
+ [Benchmarks](#-benchmarks) •
44
+ [Roadmap](#-roadmap) •
45
+ [Contributing](#-contributing)
46
+
47
+ ---
48
+
49
+ </div>
50
+
51
+ ## 🤔 Why DevAgent?
52
+
53
+ Most AI coding tools are **chatbots** — they suggest code, you copy-paste, you pray.
54
+
55
+ DevAgent is a **real agent** with a retrieval-first, tool-grounded architecture:
56
+
57
+ | | Chatbot | DevAgent |
58
+ |---|---|---|
59
+ | Searches your codebase | ❌ | ✅ ripgrep + semantic search |
60
+ | Retrieves relevant code | ❌ | ✅ FAISS embeddings |
61
+ | Plans before coding | ❌ | ✅ Planner layer |
62
+ | Generates patches | ❌ | ✅ Unified diffs |
63
+ | Reviews its own output | ❌ | ✅ Self-critique loop |
64
+ | Runs your tests | ❌ | ✅ pytest integration |
65
+ | Retries on failure | ❌ | ✅ Up to N iterations |
66
+ | Works in sandbox | ❌ | ✅ Isolated workspace |
67
+ | Works offline | ❌ | ✅ 100% local via Ollama |
68
+ | Free to use | 💸 | ✅ Free forever |
69
+
70
+ > **Philosophy:** Execution > Reasoning. Tools > Hallucination. Retrieval > Huge Context. Reliability > Intelligence.
71
+
72
+ ---
73
+
74
+ ## ✨ Features
75
+
76
+ 🔁 **ReAct Loop** — Thought → Action → Observation → Fix → Review → Test cycle
77
+
78
+ 🧠 **Planner** — LLM generates an action plan before coding
79
+
80
+ 🔍 **Semantic Search** — FAISS + sentence-transformers code retrieval
81
+
82
+ 🔎 **Code Search** — ripgrep-powered with cross-platform fallback
83
+
84
+ 📝 **Self-Review** — LLM critiques its own fixes, revises until approved
85
+
86
+ 🩹 **Patch Engine** — Line-level unified diffs instead of full file rewrites
87
+
88
+ 🧪 **Test-Driven** — Runs pytest after every fix, retries on failure
89
+
90
+ 🏖️ **Sandbox Mode** — Agent works in an isolated copy, applies changes only on success
91
+
92
+ 📊 **Benchmarks** — 5 built-in benchmark suites with automated evaluation
93
+
94
+ 📈 **Metrics** — Latency, token estimates, retries, and performance tracking
95
+
96
+ 📋 **Full Audit Trail** — Every step logged to `logs/run.json`
97
+
98
+ 🔒 **100% Offline** — Runs on Ollama with small models (2-4 GB)
99
+
100
+ ⚡ **Low Resource** — Works on RTX 3050 (4 GB VRAM) / 16 GB RAM
101
+
102
+ ---
103
+
104
+ ## 🚀 Quick Start
105
+
106
+ ### Prerequisites
107
+
108
+ - [Python 3.11+](https://www.python.org/downloads/)
109
+ - [Ollama](https://ollama.ai) installed and running
110
+
111
+ ### Install & Setup
112
+
113
+ ```bash
114
+ # 1. Clone
115
+ git clone https://github.com/VedantJadhav701/Developer-Code-Intelligence-Agent.git
116
+ cd Developer-Code-Intelligence-Agent
117
+
118
+ # 2. Install
119
+ pip install devagent-cli
120
+ # Or locally: pip install -e .
121
+
122
+ # 3. Verify System (CRITICAL)
123
+ # This checks your Python environment, Ollama connection, and dependencies
124
+ devagent doctor
125
+
126
+ # 4. Pull the model
127
+ ollama pull qwen2.5-coder:3b
128
+
129
+ # 5. Run!
130
+ devagent run --task "Fix the divide-by-zero bug" --root ./demo_project
131
+ ```
132
+
133
+ ### CLI Subcommands
134
+
135
+ | Command | Description |
136
+ |---|---|
137
+ | `devagent run` | Execute a coding task on a project |
138
+ | `devagent benchmark` | Run the automated benchmark suite |
139
+ | `devagent doctor` | Check system health and dependencies |
140
+ | `devagent models` | List available Ollama models |
141
+ | `devagent version` | Show current version |
142
+
143
+ ### ✨ New: Trust & Safety
144
+
145
+ #### 🛡️ Reliability Hardening (v3.2.1+)
146
+ DevAgent is now built for **Enterprise-grade reliability** in complex projects:
147
+ - **Path Anchoring**: Automatically corrects "root hallucinations." If the agent targets a file in a subdirectory but assumes it's at the root, the system auto-anchors it to the correct project location.
148
+ - **Forensic Test Detection**: Built-in intelligence to "see through" environment noise. It detects successful test runs even if unrelated parts of the repository have collection errors.
149
+ - **Confidence Scoring**: Every fix is graded (0-100%) based on test results, surgical precision, and self-review quality.
150
+
151
+ #### 🕹️ Interactive Mode
152
+ Run with `--interactive` (or `-i`) to review diffs before they are applied to your project.
153
+ ```bash
154
+ devagent run --task "Fix bug" --interactive
155
+ ```
156
+
157
+ ---
158
+
159
+ ## 🏗️ Architecture
160
+
161
+ ```mermaid
162
+ graph TD
163
+ CLI[DevAgent CLI] --> Orchestrator[ReAct Orchestrator]
164
+ Orchestrator --> Memory[Working Memory]
165
+ Orchestrator --> Retrieval[Semantic Retrieval FAISS]
166
+ Orchestrator --> Tools[Tool Suite: pytest, ripgrep, git]
167
+ Orchestrator --> Reviewer[Self-Review Loop]
168
+ Reviewer --> Patch[Surgical Patch Engine]
169
+ Patch --> Sandbox[Sandbox Environment]
170
+ ```
171
+
172
+ No API keys. No sign-ups. No cloud.
173
+
174
+ ### Optional: Enable Semantic Search
175
+
176
+ ```bash
177
+ pip install "devagent-cli[semantic]"  # adds sentence-transformers; faiss-cpu is already a core dependency
178
+ ```
179
+
180
+ Without these, DevAgent falls back to keyword search — still fully functional.
181
+
182
+ ---
183
+
184
+ ## 🎬 Demo
185
+
186
+ ```
187
+ ____ _ _
188
+ | _ \ _____ __/ \ __ _ ___ _ __ | |_
189
+ | | | |/ _ \ \ / / _ \ / _` |/ _ \ '_ \| __|
190
+ | |_| | __/\ V / ___ \ (_| | __/ | | | |_
191
+ |____/ \___| \_/_/ \_\__, |\___|_| |_|\__|
192
+ |___/
193
+
194
+ +==========================================================+
195
+ | DEVELOPER CODE INTELLIGENCE AGENT |
196
+ | Model: qwen2.5-coder:3b |
197
+ | Sandbox: OFF |
198
+ +==========================================================+
199
+
200
+ [PLAN] LIKELY_FILES: calculator.py
201
+ 1. search_code: divide
202
+ 2. read_file: calculator.py
203
+ 3. run_tests
204
+
205
+ ----------------------------------------
206
+ ITERATION 1/5
207
+ ----------------------------------------
208
+ [TOOL] Executing: search_code(divide)
209
+ >> Found: calculator.py:10:def divide(a, b)
210
+ [REVIEW] #1: APPROVED
211
+ >> Tests: 5 passed ✓
212
+
213
+ [OK] AGENT COMPLETED SUCCESSFULLY
214
+
215
+ Status: success
216
+ Steps used: 1/5
217
+ Patches: 1
218
+ Time: 8.2s
219
+ ```
220
+
221
+ ---
222
+
223
+ ## 🏗️ Architecture in Detail
224
+
225
+ ```
226
+ ┌─────────────────────────────┐
227
+ │ CLI (main.py) │
228
+ │ --task --root --model │
229
+ │ --sandbox --benchmark │
230
+ │ --auto-commit --auto-push │
231
+ └──────────┬──────────────────┘
232
+
233
+ ┌──────────▼──────────────────┐
234
+ │ Planner Layer │
235
+ │ Identifies files + strategy │
236
+ └──────────┬──────────────────┘
237
+
238
+ ┌──────────▼──────────────────┐
239
+ │ Retrieval Layer (Memory) │
240
+ │ FAISS + Sentence-Transformers│
241
+ │ Chunk → Embed → Top-K │
242
+ └──────────┬──────────────────┘
243
+
244
+ ┌──────────▼──────────────────┐
245
+ │ ReAct Agent Loop │
246
+ │ │
247
+ │ 1. THOUGHT (LLM) │
248
+ │ 2. ACTION (Tool) │
249
+ │ 3. OBSERVATION │
250
+ │ 4. FIX (LLM) │
251
+ │ 5. REVIEW (LLM) │
252
+ │ 6. PATCH (Diff Engine) │
253
+ │ 7. TEST (pytest) │
254
+ │ │
255
+ │ if FAIL → retry │
256
+ │ if PASS → done ✓ │
257
+ └──┬──────────────┬───────────┘
258
+ │ │
259
+ ┌────────▼──┐ ┌──────▼──────┐
260
+ │ Tools │ │ Ollama │
261
+ │ │ │ (Local) │
262
+ │ • search │ │ │
263
+ │ • semantic│ │ qwen2.5- │
264
+ │ • read │ │ coder:3b │
265
+ │ • patch │ │ phi3:mini │
266
+ │ • pytest │ │ mistral:7b │
267
+ │ • flake8 │ │ │
268
+ │ • git_diff│ └─────────────┘
269
+ │ • sandbox │
270
+ └───────────┘
271
+ ```
272
+
273
+ ### 9-Layer Architecture
274
+
275
+ | Layer | Module | Purpose |
276
+ |---|---|---|
277
+ | 1. CLI | `main.py` | Argument parsing, mode selection, banner |
278
+ | 2. Planner | `app/planner.py` | Task interpretation, file identification |
279
+ | 3. Retrieval | `app/memory.py` | FAISS index, semantic chunking, Top-K search |
280
+ | 4. Tools | `tools/*` | 8 real tools: search, semantic_search, read, write, test, lint, git, sandbox |
281
+ | 5. Agent | `app/agent.py` | ReAct orchestration loop |
282
+ | 6. Review | `app/reviewer.py` | Self-critique with APPROVED/REVISE |
283
+ | 7. Validation | `tools/test_runner.py` | pytest + flake8 execution feedback |
284
+ | 8. Logging | `utils/logger.py` | Structured JSON audit trail |
285
+ | 9. Safety | `app/sandbox.py` | Isolated workspace, path validation |
286
+
287
+ ---
288
+
289
+ ## 📁 Project Structure
290
+
291
+ ```
292
+ Developer-Code-Intelligence-Agent/
293
+ ├── app/
294
+ │ ├── agent.py # Core ReAct agent engine
295
+ │ ├── planner.py # Task planning layer
296
+ │ ├── reviewer.py # Self-review module
297
+ │ ├── llm.py # Ollama integration
298
+ │ ├── memory.py # FAISS retrieval + working memory
299
+ │ ├── patcher.py # Unified diff patch engine
300
+ │ ├── sandbox.py # Sandbox workspace manager
301
+ │ └── state.py # Shared state dataclass
302
+ ├── tools/
303
+ │ ├── search.py # Code search (ripgrep + fallbacks)
304
+ │ ├── semantic_search.py # FAISS semantic search
305
+ │ ├── file_ops.py # Safe file read/write
306
+ │ ├── test_runner.py # pytest runner
307
+ │ ├── linter.py # flake8 linter
308
+ │ ├── git_tools.py # Git diff/commit/push
309
+ │ └── benchmark_runner.py # Benchmark evaluation
310
+ ├── utils/
311
+ │ ├── logger.py # Structured JSON logger
312
+ │ ├── config.py # Centralized configuration
313
+ │ └── metrics.py # Performance metrics
314
+ ├── benchmarks/
315
+ │ ├── divide_by_zero/ # Benchmark: zero division guard
316
+ │ ├── missing_validation/ # Benchmark: input validation
317
+ │ ├── syntax_error/ # Benchmark: syntax fix
318
+ │ ├── import_bug/ # Benchmark: wrong import
319
+ │ └── edge_case/ # Benchmark: empty list handling
320
+ ├── demo_project/ # Sample buggy project
321
+ ├── docs/
322
+ │ └── USER_GUIDE.md # Full usage guide
323
+ ├── main.py # CLI entry point
324
+ ├── devagent.py # Global CLI wrapper
325
+ ├── devagent.bat # Windows global shortcut
326
+ ├── requirements.txt
327
+ ├── CONTRIBUTING.md
328
+ ├── CHANGELOG.md
329
+ ├── CODE_OF_CONDUCT.md
330
+ ├── SECURITY.md
331
+ ├── LICENSE
332
+ └── README.md
333
+ ```
334
+
335
+ ---
336
+
337
+ ## 💻 CLI Reference
338
+
339
+ ```bash
340
+ python main.py --task "TASK" --root ./project [OPTIONS]
341
+ ```
342
+
343
+ | Flag | Default | Description |
344
+ |---|---|---|
345
+ | `--task`, `-t` | *(required)* | The coding task for the agent |
346
+ | `--root`, `-r` | `.` | Project root directory |
347
+ | `--model` | `qwen2.5-coder:3b` | Any Ollama model |
348
+ | `--max-steps`, `-m` | `5` | Max ReAct iterations |
349
+ | `--benchmark` | off | Run benchmark suite |
350
+ | `--sandbox` | off | Run in isolated sandbox |
351
+ | `--auto-commit` | off | Git commit on success |
352
+ | `--auto-push` | off | Git push after commit |
353
+ | `--verbose`, `-v` | off | Verbose output |
354
+
355
+ ### Examples
356
+
357
+ ```bash
358
+ # Fix a specific bug
359
+ python main.py -t "Fix the TypeError in user_service.py" -r ./backend
360
+
361
+ # Run in sandbox mode (safe — doesn't touch real files until success)
362
+ python main.py -t "Fix divide-by-zero bug" -r ./project --sandbox
363
+
364
+ # Auto-commit changes on success
365
+ python main.py -t "Add input validation" -r ./api --auto-commit
366
+
367
+ # Use a stronger model
368
+ python main.py -t "Refactor auth middleware" -r ./server --model mistral:7b
369
+
370
+ # Run benchmarks
371
+ python main.py --benchmark
372
+
373
+ # More retries for complex tasks
374
+ python main.py -t "Make all tests pass" -r ./project --max-steps 10
375
+ ```
376
+
377
+ > 📖 **[Full User Guide →](docs/USER_GUIDE.md)**
378
+
379
+ ---
380
+
381
+ ## 📊 Benchmarks
382
+
383
+ DevAgent includes 5 built-in benchmarks to evaluate agent performance:
384
+
385
+ | Benchmark | Bug Type | Difficulty |
386
+ |---|---|---|
387
+ | `divide_by_zero` | Missing guard clause | Easy |
388
+ | `missing_validation` | No input validation | Medium |
389
+ | `syntax_error` | Broken syntax | Medium |
390
+ | `import_bug` | Wrong module name | Easy |
391
+ | `edge_case` | Empty list crash | Medium |
392
+
393
+ Run benchmarks:
394
+
395
+ ```bash
396
+ python main.py --benchmark
397
+ python main.py --benchmark --model phi3:mini
398
+ ```
399
+
400
+ ---
401
+
402
+ ## 🔧 Supported Models
403
+
404
+ | Model | Size | Speed | Quality | Best For |
405
+ |---|---|---|---|---|
406
+ | `qwen2.5-coder:3b` | 1.9 GB | ⚡ Fast | ★★★★ | **Default — best for code** |
407
+ | `qwen2.5:3b` | 1.9 GB | ⚡ Fast | ★★★☆ | General fallback |
408
+ | `phi3:mini` | 2.2 GB | ⚡ Fast | ★★★☆ | Good reasoning |
409
+ | `qwen3:4b` | 2.5 GB | ⚡ Fast | ★★★★ | Better understanding |
410
+ | `gemma2:2b` | 1.6 GB | ⚡⚡ | ★★☆☆ | Ultra-low resource |
411
+ | `mistral:7b` | 4.4 GB | 🐢 | ★★★★★ | Best quality (8GB+ RAM) |
412
+
413
+ ---
414
+
415
+ ## 🗺️ Roadmap
416
+
417
+ ### ✅ Completed (v2.0)
418
+
419
+ - [x] Core ReAct agent loop
420
+ - [x] Self-review module
421
+ - [x] Tool system (9 tools)
422
+ - [x] Planner layer
423
+ - [x] Semantic retrieval (FAISS)
424
+ - [x] Patch engine (unified diffs)
425
+ - [x] Sandbox mode
426
+ - [x] Benchmark system (5 suites)
427
+ - [x] Metrics + structured logging
428
+ - [x] Git integration
429
+ - [x] CLI with all flags
430
+
431
+ ### 🔜 Coming Next
432
+
433
+ - [ ] **Multi-file support** — Agent works across multiple files simultaneously
434
+ - [ ] **Language support** — JavaScript, TypeScript, Go, Rust
435
+ - [ ] **Plugin system** — Custom tools via YAML/Python
436
+ - [ ] **Watch mode** — Auto-fix on test failure (`--watch`)
437
+ - [ ] **VS Code extension** — Run agent from your editor
438
+ - [ ] **Conversation memory** — Learn from past runs
439
+ - [ ] **Multi-agent mode** — Planner + Coder + Reviewer + Evaluator agents
440
+
441
+ ---
442
+
443
+ ## 🤝 Contributing
444
+
445
+ We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for details.
446
+
447
+ ```bash
448
+ git checkout -b feature/your-feature
449
+ # ... make changes ...
450
+ python -m pytest demo_project/ -v
451
+ git commit -m "feat: your feature"
452
+ git push origin feature/your-feature
453
+ ```
454
+
455
+ **Good first issues** are tagged and waiting:
456
+ [Browse good first issues →](https://github.com/VedantJadhav701/Developer-Code-Intelligence-Agent/labels/good%20first%20issue)
457
+
458
+ ---
459
+
460
+ ## 📜 License
461
+
462
+ MIT — use it however you want. See [LICENSE](LICENSE).
463
+
464
+ ---
465
+
466
+ ## ⭐ Star History
467
+
468
+ If DevAgent helps you, give it a star! It helps others discover the project.
469
+
470
+ [![Star History Chart](https://api.star-history.com/svg?repos=VedantJadhav701/Developer-Code-Intelligence-Agent&type=Date)](https://star-history.com/#VedantJadhav701/Developer-Code-Intelligence-Agent&Date)
471
+
472
+ ---
473
+
474
+ <div align="center">
475
+
476
+ **Built with 🧠 by [Vedant Jadhav](https://github.com/VedantJadhav701)**
477
+
478
+ *A lightweight local open-source miniature of Claude Code CLI.*
479
+
480
+ </div>