devagent-cli 3.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- devagent/__init__.py +1 -0
- devagent/app/__init__.py +1 -0
- devagent/app/agent.py +717 -0
- devagent/app/llm.py +83 -0
- devagent/app/memory.py +309 -0
- devagent/app/patcher.py +83 -0
- devagent/app/planner.py +76 -0
- devagent/app/reviewer.py +65 -0
- devagent/app/sandbox.py +105 -0
- devagent/app/state.py +113 -0
- devagent/cli.py +282 -0
- devagent/tools/__init__.py +1 -0
- devagent/tools/benchmark_runner.py +184 -0
- devagent/tools/file_ops.py +52 -0
- devagent/tools/git_tools.py +91 -0
- devagent/tools/linter.py +55 -0
- devagent/tools/search.py +65 -0
- devagent/tools/semantic_search.py +60 -0
- devagent/tools/surgical_patcher.py +39 -0
- devagent/tools/test_runner.py +143 -0
- devagent/utils/__init__.py +1 -0
- devagent/utils/config.py +116 -0
- devagent/utils/logger.py +94 -0
- devagent/utils/metrics.py +130 -0
- devagent_cli-3.2.1.dist-info/METADATA +480 -0
- devagent_cli-3.2.1.dist-info/RECORD +30 -0
- devagent_cli-3.2.1.dist-info/WHEEL +5 -0
- devagent_cli-3.2.1.dist-info/entry_points.txt +2 -0
- devagent_cli-3.2.1.dist-info/licenses/LICENSE +21 -0
- devagent_cli-3.2.1.dist-info/top_level.txt +1 -0
devagent/utils/logger.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Structured JSON logger — writes every agent step to logs/run.json.
|
|
3
|
+
|
|
4
|
+
Tracks: thoughts, actions, observations, reviews, test results,
|
|
5
|
+
latency, model info, and benchmark performance.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AgentLogger:
    """Structured JSON logger for agent steps and freeform events.

    Entries are accumulated in memory and the complete array is rewritten
    to ``<log_dir>/run.json`` after every ``log_step`` / ``log_event`` call
    (the file is replaced each time, not appended to).
    """

    # Maximum characters of observation / test output persisted per step.
    MAX_STORED_CHARS = 2000
    # Maximum characters of observation / test output echoed to the console.
    MAX_PRINTED_CHARS = 1500
    # Maximum characters of the patch summary echoed to the console.
    MAX_PRINTED_PATCH_CHARS = 200

    def __init__(self, log_dir: str = "logs"):
        """Create *log_dir* if needed and start with an empty entry list."""
        self._log_dir = Path(log_dir)
        self._log_dir.mkdir(parents=True, exist_ok=True)
        self._log_file = self._log_dir / "run.json"
        self._entries: list[dict[str, Any]] = []

    @staticmethod
    def _clip(text: Any, limit: int) -> str:
        """Return *text* as a string cut to *limit* chars (``None`` -> ``""``)."""
        if text is None:
            return ""
        return str(text)[:limit]

    def log_step(
        self,
        step: int,
        thought: str,
        action: str,
        observation: str,
        review: str,
        test_result: str,
        status: str,
        *,
        latency: float = 0.0,
        model: str = "",
        patch_summary: str = "",
    ) -> None:
        """Record one agent iteration, persist the log, and echo it to stdout.

        ``observation`` and ``test_result`` are truncated to
        ``MAX_STORED_CHARS`` before being stored; ``None`` is stored as an
        empty string. ``latency`` is stored as a formatted string such as
        ``"1.23s"``, or ``""`` when it is zero/falsy.
        """
        entry = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "step": step,
            "thought": thought,
            "action": action,
            "observation": self._clip(observation, self.MAX_STORED_CHARS),
            "review": review,
            "test_result": self._clip(test_result, self.MAX_STORED_CHARS),
            "latency": f"{latency:.2f}s" if latency else "",
            "model": model,
            "patch": patch_summary,
            "status": status,
        }
        self._entries.append(entry)
        self._flush()
        self._print_step(entry)

    def log_event(self, event: str, data: dict[str, Any] | None = None) -> None:
        """Record a freeform event (startup, shutdown, error, etc.).

        Keys in *data* are merged into the entry after ``timestamp`` and
        ``event``, so a same-named key in *data* replaces the default value.
        """
        entry = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "event": event,
            **(data or {}),
        }
        self._entries.append(entry)
        self._flush()

    def _flush(self) -> None:
        """Write the full log array to disk (best effort; errors are printed)."""
        try:
            # default=str is deliberate: event data may carry values such as
            # Path or set objects. Without a fallback, one such entry would
            # make this flush and every later flush fail, because the entry
            # stays in self._entries.
            payload = json.dumps(
                self._entries,
                indent=2,
                ensure_ascii=False,
                default=str,
            )
            self._log_file.write_text(payload, encoding="utf-8")
        except Exception as exc:  # noqa: BLE001
            # Logging must never abort the agent run.
            print(f"[LOGGER ERROR] {exc}")

    @staticmethod
    def _print_step(entry: dict[str, Any]) -> None:
        """Pretty-print a step entry to the console.

        Review, patch and latency lines are printed only when non-empty.
        """
        rule = "=" * 60
        print("\n" + rule)
        print(f" STEP {entry['step']} | STATUS: {entry['status']}")
        print(rule)
        print(f" THOUGHT: {entry['thought']}")
        print(f" ACTION: {entry['action']}")
        print(f" OBSERVATION: {entry['observation'][:AgentLogger.MAX_PRINTED_CHARS]}")
        if entry.get("review"):
            print(f" REVIEW: {entry['review']}")
        if entry.get("patch"):
            print(f" PATCH: {entry['patch'][:AgentLogger.MAX_PRINTED_PATCH_CHARS]}")
        if entry.get("latency"):
            print(f" LATENCY: {entry['latency']}")
        print(f" TEST RESULT: {entry['test_result'][:AgentLogger.MAX_PRINTED_CHARS]}")
        print(rule)
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Metrics collector — tracks latency, retries, token estimates, and benchmark results.
|
|
3
|
+
|
|
4
|
+
All metrics are stored in-memory and flushed to disk on demand.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import time
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class StepMetrics:
    """Measurements captured for one agent step.

    Every field has a default, so an empty record can be created and
    filled in later.
    """

    # Step number and the action name recorded for it.
    step: int = 0
    action: str = ""

    # Latency of the step, in seconds.
    latency_s: float = 0.0

    # Character counts of the prompt and the response.
    prompt_chars: int = 0
    response_chars: int = 0

    # Rough token estimate: characters divided by 4.
    estimated_tokens: int = 0

    # Outcome label for the step.
    status: str = ""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class RunMetrics:
    """Aggregated metrics for an entire agent run.

    Holds running totals plus the list of per-step records. ``start_time``
    is taken from ``time.time()`` when the instance is created; ``end_time``
    stays ``0.0`` until :meth:`finalize` is called.
    """

    model: str = ""
    task: str = ""
    start_time: float = field(default_factory=time.time)
    end_time: float = 0.0
    total_steps: int = 0
    retries: int = 0
    successes: int = 0
    failures: int = 0
    total_latency_s: float = 0.0
    total_estimated_tokens: int = 0
    steps: list[StepMetrics] = field(default_factory=list)

    # ── Recording ─────────────────────────────────────────────────────────

    def record_step(
        self,
        step: int,
        action: str,
        latency_s: float,
        prompt_chars: int,
        response_chars: int,
        status: str,
    ) -> StepMetrics:
        """Record metrics for a single step and update the run totals.

        The token estimate is ``(prompt_chars + response_chars) // 4``.
        The per-step record stores latency rounded to 3 decimals, while the
        run total accumulates the unrounded value. Only the statuses
        ``"success"`` and ``"fail"`` change the success/failure counters.

        Returns:
            The ``StepMetrics`` record that was appended to ``steps``.
        """
        estimated_tokens = (prompt_chars + response_chars) // 4
        sm = StepMetrics(
            step=step,
            action=action,
            latency_s=round(latency_s, 3),
            prompt_chars=prompt_chars,
            response_chars=response_chars,
            estimated_tokens=estimated_tokens,
            status=status,
        )
        self.steps.append(sm)
        self.total_steps += 1
        self.total_latency_s += latency_s
        self.total_estimated_tokens += estimated_tokens
        if status == "success":
            self.successes += 1
        elif status == "fail":
            self.failures += 1
        return sm

    def finalize(self) -> None:
        """Mark the run as complete by stamping ``end_time``."""
        self.end_time = time.time()

    # ── Reporting ─────────────────────────────────────────────────────────

    def summary(self) -> dict[str, Any]:
        """Return a JSON-serialisable summary of the run totals.

        If the run has not been finalized, wall time is measured up to now.
        ``task`` is truncated to 100 characters. The average step latency
        divides by at least 1, so it is 0.0 when no steps were recorded.
        """
        elapsed = (self.end_time or time.time()) - self.start_time
        return {
            "model": self.model,
            "task": self.task[:100],
            "total_steps": self.total_steps,
            "retries": self.retries,
            "successes": self.successes,
            "failures": self.failures,
            "total_latency_s": round(self.total_latency_s, 2),
            "wall_time_s": round(elapsed, 2),
            "total_estimated_tokens": self.total_estimated_tokens,
            "avg_step_latency_s": round(
                self.total_latency_s / max(self.total_steps, 1), 2
            ),
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }

    def save(self, log_dir: str) -> str:
        """Write :meth:`summary` to ``<log_dir>/metrics.json``.

        Best effort: any failure, including failure to create *log_dir*,
        is printed and reported through the return value, never raised.

        Returns:
            The path of the written file, or ``""`` if saving failed.
        """
        try:
            # Directory creation sits inside the try so that an unusable
            # log_dir (e.g. the path is an existing file) gives the same
            # "" result as a failed write.
            path = Path(log_dir)
            path.mkdir(parents=True, exist_ok=True)
            out_file = path / "metrics.json"
            out_file.write_text(
                json.dumps(self.summary(), indent=2, ensure_ascii=False),
                encoding="utf-8",
            )
            return str(out_file)
        except Exception as exc:  # noqa: BLE001
            # Metrics persistence must never abort the agent run.
            print(f"[METRICS ERROR] {exc}")
            return ""
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class Timer:
    """Simple context-manager timer.

    Usage::

        with Timer() as t:
            do_work()
        print(t.elapsed)

    Attributes:
        start: Wall-clock timestamp (``time.time()``) taken on entry.
        elapsed: Seconds spent inside the ``with`` block; set on exit,
            including when the block raises.
    """

    def __init__(self) -> None:
        self.start = 0.0
        self.elapsed = 0.0
        # Reference reading used only for the elapsed computation.
        self._t0 = 0.0

    def __enter__(self) -> "Timer":
        # Keep ``start`` as an epoch timestamp for callers that read it, but
        # measure the duration with perf_counter(): the difference of two
        # time.time() readings is wrong if the system clock is adjusted
        # while the block runs.
        self.start = time.time()
        self._t0 = time.perf_counter()
        return self

    def __exit__(self, *_: Any) -> None:
        # Returns None, so exceptions raised inside the block propagate.
        self.elapsed = time.perf_counter() - self._t0
|
|
@@ -0,0 +1,480 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: devagent-cli
|
|
3
|
+
Version: 3.2.1
|
|
4
|
+
Summary: Professional Local autonomous coding agent powered by Ollama
|
|
5
|
+
Author: Vedant Jadhav
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: ai,agent,coding,ollama,local,devagent,devagent-cli
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Topic :: Software Development :: Interpreters
|
|
13
|
+
Requires-Python: >=3.11
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: rich
|
|
17
|
+
Requires-Dist: pytest
|
|
18
|
+
Requires-Dist: requests
|
|
19
|
+
Requires-Dist: ollama
|
|
20
|
+
Requires-Dist: faiss-cpu
|
|
21
|
+
Provides-Extra: semantic
|
|
22
|
+
Requires-Dist: sentence-transformers; extra == "semantic"
|
|
23
|
+
Provides-Extra: lint
|
|
24
|
+
Requires-Dist: flake8; extra == "lint"
|
|
25
|
+
Dynamic: license-file
|
|
26
|
+
|
|
27
|
+
<div align="center">
|
|
28
|
+
|
|
29
|
+
# 🧠 DevAgent
|
|
30
|
+
|
|
31
|
+
### A Lightweight Local Open-Source Miniature of Claude Code CLI
|
|
32
|
+
|
|
33
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
34
|
+
[Python 3.11+](https://www.python.org/)
|
|
35
|
+
[Powered by Ollama](https://ollama.ai)
|
|
36
|
+
[Contributions welcome](CONTRIBUTING.md)
|
|
37
|
+
[GitHub repository](https://github.com/VedantJadhav701/Developer-Code-Intelligence-Agent)
|
|
38
|
+
|
|
39
|
+
**A production-grade local coding agent that finds bugs, writes patches, reviews its own code, and validates with tests — all offline, all local, zero API costs.**
|
|
40
|
+
|
|
41
|
+
[Quick Start](#-quick-start) •
|
|
42
|
+
[Architecture](#-architecture) •
|
|
43
|
+
[Benchmarks](#-benchmarks) •
|
|
44
|
+
[Roadmap](#-roadmap) •
|
|
45
|
+
[Contributing](#-contributing)
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
</div>
|
|
50
|
+
|
|
51
|
+
## 🤔 Why DevAgent?
|
|
52
|
+
|
|
53
|
+
Most AI coding tools are **chatbots** — they suggest code, you copy-paste, you pray.
|
|
54
|
+
|
|
55
|
+
DevAgent is a **real agent** with a retrieval-first, tool-grounded architecture:
|
|
56
|
+
|
|
57
|
+
| | Chatbot | DevAgent |
|
|
58
|
+
|---|---|---|
|
|
59
|
+
| Searches your codebase | ❌ | ✅ ripgrep + semantic search |
|
|
60
|
+
| Retrieves relevant code | ❌ | ✅ FAISS embeddings |
|
|
61
|
+
| Plans before coding | ❌ | ✅ Planner layer |
|
|
62
|
+
| Generates patches | ❌ | ✅ Unified diffs |
|
|
63
|
+
| Reviews its own output | ❌ | ✅ Self-critique loop |
|
|
64
|
+
| Runs your tests | ❌ | ✅ pytest integration |
|
|
65
|
+
| Retries on failure | ❌ | ✅ Up to N iterations |
|
|
66
|
+
| Works in sandbox | ❌ | ✅ Isolated workspace |
|
|
67
|
+
| Works offline | ❌ | ✅ 100% local via Ollama |
|
|
68
|
+
| Costs money | 💸 | ✅ Free forever |
|
|
69
|
+
|
|
70
|
+
> **Philosophy:** Execution > Reasoning. Tools > Hallucination. Retrieval > Huge Context. Reliability > Intelligence.
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## ✨ Features
|
|
75
|
+
|
|
76
|
+
🔁 **ReAct Loop** — Thought → Action → Observation → Fix → Review → Test cycle
|
|
77
|
+
|
|
78
|
+
🧠 **Planner** — LLM generates an action plan before coding
|
|
79
|
+
|
|
80
|
+
🔍 **Semantic Search** — FAISS + sentence-transformers code retrieval
|
|
81
|
+
|
|
82
|
+
🔎 **Code Search** — ripgrep-powered with cross-platform fallback
|
|
83
|
+
|
|
84
|
+
📝 **Self-Review** — LLM critiques its own fixes, revises until approved
|
|
85
|
+
|
|
86
|
+
🩹 **Patch Engine** — Line-level unified diffs instead of full file rewrites
|
|
87
|
+
|
|
88
|
+
🧪 **Test-Driven** — Runs pytest after every fix, retries on failure
|
|
89
|
+
|
|
90
|
+
🏖️ **Sandbox Mode** — Agent works in an isolated copy, applies changes only on success
|
|
91
|
+
|
|
92
|
+
📊 **Benchmarks** — 5 built-in benchmark suites with automated evaluation
|
|
93
|
+
|
|
94
|
+
📈 **Metrics** — Latency, token estimates, retries, and performance tracking
|
|
95
|
+
|
|
96
|
+
📋 **Full Audit Trail** — Every step logged to `logs/run.json`
|
|
97
|
+
|
|
98
|
+
🔒 **100% Offline** — Runs on Ollama with small models (2-4 GB)
|
|
99
|
+
|
|
100
|
+
⚡ **Low Resource** — Works on RTX 3050 (4 GB VRAM) / 16 GB RAM
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## 🚀 Quick Start
|
|
105
|
+
|
|
106
|
+
### Prerequisites
|
|
107
|
+
|
|
108
|
+
- [Python 3.11+](https://www.python.org/downloads/)
|
|
109
|
+
- [Ollama](https://ollama.ai) installed and running
|
|
110
|
+
|
|
111
|
+
### Install & Setup
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
# 1. Clone
|
|
115
|
+
git clone https://github.com/VedantJadhav701/Developer-Code-Intelligence-Agent.git
|
|
116
|
+
cd Developer-Code-Intelligence-Agent
|
|
117
|
+
|
|
118
|
+
# 2. Install
|
|
119
|
+
pip install devagent-cli  # installs the released package
|
|
120
|
+
# Or locally: pip install -e .
|
|
121
|
+
|
|
122
|
+
# 3. Verify System (CRITICAL)
|
|
123
|
+
# This checks your Python environment, Ollama connection, and dependencies
|
|
124
|
+
devagent doctor
|
|
125
|
+
|
|
126
|
+
# 4. Pull the model
|
|
127
|
+
ollama pull qwen2.5-coder:3b
|
|
128
|
+
|
|
129
|
+
# 5. Run!
|
|
130
|
+
devagent run --task "Fix the divide-by-zero bug" --root ./demo_project
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### CLI Subcommands
|
|
134
|
+
|
|
135
|
+
| Command | Description |
|
|
136
|
+
|---|---|
|
|
137
|
+
| `devagent run` | Execute a coding task on a project |
|
|
138
|
+
| `devagent benchmark` | Run the automated benchmark suite |
|
|
139
|
+
| `devagent doctor` | Check system health and dependencies |
|
|
140
|
+
| `devagent models` | List available Ollama models |
|
|
141
|
+
| `devagent version` | Show current version |
|
|
142
|
+
|
|
143
|
+
### ✨ New: Trust & Safety
|
|
144
|
+
|
|
145
|
+
#### 🛡️ Reliability Hardening (v3.2.1+)
|
|
146
|
+
DevAgent is now built for **Enterprise-grade reliability** in complex projects:
|
|
147
|
+
- **Path Anchoring**: Automatically corrects "root hallucinations." If the agent targets a file in a subdirectory but assumes it's at the root, the system auto-anchors it to the correct project location.
|
|
148
|
+
- **Forensic Test Detection**: Built-in intelligence to "see through" environment noise. It detects successful test runs even if unrelated parts of the repository have collection errors.
|
|
149
|
+
- **Confidence Scoring**: Every fix is graded (0-100%) based on test results, surgical precision, and self-review quality.
|
|
150
|
+
|
|
151
|
+
#### 🕹️ Interactive Mode
|
|
152
|
+
Run with `--interactive` (or `-i`) to review diffs before they are applied to your project.
|
|
153
|
+
```bash
|
|
154
|
+
devagent run --task "Fix bug" --interactive
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## 🏗️ Architecture
|
|
160
|
+
|
|
161
|
+
```mermaid
|
|
162
|
+
graph TD
|
|
163
|
+
CLI[DevAgent CLI] --> Orchestrator[ReAct Orchestrator]
|
|
164
|
+
Orchestrator --> Memory[Working Memory]
|
|
165
|
+
Orchestrator --> Retrieval[Semantic Retrieval FAISS]
|
|
166
|
+
Orchestrator --> Tools[Tool Suite: pytest, ripgrep, git]
|
|
167
|
+
Orchestrator --> Reviewer[Self-Review Loop]
|
|
168
|
+
Reviewer --> Patch[Surgical Patch Engine]
|
|
169
|
+
Patch --> Sandbox[Sandbox Environment]
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
No API keys. No sign-ups. No cloud.
|
|
173
|
+
|
|
174
|
+
### Optional: Enable Semantic Search
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
pip install faiss-cpu sentence-transformers  # or: pip install "devagent-cli[semantic]"
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
Without these, DevAgent falls back to keyword search — still fully functional.
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## 🎬 Demo
|
|
185
|
+
|
|
186
|
+
```
|
|
187
|
+
____ _ _
|
|
188
|
+
| _ \ _____ __/ \ __ _ ___ _ __ | |_
|
|
189
|
+
| | | |/ _ \ \ / / _ \ / _` |/ _ \ '_ \| __|
|
|
190
|
+
| |_| | __/\ V / ___ \ (_| | __/ | | | |_
|
|
191
|
+
|____/ \___| \_/_/ \_\__, |\___|_| |_|\__|
|
|
192
|
+
|___/
|
|
193
|
+
|
|
194
|
+
+==========================================================+
|
|
195
|
+
| DEVELOPER CODE INTELLIGENCE AGENT |
|
|
196
|
+
| Model: qwen2.5-coder:3b |
|
|
197
|
+
| Sandbox: OFF |
|
|
198
|
+
+==========================================================+
|
|
199
|
+
|
|
200
|
+
[PLAN] LIKELY_FILES: calculator.py
|
|
201
|
+
1. search_code: divide
|
|
202
|
+
2. read_file: calculator.py
|
|
203
|
+
3. run_tests
|
|
204
|
+
|
|
205
|
+
----------------------------------------
|
|
206
|
+
ITERATION 1/5
|
|
207
|
+
----------------------------------------
|
|
208
|
+
[TOOL] Executing: search_code(divide)
|
|
209
|
+
>> Found: calculator.py:10:def divide(a, b)
|
|
210
|
+
[REVIEW] #1: APPROVED
|
|
211
|
+
>> Tests: 5 passed ✓
|
|
212
|
+
|
|
213
|
+
[OK] AGENT COMPLETED SUCCESSFULLY
|
|
214
|
+
|
|
215
|
+
Status: success
|
|
216
|
+
Steps used: 1/5
|
|
217
|
+
Patches: 1
|
|
218
|
+
Time: 8.2s
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## 🏗️ Architecture (Detailed)
|
|
224
|
+
|
|
225
|
+
```
|
|
226
|
+
┌─────────────────────────────┐
|
|
227
|
+
│ CLI (main.py) │
|
|
228
|
+
│ --task --root --model │
|
|
229
|
+
│ --sandbox --benchmark │
|
|
230
|
+
│ --auto-commit --auto-push │
|
|
231
|
+
└──────────┬──────────────────┘
|
|
232
|
+
│
|
|
233
|
+
┌──────────▼──────────────────┐
|
|
234
|
+
│ Planner Layer │
|
|
235
|
+
│ Identifies files + strategy │
|
|
236
|
+
└──────────┬──────────────────┘
|
|
237
|
+
│
|
|
238
|
+
┌──────────▼──────────────────┐
|
|
239
|
+
│ Retrieval Layer (Memory) │
|
|
240
|
+
│ FAISS + Sentence-Transformers│
|
|
241
|
+
│ Chunk → Embed → Top-K │
|
|
242
|
+
└──────────┬──────────────────┘
|
|
243
|
+
│
|
|
244
|
+
┌──────────▼──────────────────┐
|
|
245
|
+
│ ReAct Agent Loop │
|
|
246
|
+
│ │
|
|
247
|
+
│ 1. THOUGHT (LLM) │
|
|
248
|
+
│ 2. ACTION (Tool) │
|
|
249
|
+
│ 3. OBSERVATION │
|
|
250
|
+
│ 4. FIX (LLM) │
|
|
251
|
+
│ 5. REVIEW (LLM) │
|
|
252
|
+
│ 6. PATCH (Diff Engine) │
|
|
253
|
+
│ 7. TEST (pytest) │
|
|
254
|
+
│ │
|
|
255
|
+
│ if FAIL → retry │
|
|
256
|
+
│ if PASS → done ✓ │
|
|
257
|
+
└──┬──────────────┬───────────┘
|
|
258
|
+
│ │
|
|
259
|
+
┌────────▼──┐ ┌──────▼──────┐
|
|
260
|
+
│ Tools │ │ Ollama │
|
|
261
|
+
│ │ │ (Local) │
|
|
262
|
+
│ • search │ │ │
|
|
263
|
+
│ • semantic│ │ qwen2.5- │
|
|
264
|
+
│ • read │ │ coder:3b │
|
|
265
|
+
│ • patch │ │ phi3:mini │
|
|
266
|
+
│ • pytest │ │ mistral:7b │
|
|
267
|
+
│ • flake8 │ │ │
|
|
268
|
+
│ • git_diff│ └─────────────┘
|
|
269
|
+
│ • sandbox │
|
|
270
|
+
└───────────┘
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
### 9-Layer Architecture
|
|
274
|
+
|
|
275
|
+
| Layer | Module | Purpose |
|
|
276
|
+
|---|---|---|
|
|
277
|
+
| 1. CLI | `main.py` | Argument parsing, mode selection, banner |
|
|
278
|
+
| 2. Planner | `app/planner.py` | Task interpretation, file identification |
|
|
279
|
+
| 3. Retrieval | `app/memory.py` | FAISS index, semantic chunking, Top-K search |
|
|
280
|
+
| 4. Tools | `tools/*` | 8 real tools: search, semantic_search, read, write, test, lint, git, sandbox |
|
|
281
|
+
| 5. Agent | `app/agent.py` | ReAct orchestration loop |
|
|
282
|
+
| 6. Review | `app/reviewer.py` | Self-critique with APPROVED/REVISE |
|
|
283
|
+
| 7. Validation | `tools/test_runner.py` | pytest + flake8 execution feedback |
|
|
284
|
+
| 8. Logging | `utils/logger.py` | Structured JSON audit trail |
|
|
285
|
+
| 9. Safety | `app/sandbox.py` | Isolated workspace, path validation |
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
## 📁 Project Structure
|
|
290
|
+
|
|
291
|
+
```
|
|
292
|
+
Developer-Code-Intelligence-Agent/
|
|
293
|
+
├── app/
|
|
294
|
+
│ ├── agent.py # Core ReAct agent engine
|
|
295
|
+
│ ├── planner.py # Task planning layer
|
|
296
|
+
│ ├── reviewer.py # Self-review module
|
|
297
|
+
│ ├── llm.py # Ollama integration
|
|
298
|
+
│ ├── memory.py # FAISS retrieval + working memory
|
|
299
|
+
│ ├── patcher.py # Unified diff patch engine
|
|
300
|
+
│ ├── sandbox.py # Sandbox workspace manager
|
|
301
|
+
│ └── state.py # Shared state dataclass
|
|
302
|
+
├── tools/
|
|
303
|
+
│ ├── search.py # Code search (ripgrep + fallbacks)
|
|
304
|
+
│ ├── semantic_search.py # FAISS semantic search
|
|
305
|
+
│ ├── file_ops.py # Safe file read/write
|
|
306
|
+
│ ├── test_runner.py # pytest runner
|
|
307
|
+
│ ├── linter.py # flake8 linter
|
|
308
|
+
│ ├── git_tools.py # Git diff/commit/push
|
|
309
|
+
│ └── benchmark_runner.py # Benchmark evaluation
|
|
310
|
+
├── utils/
|
|
311
|
+
│ ├── logger.py # Structured JSON logger
|
|
312
|
+
│ ├── config.py # Centralized configuration
|
|
313
|
+
│ └── metrics.py # Performance metrics
|
|
314
|
+
├── benchmarks/
|
|
315
|
+
│ ├── divide_by_zero/ # Benchmark: zero division guard
|
|
316
|
+
│ ├── missing_validation/ # Benchmark: input validation
|
|
317
|
+
│ ├── syntax_error/ # Benchmark: syntax fix
|
|
318
|
+
│ ├── import_bug/ # Benchmark: wrong import
|
|
319
|
+
│ └── edge_case/ # Benchmark: empty list handling
|
|
320
|
+
├── demo_project/ # Sample buggy project
|
|
321
|
+
├── docs/
|
|
322
|
+
│ └── USER_GUIDE.md # Full usage guide
|
|
323
|
+
├── main.py # CLI entry point
|
|
324
|
+
├── devagent.py # Global CLI wrapper
|
|
325
|
+
├── devagent.bat # Windows global shortcut
|
|
326
|
+
├── requirements.txt
|
|
327
|
+
├── CONTRIBUTING.md
|
|
328
|
+
├── CHANGELOG.md
|
|
329
|
+
├── CODE_OF_CONDUCT.md
|
|
330
|
+
├── SECURITY.md
|
|
331
|
+
├── LICENSE
|
|
332
|
+
└── README.md
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
---
|
|
336
|
+
|
|
337
|
+
## 💻 CLI Reference
|
|
338
|
+
|
|
339
|
+
```bash
|
|
340
|
+
python main.py --task "TASK" --root ./project [OPTIONS]
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
| Flag | Default | Description |
|
|
344
|
+
|---|---|---|
|
|
345
|
+
| `--task`, `-t` | *(required)* | The coding task for the agent |
|
|
346
|
+
| `--root`, `-r` | `.` | Project root directory |
|
|
347
|
+
| `--model` | `qwen2.5-coder:3b` | Any Ollama model |
|
|
348
|
+
| `--max-steps`, `-m` | `5` | Max ReAct iterations |
|
|
349
|
+
| `--benchmark` | off | Run benchmark suite |
|
|
350
|
+
| `--sandbox` | off | Run in isolated sandbox |
|
|
351
|
+
| `--auto-commit` | off | Git commit on success |
|
|
352
|
+
| `--auto-push` | off | Git push after commit |
|
|
353
|
+
| `--verbose`, `-v` | off | Verbose output |
|
|
354
|
+
|
|
355
|
+
### Examples
|
|
356
|
+
|
|
357
|
+
```bash
|
|
358
|
+
# Fix a specific bug
|
|
359
|
+
python main.py -t "Fix the TypeError in user_service.py" -r ./backend
|
|
360
|
+
|
|
361
|
+
# Run in sandbox mode (safe — doesn't touch real files until success)
|
|
362
|
+
python main.py -t "Fix divide-by-zero bug" -r ./project --sandbox
|
|
363
|
+
|
|
364
|
+
# Auto-commit changes on success
|
|
365
|
+
python main.py -t "Add input validation" -r ./api --auto-commit
|
|
366
|
+
|
|
367
|
+
# Use a stronger model
|
|
368
|
+
python main.py -t "Refactor auth middleware" -r ./server --model mistral:7b
|
|
369
|
+
|
|
370
|
+
# Run benchmarks
|
|
371
|
+
python main.py --benchmark
|
|
372
|
+
|
|
373
|
+
# More retries for complex tasks
|
|
374
|
+
python main.py -t "Make all tests pass" -r ./project --max-steps 10
|
|
375
|
+
```
|
|
376
|
+
|
|
377
|
+
> 📖 **[Full User Guide →](docs/USER_GUIDE.md)**
|
|
378
|
+
|
|
379
|
+
---
|
|
380
|
+
|
|
381
|
+
## 📊 Benchmarks
|
|
382
|
+
|
|
383
|
+
DevAgent includes 5 built-in benchmarks to evaluate agent performance:
|
|
384
|
+
|
|
385
|
+
| Benchmark | Bug Type | Difficulty |
|
|
386
|
+
|---|---|---|
|
|
387
|
+
| `divide_by_zero` | Missing guard clause | Easy |
|
|
388
|
+
| `missing_validation` | No input validation | Medium |
|
|
389
|
+
| `syntax_error` | Broken syntax | Medium |
|
|
390
|
+
| `import_bug` | Wrong module name | Easy |
|
|
391
|
+
| `edge_case` | Empty list crash | Medium |
|
|
392
|
+
|
|
393
|
+
Run benchmarks:
|
|
394
|
+
|
|
395
|
+
```bash
|
|
396
|
+
python main.py --benchmark
|
|
397
|
+
python main.py --benchmark --model phi3:mini
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
---
|
|
401
|
+
|
|
402
|
+
## 🔧 Supported Models
|
|
403
|
+
|
|
404
|
+
| Model | Size | Speed | Quality | Best For |
|
|
405
|
+
|---|---|---|---|---|
|
|
406
|
+
| `qwen2.5-coder:3b` | 1.9 GB | ⚡ Fast | ★★★★ | **Default — best for code** |
|
|
407
|
+
| `qwen2.5:3b` | 1.9 GB | ⚡ Fast | ★★★☆ | General fallback |
|
|
408
|
+
| `phi3:mini` | 2.2 GB | ⚡ Fast | ★★★☆ | Good reasoning |
|
|
409
|
+
| `qwen3:4b` | 2.5 GB | ⚡ Fast | ★★★★ | Better understanding |
|
|
410
|
+
| `gemma2:2b` | 1.6 GB | ⚡⚡ | ★★☆☆ | Ultra-low resource |
|
|
411
|
+
| `mistral:7b` | 4.4 GB | 🐢 | ★★★★★ | Best quality (8GB+ RAM) |
|
|
412
|
+
|
|
413
|
+
---
|
|
414
|
+
|
|
415
|
+
## 🗺️ Roadmap
|
|
416
|
+
|
|
417
|
+
### ✅ Completed (v2.0)
|
|
418
|
+
|
|
419
|
+
- [x] Core ReAct agent loop
|
|
420
|
+
- [x] Self-review module
|
|
421
|
+
- [x] Tool system (9 tools)
|
|
422
|
+
- [x] Planner layer
|
|
423
|
+
- [x] Semantic retrieval (FAISS)
|
|
424
|
+
- [x] Patch engine (unified diffs)
|
|
425
|
+
- [x] Sandbox mode
|
|
426
|
+
- [x] Benchmark system (5 suites)
|
|
427
|
+
- [x] Metrics + structured logging
|
|
428
|
+
- [x] Git integration
|
|
429
|
+
- [x] CLI with all flags
|
|
430
|
+
|
|
431
|
+
### 🔜 Coming Next
|
|
432
|
+
|
|
433
|
+
- [ ] **Multi-file support** — Agent works across multiple files simultaneously
|
|
434
|
+
- [ ] **Language support** — JavaScript, TypeScript, Go, Rust
|
|
435
|
+
- [ ] **Plugin system** — Custom tools via YAML/Python
|
|
436
|
+
- [ ] **Watch mode** — Auto-fix on test failure (`--watch`)
|
|
437
|
+
- [ ] **VS Code extension** — Run agent from your editor
|
|
438
|
+
- [ ] **Conversation memory** — Learn from past runs
|
|
439
|
+
- [ ] **Multi-agent mode** — Planner + Coder + Reviewer + Evaluator agents
|
|
440
|
+
|
|
441
|
+
---
|
|
442
|
+
|
|
443
|
+
## 🤝 Contributing
|
|
444
|
+
|
|
445
|
+
We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for details.
|
|
446
|
+
|
|
447
|
+
```bash
|
|
448
|
+
git checkout -b feature/your-feature
|
|
449
|
+
# ... make changes ...
|
|
450
|
+
python -m pytest demo_project/ -v
|
|
451
|
+
git commit -m "feat: your feature"
|
|
452
|
+
git push origin feature/your-feature
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
**Good first issues** are tagged and waiting:
|
|
456
|
+
[Browse good first issues →](https://github.com/VedantJadhav701/Developer-Code-Intelligence-Agent/labels/good%20first%20issue)
|
|
457
|
+
|
|
458
|
+
---
|
|
459
|
+
|
|
460
|
+
## 📜 License
|
|
461
|
+
|
|
462
|
+
MIT — use it however you want. See [LICENSE](LICENSE).
|
|
463
|
+
|
|
464
|
+
---
|
|
465
|
+
|
|
466
|
+
## ⭐ Star History
|
|
467
|
+
|
|
468
|
+
If DevAgent helps you, give it a star! It helps others discover the project.
|
|
469
|
+
|
|
470
|
+
[Star History Chart](https://star-history.com/#VedantJadhav701/Developer-Code-Intelligence-Agent&Date)
|
|
471
|
+
|
|
472
|
+
---
|
|
473
|
+
|
|
474
|
+
<div align="center">
|
|
475
|
+
|
|
476
|
+
**Built with 🧠 by [Vedant Jadhav](https://github.com/VedantJadhav701)**
|
|
477
|
+
|
|
478
|
+
*A lightweight local open-source miniature of Claude Code CLI.*
|
|
479
|
+
|
|
480
|
+
</div>
|