fableforge-anvil-agent 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fableforge_anvil_agent-0.1.0/LICENSE +21 -0
- fableforge_anvil_agent-0.1.0/PKG-INFO +289 -0
- fableforge_anvil_agent-0.1.0/README.md +244 -0
- fableforge_anvil_agent-0.1.0/pyproject.toml +64 -0
- fableforge_anvil_agent-0.1.0/setup.cfg +4 -0
- fableforge_anvil_agent-0.1.0/src/anvil/__init__.py +154 -0
- fableforge_anvil_agent-0.1.0/src/anvil/agents/__init__.py +28 -0
- fableforge_anvil_agent-0.1.0/src/anvil/agents/agent_base.py +127 -0
- fableforge_anvil_agent-0.1.0/src/anvil/agents/agent_manager.py +380 -0
- fableforge_anvil_agent-0.1.0/src/anvil/agents/builtin_agents.py +156 -0
- fableforge_anvil_agent-0.1.0/src/anvil/cli.py +507 -0
- fableforge_anvil_agent-0.1.0/src/anvil/commands/__init__.py +5 -0
- fableforge_anvil_agent-0.1.0/src/anvil/commands/command_manager.py +148 -0
- fableforge_anvil_agent-0.1.0/src/anvil/compaction/__init__.py +5 -0
- fableforge_anvil_agent-0.1.0/src/anvil/compaction/compactor.py +159 -0
- fableforge_anvil_agent-0.1.0/src/anvil/config_v2/__init__.py +5 -0
- fableforge_anvil_agent-0.1.0/src/anvil/config_v2/config_v2.py +266 -0
- fableforge_anvil_agent-0.1.0/src/anvil/core/__init__.py +19 -0
- fableforge_anvil_agent-0.1.0/src/anvil/core/commands.py +273 -0
- fableforge_anvil_agent-0.1.0/src/anvil/core/compaction.py +204 -0
- fableforge_anvil_agent-0.1.0/src/anvil/core/config.py +228 -0
- fableforge_anvil_agent-0.1.0/src/anvil/core/config_v2.py +519 -0
- fableforge_anvil_agent-0.1.0/src/anvil/core/engine.py +516 -0
- fableforge_anvil_agent-0.1.0/src/anvil/core/init_project.py +792 -0
- fableforge_anvil_agent-0.1.0/src/anvil/core/rules.py +198 -0
- fableforge_anvil_agent-0.1.0/src/anvil/core/session.py +166 -0
- fableforge_anvil_agent-0.1.0/src/anvil/core/snapshot.py +311 -0
- fableforge_anvil_agent-0.1.0/src/anvil/daemon/__init__.py +4 -0
- fableforge_anvil_agent-0.1.0/src/anvil/daemon/server.py +94 -0
- fableforge_anvil_agent-0.1.0/src/anvil/integrations/__init__.py +13 -0
- fableforge_anvil_agent-0.1.0/src/anvil/integrations/agent_swarm.py +98 -0
- fableforge_anvil_agent-0.1.0/src/anvil/integrations/cost_optimizer.py +157 -0
- fableforge_anvil_agent-0.1.0/src/anvil/integrations/error_recovery.py +188 -0
- fableforge_anvil_agent-0.1.0/src/anvil/integrations/verifyloop.py +143 -0
- fableforge_anvil_agent-0.1.0/src/anvil/mcp/__init__.py +13 -0
- fableforge_anvil_agent-0.1.0/src/anvil/mcp/mcp_manager.py +379 -0
- fableforge_anvil_agent-0.1.0/src/anvil/mcp/mcp_types.py +175 -0
- fableforge_anvil_agent-0.1.0/src/anvil/models/__init__.py +4 -0
- fableforge_anvil_agent-0.1.0/src/anvil/models/anthropic_model.py +5 -0
- fableforge_anvil_agent-0.1.0/src/anvil/models/local.py +5 -0
- fableforge_anvil_agent-0.1.0/src/anvil/models/openai_model.py +5 -0
- fableforge_anvil_agent-0.1.0/src/anvil/models/registry.py +284 -0
- fableforge_anvil_agent-0.1.0/src/anvil/permissions/__init__.py +5 -0
- fableforge_anvil_agent-0.1.0/src/anvil/permissions/permissions.py +265 -0
- fableforge_anvil_agent-0.1.0/src/anvil/rules/__init__.py +5 -0
- fableforge_anvil_agent-0.1.0/src/anvil/rules/rules_manager.py +123 -0
- fableforge_anvil_agent-0.1.0/src/anvil/sdk.py +712 -0
- fableforge_anvil_agent-0.1.0/src/anvil/snapshot/__init__.py +5 -0
- fableforge_anvil_agent-0.1.0/src/anvil/snapshot/snapshot_manager.py +148 -0
- fableforge_anvil_agent-0.1.0/src/anvil/tools/__init__.py +18 -0
- fableforge_anvil_agent-0.1.0/src/anvil/tools/executor.py +291 -0
- fableforge_anvil_agent-0.1.0/src/anvil/tools/new_tools.py +258 -0
- fableforge_anvil_agent-0.1.0/src/anvil/tui/__init__.py +17 -0
- fableforge_anvil_agent-0.1.0/src/anvil/tui/app.py +781 -0
- fableforge_anvil_agent-0.1.0/src/anvil/tui/dashboard.py +103 -0
- fableforge_anvil_agent-0.1.0/src/anvil/verify/__init__.py +4 -0
- fableforge_anvil_agent-0.1.0/src/anvil/verify/pipeline.py +266 -0
- fableforge_anvil_agent-0.1.0/src/fableforge_anvil_agent.egg-info/PKG-INFO +289 -0
- fableforge_anvil_agent-0.1.0/src/fableforge_anvil_agent.egg-info/SOURCES.txt +76 -0
- fableforge_anvil_agent-0.1.0/src/fableforge_anvil_agent.egg-info/dependency_links.txt +1 -0
- fableforge_anvil_agent-0.1.0/src/fableforge_anvil_agent.egg-info/entry_points.txt +7 -0
- fableforge_anvil_agent-0.1.0/src/fableforge_anvil_agent.egg-info/requires.txt +31 -0
- fableforge_anvil_agent-0.1.0/src/fableforge_anvil_agent.egg-info/top_level.txt +1 -0
- fableforge_anvil_agent-0.1.0/tests/test_agents.py +489 -0
- fableforge_anvil_agent-0.1.0/tests/test_anvil.py +333 -0
- fableforge_anvil_agent-0.1.0/tests/test_commands.py +262 -0
- fableforge_anvil_agent-0.1.0/tests/test_compaction.py +217 -0
- fableforge_anvil_agent-0.1.0/tests/test_config_v2.py +372 -0
- fableforge_anvil_agent-0.1.0/tests/test_daemon.py +161 -0
- fableforge_anvil_agent-0.1.0/tests/test_engine.py +392 -0
- fableforge_anvil_agent-0.1.0/tests/test_mcp.py +325 -0
- fableforge_anvil_agent-0.1.0/tests/test_models.py +319 -0
- fableforge_anvil_agent-0.1.0/tests/test_new_tools.py +442 -0
- fableforge_anvil_agent-0.1.0/tests/test_permissions.py +169 -0
- fableforge_anvil_agent-0.1.0/tests/test_rules.py +228 -0
- fableforge_anvil_agent-0.1.0/tests/test_snapshot.py +201 -0
- fableforge_anvil_agent-0.1.0/tests/test_tools.py +463 -0
- fableforge_anvil_agent-0.1.0/tests/test_verify_pipeline.py +336 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 FableForge Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fableforge-anvil-agent
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: The open-source, self-verified coding agent. Generate → Execute → Verify → Recover.
|
|
5
|
+
Author-email: FableForge <team@fableforge.ai>
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: agent,coding,verification,self-healing,llm,open-source
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Environment :: Console
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
14
|
+
Classifier: Topic :: Software Development :: Testing
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: rich>=13.0
|
|
19
|
+
Requires-Dist: click>=8.1
|
|
20
|
+
Requires-Dist: pydantic>=2.0
|
|
21
|
+
Requires-Dist: httpx>=0.25
|
|
22
|
+
Requires-Dist: tiktoken>=0.5
|
|
23
|
+
Requires-Dist: pathspec>=0.11
|
|
24
|
+
Requires-Dist: tree-sitter>=0.20
|
|
25
|
+
Requires-Dist: prompt-toolkit>=3.0
|
|
26
|
+
Requires-Dist: jsonschema>=4.0
|
|
27
|
+
Requires-Dist: python-slugify>=8.0
|
|
28
|
+
Provides-Extra: local
|
|
29
|
+
Requires-Dist: llama-cpp-python>=0.2; extra == "local"
|
|
30
|
+
Requires-Dist: onnxruntime>=1.16; extra == "local"
|
|
31
|
+
Provides-Extra: api
|
|
32
|
+
Requires-Dist: openai>=1.0; extra == "api"
|
|
33
|
+
Requires-Dist: anthropic>=0.18; extra == "api"
|
|
34
|
+
Provides-Extra: tui
|
|
35
|
+
Requires-Dist: textual>=0.47; extra == "tui"
|
|
36
|
+
Requires-Dist: textual-dev>=0.4; extra == "tui"
|
|
37
|
+
Provides-Extra: all
|
|
38
|
+
Requires-Dist: fableforge-anvil-agent[api,local,tui]; extra == "all"
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
41
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
|
|
42
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
43
|
+
Requires-Dist: ruff>=0.1; extra == "dev"
|
|
44
|
+
Dynamic: license-file
|
|
45
|
+
|
|
46
|
+
<picture>
|
|
47
|
+
<source media="(prefers-color-scheme: dark)" srcset="docs/assets/logo-dark.svg">
|
|
48
|
+
<source media="(prefers-color-scheme: light)" srcset="docs/assets/logo-light.svg">
|
|
49
|
+
<img alt="Anvil" src="docs/assets/logo-light.svg" width="400">
|
|
50
|
+
</picture>
|
|
51
|
+
|
|
52
|
+
# Anvil — The Self-Verified Coding Agent
|
|
53
|
+
|
|
54
|
+
[License: MIT](LICENSE) [Python 3.10+](https://www.python.org/downloads/) [Tests](tests/)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
> **Generate → Execute → Verify → Recover**
|
|
58
|
+
|
|
59
|
+
Every other open agent generates and hopes. **Anvil generates, runs, checks, and fixes** — because it was trained on 210,000 examples of real agents doing exactly that.
|
|
60
|
+
|
|
61
|
+
This isn't prompt engineering. This is **behavior engineering**.
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Why Anvil?
|
|
66
|
+
|
|
67
|
+
| Other Agents | Anvil |
|
|
68
|
+
|---|---|
|
|
69
|
+
| Generate code and hope it works | Generate code, then **verify it works** |
|
|
70
|
+
| No error recovery | **Self-healing** with 3 retry attempts |
|
|
71
|
+
| One-shot output | **Iterative** Plan→Execute→Verify→Recover loop |
|
|
72
|
+
| No cost awareness | **Token tracking + model routing** for cost optimization |
|
|
73
|
+
| Black box | **Full session tracking**, verify reports, telemetry |
|
|
74
|
+
| Requires expensive API | Runs **fully local** with ShellWhisperer (1.5B) |
|
|
75
|
+
|
|
76
|
+
## The Verification Loop
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐
|
|
80
|
+
│ PLAN │────▶│ EXEC │────▶│VERIFY│────▶│ DONE │
|
|
81
|
+
└──────┘ └──────┘ └──┬───┘ └──────┘
|
|
82
|
+
│ Fail
|
|
83
|
+
▼
|
|
84
|
+
┌──────┐
|
|
85
|
+
│RECOVR│────▶ back to EXEC
|
|
86
|
+
└──────┘
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Anvil doesn't just write code. It **verifies** every change:
|
|
90
|
+
|
|
91
|
+
1. **Syntax check** — Does the code parse?
|
|
92
|
+
2. **Test run** — Do the tests pass?
|
|
93
|
+
3. **Lint check** — Is the code clean?
|
|
94
|
+
4. **Import check** — Are dependencies valid?
|
|
95
|
+
|
|
96
|
+
If verification fails, Anvil **diagnoses the error, generates a fix, and re-verifies**. Up to 3 retry cycles. This isn't optional — it's the core loop.
|
|
97
|
+
|
|
98
|
+
## Quick Start
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
pip install fableforge-anvil-agent
|
|
102
|
+
|
|
103
|
+
# Run with local model (ollama)
|
|
104
|
+
anvil run "Add error handling to main.py"
|
|
105
|
+
|
|
106
|
+
# Run with API model
|
|
107
|
+
anvil run -m gpt-4o "Refactor the auth module"
|
|
108
|
+
|
|
109
|
+
# Interactive chat with verification
|
|
110
|
+
anvil chat
|
|
111
|
+
|
|
112
|
+
# Verify existing code
|
|
113
|
+
anvil verify src/
|
|
114
|
+
|
|
115
|
+
# Start as persistent daemon
|
|
116
|
+
anvil daemon --port 8765
|
|
117
|
+
|
|
118
|
+
# List past sessions
|
|
119
|
+
anvil sessions
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## The Name
|
|
123
|
+
|
|
124
|
+
**Anvil** — where code gets forged, hammered, and tested until it holds.
|
|
125
|
+
|
|
126
|
+
Every blacksmith knows: you don't just shape metal on the anvil. You **test** it. You strike it, check it, and if it's not right, you heat it again and hammer it until it is. That's what this agent does with code.
|
|
127
|
+
|
|
128
|
+
**Other agents shape and ship. Anvil shapes, verifies, and only then ships.**
|
|
129
|
+
|
|
130
|
+
## Architecture
|
|
131
|
+
|
|
132
|
+
```
|
|
133
|
+
anvil/
|
|
134
|
+
├── core/
|
|
135
|
+
│ ├── engine.py # Plan→Execute→Verify→Recover loop
|
|
136
|
+
│ ├── config.py # 7-layer configuration system
|
|
137
|
+
│ └── session.py # Full session tracking + persistence
|
|
138
|
+
├── tools/
|
|
139
|
+
│ └── executor.py # Bash, Read, Write, Edit, Grep, Glob, LS
|
|
140
|
+
├── verify/
|
|
141
|
+
│ └── pipeline.py # Syntax, test, lint, import verification
|
|
142
|
+
├── models/
|
|
143
|
+
│ └── registry.py # Local (ollama), OpenAI, Anthropic + cost tracking
|
|
144
|
+
├── integrations/
|
|
145
|
+
│ ├── verifyloop.py # VerifyLoop framework integration
|
|
146
|
+
│ ├── error_recovery.py # ErrorRecovery engine integration
|
|
147
|
+
│ ├── agent_swarm.py # AgentSwarm coordination integration
|
|
148
|
+
│ └── cost_optimizer.py # CostOptimizer routing integration
|
|
149
|
+
├── daemon/
|
|
150
|
+
│ └── server.py # Persistent HTTP daemon mode
|
|
151
|
+
├── tui/
|
|
152
|
+
│ └── dashboard.py # Rich terminal dashboard
|
|
153
|
+
└── cli.py # run, chat, verify, daemon, sessions, models
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## The FableForge Ecosystem
|
|
157
|
+
|
|
158
|
+
Anvil is the flagship product of the **FableForge** ecosystem — 21 open-source projects built from 210K real agent traces:
|
|
159
|
+
|
|
160
|
+
| Project | What It Does |
|
|
161
|
+
|---|---|
|
|
162
|
+
| **Anvil** | Self-verified coding agent (this one) |
|
|
163
|
+
| VerifyLoop | Plan→Execute→Verify→Recover framework |
|
|
164
|
+
| ErrorRecovery | Self-healing middleware (3,725 error examples) |
|
|
165
|
+
| FableForge-14B | The fine-tuned model (4-stage training) |
|
|
166
|
+
| ShellWhisperer | 1.5B edge agent (phone/RPi, 50ms) |
|
|
167
|
+
| ReasonCritic | Verification model (130 benchmark tasks) |
|
|
168
|
+
| TraceCompiler | Compile traces → LoRA skills |
|
|
169
|
+
| AgentRuntime | Persistent agent daemon (systemd for AI) |
|
|
170
|
+
| AgentSwarm | Multi-agent from real trace transitions |
|
|
171
|
+
| AgentTelemetry | Datadog for agents (token tracking, costs) |
|
|
172
|
+
| BenchAgent | HumanEval for tool-use (107 tasks) |
|
|
173
|
+
| AgentDev | VSCode extension with verification |
|
|
174
|
+
| TraceViz | Trace replay visualizer (Next.js) |
|
|
175
|
+
| AgentSkills.org | npm for agent behaviors |
|
|
176
|
+
| AgentCurriculum | 5-stage progressive training |
|
|
177
|
+
| AgentFuzzer | Adversarial testing for agents |
|
|
178
|
+
| AgentConstitution | Safety guardrails from traces |
|
|
179
|
+
| CostOptimizer | Token cost reduction (50-80%) |
|
|
180
|
+
| AgentProfiler | Behavioral fingerprinting |
|
|
181
|
+
| TrajectoryDistiller | Trace→training data pipeline |
|
|
182
|
+
| Fable5-Dataset | HuggingFace dataset release |
|
|
183
|
+
|
|
184
|
+
## Configuration
|
|
185
|
+
|
|
186
|
+
Create `.anvil.json` in your project root:
|
|
187
|
+
|
|
188
|
+
```json
|
|
189
|
+
{
|
|
190
|
+
"model": {
|
|
191
|
+
"model": "local",
|
|
192
|
+
"temperature": 0.2,
|
|
193
|
+
"max_tokens": 4096
|
|
194
|
+
},
|
|
195
|
+
"verify": {
|
|
196
|
+
"enabled": true,
|
|
197
|
+
"auto_recover": true,
|
|
198
|
+
"max_retries": 3,
|
|
199
|
+
"check_syntax": true,
|
|
200
|
+
"check_tests": true,
|
|
201
|
+
"check_lint": true
|
|
202
|
+
},
|
|
203
|
+
"tools": {
|
|
204
|
+
"allow_shell": true,
|
|
205
|
+
"sandbox": false
|
|
206
|
+
},
|
|
207
|
+
"safety": {
|
|
208
|
+
"constitution_enabled": true,
|
|
209
|
+
"blocked_commands": ["rm -rf /", "mkfs"],
|
|
210
|
+
"require_confirmation_for": ["git push", "DROP TABLE"]
|
|
211
|
+
},
|
|
212
|
+
"cost": {
|
|
213
|
+
"max_cost_per_session_usd": 5.0,
|
|
214
|
+
"route_by_complexity": true,
|
|
215
|
+
"simple_model": "local",
|
|
216
|
+
"complex_model": "gpt-4o"
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## Daemon Mode
|
|
222
|
+
|
|
223
|
+
Run Anvil as a persistent server:
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
anvil daemon --port 8765
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
curl -X POST http://localhost:8765/run \
|
|
231
|
+
-H "Content-Type: application/json" \
|
|
232
|
+
-d '{"task": "Add input validation to all API endpoints"}'
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
## Model Backends
|
|
236
|
+
|
|
237
|
+
| Model | Type | Input $/1M | Output $/1M |
|
|
238
|
+
|---|---|---|---|
|
|
239
|
+
| local (fableforge-14b) | Local | Free | Free |
|
|
240
|
+
| gpt-4o | API | $2.50 | $10.00 |
|
|
241
|
+
| gpt-4o-mini | API | $0.15 | $0.60 |
|
|
242
|
+
| o3-mini | API | $1.10 | $4.40 |
|
|
243
|
+
| claude-3.5-sonnet | API | $3.00 | $15.00 |
|
|
244
|
+
| claude-3.5-haiku | API | $0.80 | $4.00 |
|
|
245
|
+
|
|
246
|
+
## How It's Different
|
|
247
|
+
|
|
248
|
+
### Trained on Real Behavior
|
|
249
|
+
|
|
250
|
+
The FableForge model was trained on 210K examples from real agent traces:
|
|
251
|
+
- **87.7% planning rate** — agents plan before they act
|
|
252
|
+
- **39.5% error recovery rate** — agents that hit errors and recover
|
|
253
|
+
- **1,311-step trace** — the Boeing 747 trace proves agents need persistent runtime
|
|
254
|
+
- **31 tools** mapped — transition matrices drive swarm coordination
|
|
255
|
+
|
|
256
|
+
### Verification Is Not Optional
|
|
257
|
+
|
|
258
|
+
Other agents: "Here's the code, hope it works."
|
|
259
|
+
|
|
260
|
+
Anvil: "Here's the code. I ran it. Tests pass. Lint is clean. Imports resolve. Here's the proof."
|
|
261
|
+
|
|
262
|
+
### Self-Healing
|
|
263
|
+
|
|
264
|
+
When verification fails, Anvil doesn't just report the error. It **reads the error, generates a fix, applies it, and re-verifies**. This is the ErrorRecovery engine with 3,725 real error examples baked in.
|
|
265
|
+
|
|
266
|
+
### Ecosystem Integration
|
|
267
|
+
|
|
268
|
+
Anvil doesn't work alone. It's wired into the full FableForge stack:
|
|
269
|
+
- **VerifyLoop** → Sophisticated multi-step verification
|
|
270
|
+
- **ErrorRecovery** → Pattern-matched error resolution from real traces
|
|
271
|
+
- **AgentSwarm** → Multi-agent coordination via transition matrices
|
|
272
|
+
- **CostOptimizer** → Automatic model routing based on task complexity
|
|
273
|
+
- **AgentConstitution** → Safety guardrails from analysis of real traces
|
|
274
|
+
|
|
275
|
+
## License
|
|
276
|
+
|
|
277
|
+
MIT
|
|
278
|
+
|
|
279
|
+
## Built With
|
|
280
|
+
|
|
281
|
+
- 210,000+ real agent traces from the Fable-5 dataset collection
|
|
282
|
+
- 87.7% planning rate behavioral signal
|
|
283
|
+
- 39.5% error recovery success rate
|
|
284
|
+
- 303 tool calls in a single session (Boeing 747 trace)
|
|
285
|
+
- 5 specialized micro-models (ShellWhisperer, ReasonCritic, etc.)
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
**Anvil: Forge your code. Verify it holds.**
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
<picture>
|
|
2
|
+
<source media="(prefers-color-scheme: dark)" srcset="docs/assets/logo-dark.svg">
|
|
3
|
+
<source media="(prefers-color-scheme: light)" srcset="docs/assets/logo-light.svg">
|
|
4
|
+
<img alt="Anvil" src="docs/assets/logo-light.svg" width="400">
|
|
5
|
+
</picture>
|
|
6
|
+
|
|
7
|
+
# Anvil — The Self-Verified Coding Agent
|
|
8
|
+
|
|
9
|
+
[License: MIT](LICENSE) [Python 3.10+](https://www.python.org/downloads/) [Tests](tests/)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
> **Generate → Execute → Verify → Recover**
|
|
13
|
+
|
|
14
|
+
Every other open agent generates and hopes. **Anvil generates, runs, checks, and fixes** — because it was trained on 210,000 examples of real agents doing exactly that.
|
|
15
|
+
|
|
16
|
+
This isn't prompt engineering. This is **behavior engineering**.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Why Anvil?
|
|
21
|
+
|
|
22
|
+
| Other Agents | Anvil |
|
|
23
|
+
|---|---|
|
|
24
|
+
| Generate code and hope it works | Generate code, then **verify it works** |
|
|
25
|
+
| No error recovery | **Self-healing** with 3 retry attempts |
|
|
26
|
+
| One-shot output | **Iterative** Plan→Execute→Verify→Recover loop |
|
|
27
|
+
| No cost awareness | **Token tracking + model routing** for cost optimization |
|
|
28
|
+
| Black box | **Full session tracking**, verify reports, telemetry |
|
|
29
|
+
| Requires expensive API | Runs **fully local** with ShellWhisperer (1.5B) |
|
|
30
|
+
|
|
31
|
+
## The Verification Loop
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐
|
|
35
|
+
│ PLAN │────▶│ EXEC │────▶│VERIFY│────▶│ DONE │
|
|
36
|
+
└──────┘ └──────┘ └──┬───┘ └──────┘
|
|
37
|
+
│ Fail
|
|
38
|
+
▼
|
|
39
|
+
┌──────┐
|
|
40
|
+
│RECOVR│────▶ back to EXEC
|
|
41
|
+
└──────┘
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Anvil doesn't just write code. It **verifies** every change:
|
|
45
|
+
|
|
46
|
+
1. **Syntax check** — Does the code parse?
|
|
47
|
+
2. **Test run** — Do the tests pass?
|
|
48
|
+
3. **Lint check** — Is the code clean?
|
|
49
|
+
4. **Import check** — Are dependencies valid?
|
|
50
|
+
|
|
51
|
+
If verification fails, Anvil **diagnoses the error, generates a fix, and re-verifies**. Up to 3 retry cycles. This isn't optional — it's the core loop.
|
|
52
|
+
|
|
53
|
+
## Quick Start
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install fableforge-anvil-agent
|
|
57
|
+
|
|
58
|
+
# Run with local model (ollama)
|
|
59
|
+
anvil run "Add error handling to main.py"
|
|
60
|
+
|
|
61
|
+
# Run with API model
|
|
62
|
+
anvil run -m gpt-4o "Refactor the auth module"
|
|
63
|
+
|
|
64
|
+
# Interactive chat with verification
|
|
65
|
+
anvil chat
|
|
66
|
+
|
|
67
|
+
# Verify existing code
|
|
68
|
+
anvil verify src/
|
|
69
|
+
|
|
70
|
+
# Start as persistent daemon
|
|
71
|
+
anvil daemon --port 8765
|
|
72
|
+
|
|
73
|
+
# List past sessions
|
|
74
|
+
anvil sessions
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## The Name
|
|
78
|
+
|
|
79
|
+
**Anvil** — where code gets forged, hammered, and tested until it holds.
|
|
80
|
+
|
|
81
|
+
Every blacksmith knows: you don't just shape metal on the anvil. You **test** it. You strike it, check it, and if it's not right, you heat it again and hammer it until it is. That's what this agent does with code.
|
|
82
|
+
|
|
83
|
+
**Other agents shape and ship. Anvil shapes, verifies, and only then ships.**
|
|
84
|
+
|
|
85
|
+
## Architecture
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
anvil/
|
|
89
|
+
├── core/
|
|
90
|
+
│ ├── engine.py # Plan→Execute→Verify→Recover loop
|
|
91
|
+
│ ├── config.py # 7-layer configuration system
|
|
92
|
+
│ └── session.py # Full session tracking + persistence
|
|
93
|
+
├── tools/
|
|
94
|
+
│ └── executor.py # Bash, Read, Write, Edit, Grep, Glob, LS
|
|
95
|
+
├── verify/
|
|
96
|
+
│ └── pipeline.py # Syntax, test, lint, import verification
|
|
97
|
+
├── models/
|
|
98
|
+
│ └── registry.py # Local (ollama), OpenAI, Anthropic + cost tracking
|
|
99
|
+
├── integrations/
|
|
100
|
+
│ ├── verifyloop.py # VerifyLoop framework integration
|
|
101
|
+
│ ├── error_recovery.py # ErrorRecovery engine integration
|
|
102
|
+
│ ├── agent_swarm.py # AgentSwarm coordination integration
|
|
103
|
+
│ └── cost_optimizer.py # CostOptimizer routing integration
|
|
104
|
+
├── daemon/
|
|
105
|
+
│ └── server.py # Persistent HTTP daemon mode
|
|
106
|
+
├── tui/
|
|
107
|
+
│ └── dashboard.py # Rich terminal dashboard
|
|
108
|
+
└── cli.py # run, chat, verify, daemon, sessions, models
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## The FableForge Ecosystem
|
|
112
|
+
|
|
113
|
+
Anvil is the flagship product of the **FableForge** ecosystem — 21 open-source projects built from 210K real agent traces:
|
|
114
|
+
|
|
115
|
+
| Project | What It Does |
|
|
116
|
+
|---|---|
|
|
117
|
+
| **Anvil** | Self-verified coding agent (this one) |
|
|
118
|
+
| VerifyLoop | Plan→Execute→Verify→Recover framework |
|
|
119
|
+
| ErrorRecovery | Self-healing middleware (3,725 error examples) |
|
|
120
|
+
| FableForge-14B | The fine-tuned model (4-stage training) |
|
|
121
|
+
| ShellWhisperer | 1.5B edge agent (phone/RPi, 50ms) |
|
|
122
|
+
| ReasonCritic | Verification model (130 benchmark tasks) |
|
|
123
|
+
| TraceCompiler | Compile traces → LoRA skills |
|
|
124
|
+
| AgentRuntime | Persistent agent daemon (systemd for AI) |
|
|
125
|
+
| AgentSwarm | Multi-agent from real trace transitions |
|
|
126
|
+
| AgentTelemetry | Datadog for agents (token tracking, costs) |
|
|
127
|
+
| BenchAgent | HumanEval for tool-use (107 tasks) |
|
|
128
|
+
| AgentDev | VSCode extension with verification |
|
|
129
|
+
| TraceViz | Trace replay visualizer (Next.js) |
|
|
130
|
+
| AgentSkills.org | npm for agent behaviors |
|
|
131
|
+
| AgentCurriculum | 5-stage progressive training |
|
|
132
|
+
| AgentFuzzer | Adversarial testing for agents |
|
|
133
|
+
| AgentConstitution | Safety guardrails from traces |
|
|
134
|
+
| CostOptimizer | Token cost reduction (50-80%) |
|
|
135
|
+
| AgentProfiler | Behavioral fingerprinting |
|
|
136
|
+
| TrajectoryDistiller | Trace→training data pipeline |
|
|
137
|
+
| Fable5-Dataset | HuggingFace dataset release |
|
|
138
|
+
|
|
139
|
+
## Configuration
|
|
140
|
+
|
|
141
|
+
Create `.anvil.json` in your project root:
|
|
142
|
+
|
|
143
|
+
```json
|
|
144
|
+
{
|
|
145
|
+
"model": {
|
|
146
|
+
"model": "local",
|
|
147
|
+
"temperature": 0.2,
|
|
148
|
+
"max_tokens": 4096
|
|
149
|
+
},
|
|
150
|
+
"verify": {
|
|
151
|
+
"enabled": true,
|
|
152
|
+
"auto_recover": true,
|
|
153
|
+
"max_retries": 3,
|
|
154
|
+
"check_syntax": true,
|
|
155
|
+
"check_tests": true,
|
|
156
|
+
"check_lint": true
|
|
157
|
+
},
|
|
158
|
+
"tools": {
|
|
159
|
+
"allow_shell": true,
|
|
160
|
+
"sandbox": false
|
|
161
|
+
},
|
|
162
|
+
"safety": {
|
|
163
|
+
"constitution_enabled": true,
|
|
164
|
+
"blocked_commands": ["rm -rf /", "mkfs"],
|
|
165
|
+
"require_confirmation_for": ["git push", "DROP TABLE"]
|
|
166
|
+
},
|
|
167
|
+
"cost": {
|
|
168
|
+
"max_cost_per_session_usd": 5.0,
|
|
169
|
+
"route_by_complexity": true,
|
|
170
|
+
"simple_model": "local",
|
|
171
|
+
"complex_model": "gpt-4o"
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
## Daemon Mode
|
|
177
|
+
|
|
178
|
+
Run Anvil as a persistent server:
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
anvil daemon --port 8765
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
curl -X POST http://localhost:8765/run \
|
|
186
|
+
-H "Content-Type: application/json" \
|
|
187
|
+
-d '{"task": "Add input validation to all API endpoints"}'
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
## Model Backends
|
|
191
|
+
|
|
192
|
+
| Model | Type | Input $/1M | Output $/1M |
|
|
193
|
+
|---|---|---|---|
|
|
194
|
+
| local (fableforge-14b) | Local | Free | Free |
|
|
195
|
+
| gpt-4o | API | $2.50 | $10.00 |
|
|
196
|
+
| gpt-4o-mini | API | $0.15 | $0.60 |
|
|
197
|
+
| o3-mini | API | $1.10 | $4.40 |
|
|
198
|
+
| claude-3.5-sonnet | API | $3.00 | $15.00 |
|
|
199
|
+
| claude-3.5-haiku | API | $0.80 | $4.00 |
|
|
200
|
+
|
|
201
|
+
## How It's Different
|
|
202
|
+
|
|
203
|
+
### Trained on Real Behavior
|
|
204
|
+
|
|
205
|
+
The FableForge model was trained on 210K examples from real agent traces:
|
|
206
|
+
- **87.7% planning rate** — agents plan before they act
|
|
207
|
+
- **39.5% error recovery rate** — agents that hit errors and recover
|
|
208
|
+
- **1,311-step trace** — the Boeing 747 trace proves agents need persistent runtime
|
|
209
|
+
- **31 tools** mapped — transition matrices drive swarm coordination
|
|
210
|
+
|
|
211
|
+
### Verification Is Not Optional
|
|
212
|
+
|
|
213
|
+
Other agents: "Here's the code, hope it works."
|
|
214
|
+
|
|
215
|
+
Anvil: "Here's the code. I ran it. Tests pass. Lint is clean. Imports resolve. Here's the proof."
|
|
216
|
+
|
|
217
|
+
### Self-Healing
|
|
218
|
+
|
|
219
|
+
When verification fails, Anvil doesn't just report the error. It **reads the error, generates a fix, applies it, and re-verifies**. This is the ErrorRecovery engine with 3,725 real error examples baked in.
|
|
220
|
+
|
|
221
|
+
### Ecosystem Integration
|
|
222
|
+
|
|
223
|
+
Anvil doesn't work alone. It's wired into the full FableForge stack:
|
|
224
|
+
- **VerifyLoop** → Sophisticated multi-step verification
|
|
225
|
+
- **ErrorRecovery** → Pattern-matched error resolution from real traces
|
|
226
|
+
- **AgentSwarm** → Multi-agent coordination via transition matrices
|
|
227
|
+
- **CostOptimizer** → Automatic model routing based on task complexity
|
|
228
|
+
- **AgentConstitution** → Safety guardrails from analysis of real traces
|
|
229
|
+
|
|
230
|
+
## License
|
|
231
|
+
|
|
232
|
+
MIT
|
|
233
|
+
|
|
234
|
+
## Built With
|
|
235
|
+
|
|
236
|
+
- 210,000+ real agent traces from the Fable-5 dataset collection
|
|
237
|
+
- 87.7% planning rate behavioral signal
|
|
238
|
+
- 39.5% error recovery success rate
|
|
239
|
+
- 303 tool calls in a single session (Boeing 747 trace)
|
|
240
|
+
- 5 specialized micro-models (ShellWhisperer, ReasonCritic, etc.)
|
|
241
|
+
|
|
242
|
+
---
|
|
243
|
+
|
|
244
|
+
**Anvil: Forge your code. Verify it holds.**
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "fableforge-anvil-agent"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "The open-source, self-verified coding agent. Generate → Execute → Verify → Recover."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [{name = "FableForge", email = "team@fableforge.ai"}]
|
|
13
|
+
keywords = ["agent", "coding", "verification", "self-healing", "llm", "open-source"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Environment :: Console",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"License :: OSI Approved :: MIT License",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Topic :: Software Development :: Code Generators",
|
|
21
|
+
"Topic :: Software Development :: Testing",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
dependencies = [
|
|
25
|
+
"rich>=13.0",
|
|
26
|
+
"click>=8.1",
|
|
27
|
+
"pydantic>=2.0",
|
|
28
|
+
"httpx>=0.25",
|
|
29
|
+
"tiktoken>=0.5",
|
|
30
|
+
"pathspec>=0.11",
|
|
31
|
+
"tree-sitter>=0.20",
|
|
32
|
+
"prompt-toolkit>=3.0",
|
|
33
|
+
"jsonschema>=4.0",
|
|
34
|
+
"python-slugify>=8.0",
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
[project.optional-dependencies]
|
|
38
|
+
local = ["llama-cpp-python>=0.2", "onnxruntime>=1.16"]
|
|
39
|
+
api = ["openai>=1.0", "anthropic>=0.18"]
|
|
40
|
+
tui = ["textual>=0.47", "textual-dev>=0.4"]
|
|
41
|
+
all = ["fableforge-anvil-agent[local,api,tui]"]
|
|
42
|
+
dev = ["pytest>=7.0", "pytest-asyncio>=0.21", "pytest-cov>=4.0", "ruff>=0.1"]
|
|
43
|
+
|
|
44
|
+
[project.scripts]
|
|
45
|
+
anvil = "anvil.cli:main"
|
|
46
|
+
|
|
47
|
+
[project.entry-points."anvil.models"]
|
|
48
|
+
local = "anvil.models.local:LocalModel"
|
|
49
|
+
openai = "anvil.models.openai_model:OpenAIModel"
|
|
50
|
+
anthropic = "anvil.models.anthropic_model:AnthropicModel"
|
|
51
|
+
|
|
52
|
+
[tool.setuptools.packages.find]
|
|
53
|
+
where = ["src"]
|
|
54
|
+
|
|
55
|
+
[tool.pytest.ini_options]
|
|
56
|
+
testpaths = ["tests"]
|
|
57
|
+
asyncio_mode = "auto"
|
|
58
|
+
|
|
59
|
+
[tool.ruff]
|
|
60
|
+
target-version = "py310"
|
|
61
|
+
line-length = 100
|
|
62
|
+
|
|
63
|
+
[tool.ruff.lint]
|
|
64
|
+
select = ["E", "F", "I", "N", "W", "UP"]
|