fableforge-anvil-agent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. fableforge_anvil_agent-0.1.0/LICENSE +21 -0
  2. fableforge_anvil_agent-0.1.0/PKG-INFO +289 -0
  3. fableforge_anvil_agent-0.1.0/README.md +244 -0
  4. fableforge_anvil_agent-0.1.0/pyproject.toml +64 -0
  5. fableforge_anvil_agent-0.1.0/setup.cfg +4 -0
  6. fableforge_anvil_agent-0.1.0/src/anvil/__init__.py +154 -0
  7. fableforge_anvil_agent-0.1.0/src/anvil/agents/__init__.py +28 -0
  8. fableforge_anvil_agent-0.1.0/src/anvil/agents/agent_base.py +127 -0
  9. fableforge_anvil_agent-0.1.0/src/anvil/agents/agent_manager.py +380 -0
  10. fableforge_anvil_agent-0.1.0/src/anvil/agents/builtin_agents.py +156 -0
  11. fableforge_anvil_agent-0.1.0/src/anvil/cli.py +507 -0
  12. fableforge_anvil_agent-0.1.0/src/anvil/commands/__init__.py +5 -0
  13. fableforge_anvil_agent-0.1.0/src/anvil/commands/command_manager.py +148 -0
  14. fableforge_anvil_agent-0.1.0/src/anvil/compaction/__init__.py +5 -0
  15. fableforge_anvil_agent-0.1.0/src/anvil/compaction/compactor.py +159 -0
  16. fableforge_anvil_agent-0.1.0/src/anvil/config_v2/__init__.py +5 -0
  17. fableforge_anvil_agent-0.1.0/src/anvil/config_v2/config_v2.py +266 -0
  18. fableforge_anvil_agent-0.1.0/src/anvil/core/__init__.py +19 -0
  19. fableforge_anvil_agent-0.1.0/src/anvil/core/commands.py +273 -0
  20. fableforge_anvil_agent-0.1.0/src/anvil/core/compaction.py +204 -0
  21. fableforge_anvil_agent-0.1.0/src/anvil/core/config.py +228 -0
  22. fableforge_anvil_agent-0.1.0/src/anvil/core/config_v2.py +519 -0
  23. fableforge_anvil_agent-0.1.0/src/anvil/core/engine.py +516 -0
  24. fableforge_anvil_agent-0.1.0/src/anvil/core/init_project.py +792 -0
  25. fableforge_anvil_agent-0.1.0/src/anvil/core/rules.py +198 -0
  26. fableforge_anvil_agent-0.1.0/src/anvil/core/session.py +166 -0
  27. fableforge_anvil_agent-0.1.0/src/anvil/core/snapshot.py +311 -0
  28. fableforge_anvil_agent-0.1.0/src/anvil/daemon/__init__.py +4 -0
  29. fableforge_anvil_agent-0.1.0/src/anvil/daemon/server.py +94 -0
  30. fableforge_anvil_agent-0.1.0/src/anvil/integrations/__init__.py +13 -0
  31. fableforge_anvil_agent-0.1.0/src/anvil/integrations/agent_swarm.py +98 -0
  32. fableforge_anvil_agent-0.1.0/src/anvil/integrations/cost_optimizer.py +157 -0
  33. fableforge_anvil_agent-0.1.0/src/anvil/integrations/error_recovery.py +188 -0
  34. fableforge_anvil_agent-0.1.0/src/anvil/integrations/verifyloop.py +143 -0
  35. fableforge_anvil_agent-0.1.0/src/anvil/mcp/__init__.py +13 -0
  36. fableforge_anvil_agent-0.1.0/src/anvil/mcp/mcp_manager.py +379 -0
  37. fableforge_anvil_agent-0.1.0/src/anvil/mcp/mcp_types.py +175 -0
  38. fableforge_anvil_agent-0.1.0/src/anvil/models/__init__.py +4 -0
  39. fableforge_anvil_agent-0.1.0/src/anvil/models/anthropic_model.py +5 -0
  40. fableforge_anvil_agent-0.1.0/src/anvil/models/local.py +5 -0
  41. fableforge_anvil_agent-0.1.0/src/anvil/models/openai_model.py +5 -0
  42. fableforge_anvil_agent-0.1.0/src/anvil/models/registry.py +284 -0
  43. fableforge_anvil_agent-0.1.0/src/anvil/permissions/__init__.py +5 -0
  44. fableforge_anvil_agent-0.1.0/src/anvil/permissions/permissions.py +265 -0
  45. fableforge_anvil_agent-0.1.0/src/anvil/rules/__init__.py +5 -0
  46. fableforge_anvil_agent-0.1.0/src/anvil/rules/rules_manager.py +123 -0
  47. fableforge_anvil_agent-0.1.0/src/anvil/sdk.py +712 -0
  48. fableforge_anvil_agent-0.1.0/src/anvil/snapshot/__init__.py +5 -0
  49. fableforge_anvil_agent-0.1.0/src/anvil/snapshot/snapshot_manager.py +148 -0
  50. fableforge_anvil_agent-0.1.0/src/anvil/tools/__init__.py +18 -0
  51. fableforge_anvil_agent-0.1.0/src/anvil/tools/executor.py +291 -0
  52. fableforge_anvil_agent-0.1.0/src/anvil/tools/new_tools.py +258 -0
  53. fableforge_anvil_agent-0.1.0/src/anvil/tui/__init__.py +17 -0
  54. fableforge_anvil_agent-0.1.0/src/anvil/tui/app.py +781 -0
  55. fableforge_anvil_agent-0.1.0/src/anvil/tui/dashboard.py +103 -0
  56. fableforge_anvil_agent-0.1.0/src/anvil/verify/__init__.py +4 -0
  57. fableforge_anvil_agent-0.1.0/src/anvil/verify/pipeline.py +266 -0
  58. fableforge_anvil_agent-0.1.0/src/fableforge_anvil_agent.egg-info/PKG-INFO +289 -0
  59. fableforge_anvil_agent-0.1.0/src/fableforge_anvil_agent.egg-info/SOURCES.txt +76 -0
  60. fableforge_anvil_agent-0.1.0/src/fableforge_anvil_agent.egg-info/dependency_links.txt +1 -0
  61. fableforge_anvil_agent-0.1.0/src/fableforge_anvil_agent.egg-info/entry_points.txt +7 -0
  62. fableforge_anvil_agent-0.1.0/src/fableforge_anvil_agent.egg-info/requires.txt +31 -0
  63. fableforge_anvil_agent-0.1.0/src/fableforge_anvil_agent.egg-info/top_level.txt +1 -0
  64. fableforge_anvil_agent-0.1.0/tests/test_agents.py +489 -0
  65. fableforge_anvil_agent-0.1.0/tests/test_anvil.py +333 -0
  66. fableforge_anvil_agent-0.1.0/tests/test_commands.py +262 -0
  67. fableforge_anvil_agent-0.1.0/tests/test_compaction.py +217 -0
  68. fableforge_anvil_agent-0.1.0/tests/test_config_v2.py +372 -0
  69. fableforge_anvil_agent-0.1.0/tests/test_daemon.py +161 -0
  70. fableforge_anvil_agent-0.1.0/tests/test_engine.py +392 -0
  71. fableforge_anvil_agent-0.1.0/tests/test_mcp.py +325 -0
  72. fableforge_anvil_agent-0.1.0/tests/test_models.py +319 -0
  73. fableforge_anvil_agent-0.1.0/tests/test_new_tools.py +442 -0
  74. fableforge_anvil_agent-0.1.0/tests/test_permissions.py +169 -0
  75. fableforge_anvil_agent-0.1.0/tests/test_rules.py +228 -0
  76. fableforge_anvil_agent-0.1.0/tests/test_snapshot.py +201 -0
  77. fableforge_anvil_agent-0.1.0/tests/test_tools.py +463 -0
  78. fableforge_anvil_agent-0.1.0/tests/test_verify_pipeline.py +336 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 FableForge Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,289 @@
1
+ Metadata-Version: 2.4
2
+ Name: fableforge-anvil-agent
3
+ Version: 0.1.0
4
+ Summary: The open-source, self-verified coding agent. Generate → Execute → Verify → Recover.
5
+ Author-email: FableForge <team@fableforge.ai>
6
+ License: MIT
7
+ Keywords: agent,coding,verification,self-healing,llm,open-source
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Topic :: Software Development :: Code Generators
14
+ Classifier: Topic :: Software Development :: Testing
15
+ Requires-Python: >=3.10
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: rich>=13.0
19
+ Requires-Dist: click>=8.1
20
+ Requires-Dist: pydantic>=2.0
21
+ Requires-Dist: httpx>=0.25
22
+ Requires-Dist: tiktoken>=0.5
23
+ Requires-Dist: pathspec>=0.11
24
+ Requires-Dist: tree-sitter>=0.20
25
+ Requires-Dist: prompt-toolkit>=3.0
26
+ Requires-Dist: jsonschema>=4.0
27
+ Requires-Dist: python-slugify>=8.0
28
+ Provides-Extra: local
29
+ Requires-Dist: llama-cpp-python>=0.2; extra == "local"
30
+ Requires-Dist: onnxruntime>=1.16; extra == "local"
31
+ Provides-Extra: api
32
+ Requires-Dist: openai>=1.0; extra == "api"
33
+ Requires-Dist: anthropic>=0.18; extra == "api"
34
+ Provides-Extra: tui
35
+ Requires-Dist: textual>=0.47; extra == "tui"
36
+ Requires-Dist: textual-dev>=0.4; extra == "tui"
37
+ Provides-Extra: all
38
+ Requires-Dist: fableforge-anvil-agent[api,local,tui]; extra == "all"
39
+ Provides-Extra: dev
40
+ Requires-Dist: pytest>=7.0; extra == "dev"
41
+ Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
42
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
43
+ Requires-Dist: ruff>=0.1; extra == "dev"
44
+ Dynamic: license-file
45
+
46
+ <picture>
47
+ <source media="(prefers-color-scheme: dark)" srcset="docs/assets/logo-dark.svg">
48
+ <source media="(prefers-color-scheme: light)" srcset="docs/assets/logo-light.svg">
49
+ <img alt="Anvil" src="docs/assets/logo-light.svg" width="400">
50
+ </picture>
51
+
52
+ # Anvil — The Self-Verified Coding Agent
53
+
54
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/) [![Tests](https://img.shields.io/badge/tests-278+-green.svg)](tests/)
55
+
56
+
57
+ > **Generate → Execute → Verify → Recover**
58
+
59
+ Every other open agent generates and hopes. **Anvil generates, runs, checks, and fixes** — because it was trained on 210,000 examples of real agents doing exactly that.
60
+
61
+ This isn't prompt engineering. This is **behavior engineering**.
62
+
63
+ ---
64
+
65
+ ## Why Anvil?
66
+
67
+ | Other Agents | Anvil |
68
+ |---|---|
69
+ | Generate code and hope it works | Generate code, then **verify it works** |
70
+ | No error recovery | **Self-healing** with 3 retry attempts |
71
+ | One-shot output | **Iterative** Plan→Execute→Verify→Recover loop |
72
+ | No cost awareness | **Token tracking + model routing** for cost optimization |
73
+ | Black box | **Full session tracking**, verify reports, telemetry |
74
+ | Requires expensive API | Runs **fully local** with ShellWhisperer (1.5B) |
75
+
76
+ ## The Verification Loop
77
+
78
+ ```
79
+ ┌──────┐     ┌──────┐     ┌──────┐     ┌──────┐
79
+ │ PLAN │────▶│ EXEC │────▶│VERIFY│────▶│ DONE │
80
+ └──────┘     └──────┘     └──┬───┘     └──────┘
81
+                              │ Fail
82
+
83
+                           ┌──────┐
84
+                           │RECOVR│────▶ back to EXEC
85
+                           └──────┘
87
+ ```
88
+
89
+ Anvil doesn't just write code. It **verifies** every change:
90
+
91
+ 1. **Syntax check** — Does the code parse?
92
+ 2. **Test run** — Do the tests pass?
93
+ 3. **Lint check** — Is the code clean?
94
+ 4. **Import check** — Are dependencies valid?
95
+
96
+ If verification fails, Anvil **diagnoses the error, generates a fix, and re-verifies**. Up to 3 retry cycles. This isn't optional — it's the core loop.
97
+
98
+ ## Quick Start
99
+
100
+ ```bash
101
+ pip install fableforge-anvil-agent
102
+
103
+ # Run with local model (ollama)
104
+ anvil run "Add error handling to main.py"
105
+
106
+ # Run with API model
107
+ anvil run -m gpt-4o "Refactor the auth module"
108
+
109
+ # Interactive chat with verification
110
+ anvil chat
111
+
112
+ # Verify existing code
113
+ anvil verify src/
114
+
115
+ # Start as persistent daemon
116
+ anvil daemon --port 8765
117
+
118
+ # List past sessions
119
+ anvil sessions
120
+ ```
121
+
122
+ ## The Name
123
+
124
+ **Anvil** — where code gets forged, hammered, and tested until it holds.
125
+
126
+ Every blacksmith knows: you don't just shape metal on the anvil. You **test** it. You strike it, check it, and if it's not right, you heat it again and hammer it until it is. That's what this agent does with code.
127
+
128
+ **Other agents shape and ship. Anvil shapes, verifies, and only then ships.**
129
+
130
+ ## Architecture
131
+
132
+ ```
133
+ anvil/
134
+ ├── core/
135
+ │ ├── engine.py # Plan→Execute→Verify→Recover loop
136
+ │ ├── config.py # 7-layer configuration system
137
+ │ └── session.py # Full session tracking + persistence
138
+ ├── tools/
139
+ │ └── executor.py # Bash, Read, Write, Edit, Grep, Glob, LS
140
+ ├── verify/
141
+ │ └── pipeline.py # Syntax, test, lint, import verification
142
+ ├── models/
143
+ │ └── registry.py # Local (ollama), OpenAI, Anthropic + cost tracking
144
+ ├── integrations/
145
+ │ ├── verifyloop.py # VerifyLoop framework integration
146
+ │ ├── error_recovery.py # ErrorRecovery engine integration
147
+ │ ├── agent_swarm.py # AgentSwarm coordination integration
148
+ │ └── cost_optimizer.py # CostOptimizer routing integration
149
+ ├── daemon/
150
+ │ └── server.py # Persistent HTTP daemon mode
151
+ ├── tui/
152
+ │ └── dashboard.py # Rich terminal dashboard
153
+ └── cli.py # run, chat, verify, daemon, sessions, models
154
+ ```
155
+
156
+ ## The FableForge Ecosystem
157
+
158
+ Anvil is the flagship product of the **FableForge** ecosystem — 21 open-source projects built from 210K real agent traces:
159
+
160
+ | Project | What It Does |
161
+ |---|---|
162
+ | **Anvil** | Self-verified coding agent (this one) |
163
+ | VerifyLoop | Plan→Execute→Verify→Recover framework |
164
+ | ErrorRecovery | Self-healing middleware (3,725 error examples) |
165
+ | FableForge-14B | The fine-tuned model (4-stage training) |
166
+ | ShellWhisperer | 1.5B edge agent (phone/RPi, 50ms) |
167
+ | ReasonCritic | Verification model (130 benchmark tasks) |
168
+ | TraceCompiler | Compile traces → LoRA skills |
169
+ | AgentRuntime | Persistent agent daemon (systemd for AI) |
170
+ | AgentSwarm | Multi-agent from real trace transitions |
171
+ | AgentTelemetry | Datadog for agents (token tracking, costs) |
172
+ | BenchAgent | HumanEval for tool-use (107 tasks) |
173
+ | AgentDev | VSCode extension with verification |
174
+ | TraceViz | Trace replay visualizer (Next.js) |
175
+ | AgentSkills.org | npm for agent behaviors |
176
+ | AgentCurriculum | 5-stage progressive training |
177
+ | AgentFuzzer | Adversarial testing for agents |
178
+ | AgentConstitution | Safety guardrails from traces |
179
+ | CostOptimizer | Token cost reduction (50-80%) |
180
+ | AgentProfiler | Behavioral fingerprinting |
181
+ | TrajectoryDistiller | Trace→training data pipeline |
182
+ | Fable5-Dataset | HuggingFace dataset release |
183
+
184
+ ## Configuration
185
+
186
+ Create `.anvil.json` in your project root:
187
+
188
+ ```json
189
+ {
190
+ "model": {
191
+ "model": "local",
192
+ "temperature": 0.2,
193
+ "max_tokens": 4096
194
+ },
195
+ "verify": {
196
+ "enabled": true,
197
+ "auto_recover": true,
198
+ "max_retries": 3,
199
+ "check_syntax": true,
200
+ "check_tests": true,
201
+ "check_lint": true
202
+ },
203
+ "tools": {
204
+ "allow_shell": true,
205
+ "sandbox": false
206
+ },
207
+ "safety": {
208
+ "constitution_enabled": true,
209
+ "blocked_commands": ["rm -rf /", "mkfs"],
210
+ "require_confirmation_for": ["git push", "DROP TABLE"]
211
+ },
212
+ "cost": {
213
+ "max_cost_per_session_usd": 5.0,
214
+ "route_by_complexity": true,
215
+ "simple_model": "local",
216
+ "complex_model": "gpt-4o"
217
+ }
218
+ }
219
+ ```
220
+
221
+ ## Daemon Mode
222
+
223
+ Run Anvil as a persistent server:
224
+
225
+ ```bash
226
+ anvil daemon --port 8765
227
+ ```
228
+
229
+ ```bash
230
+ curl -X POST http://localhost:8765/run \
231
+ -H "Content-Type: application/json" \
232
+ -d '{"task": "Add input validation to all API endpoints"}'
233
+ ```
234
+
235
+ ## Model Backends
236
+
237
+ | Model | Type | Input $/1M | Output $/1M |
238
+ |---|---|---|---|
239
+ | local (fableforge-14b) | Local | Free | Free |
240
+ | gpt-4o | API | $2.50 | $10.00 |
241
+ | gpt-4o-mini | API | $0.15 | $0.60 |
242
+ | o3-mini | API | $1.10 | $4.40 |
243
+ | claude-3.5-sonnet | API | $3.00 | $15.00 |
244
+ | claude-3.5-haiku | API | $0.80 | $4.00 |
245
+
246
+ ## How It's Different
247
+
248
+ ### Trained on Real Behavior
249
+
250
+ The FableForge model was trained on 210K examples from real agent traces:
251
+ - **87.7% planning rate** — agents plan before they act
252
+ - **39.5% error recovery rate** — agents that hit errors and recover
253
+ - **1,311-step trace** — the Boeing 747 trace proves agents need persistent runtime
254
+ - **31 tools** mapped — transition matrices drive swarm coordination
255
+
256
+ ### Verification Is Not Optional
257
+
258
+ Other agents: "Here's the code, hope it works."
259
+
260
+ Anvil: "Here's the code. I ran it. Tests pass. Lint is clean. Imports resolve. Here's the proof."
261
+
262
+ ### Self-Healing
263
+
264
+ When verification fails, Anvil doesn't just report the error. It **reads the error, generates a fix, applies it, and re-verifies**. This is the ErrorRecovery engine with 3,725 real error examples baked in.
265
+
266
+ ### Ecosystem Integration
267
+
268
+ Anvil doesn't work alone. It's wired into the full FableForge stack:
269
+ - **VerifyLoop** → Sophisticated multi-step verification
270
+ - **ErrorRecovery** → Pattern-matched error resolution from real traces
271
+ - **AgentSwarm** → Multi-agent coordination via transition matrices
272
+ - **CostOptimizer** → Automatic model routing based on task complexity
273
+ - **AgentConstitution** → Safety guardrails from analysis of real traces
274
+
275
+ ## License
276
+
277
+ MIT
278
+
279
+ ## Built With
280
+
281
+ - 210,000+ real agent traces from the Fable-5 dataset collection
282
+ - 87.7% planning rate behavioral signal
283
+ - 39.5% error recovery success rate
284
+ - 303 tool calls in a single session (Boeing 747 trace)
285
+ - 5 specialized micro-models (ShellWhisperer, ReasonCritic, etc.)
286
+
287
+ ---
288
+
289
+ **Anvil: Forge your code. Verify it holds.**
@@ -0,0 +1,244 @@
1
+ <picture>
2
+ <source media="(prefers-color-scheme: dark)" srcset="docs/assets/logo-dark.svg">
3
+ <source media="(prefers-color-scheme: light)" srcset="docs/assets/logo-light.svg">
4
+ <img alt="Anvil" src="docs/assets/logo-light.svg" width="400">
5
+ </picture>
6
+
7
+ # Anvil — The Self-Verified Coding Agent
8
+
9
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE) [![Python 3.10+](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/) [![Tests](https://img.shields.io/badge/tests-278+-green.svg)](tests/)
10
+
11
+
12
+ > **Generate → Execute → Verify → Recover**
13
+
14
+ Every other open agent generates and hopes. **Anvil generates, runs, checks, and fixes** — because it was trained on 210,000 examples of real agents doing exactly that.
15
+
16
+ This isn't prompt engineering. This is **behavior engineering**.
17
+
18
+ ---
19
+
20
+ ## Why Anvil?
21
+
22
+ | Other Agents | Anvil |
23
+ |---|---|
24
+ | Generate code and hope it works | Generate code, then **verify it works** |
25
+ | No error recovery | **Self-healing** with 3 retry attempts |
26
+ | One-shot output | **Iterative** Plan→Execute→Verify→Recover loop |
27
+ | No cost awareness | **Token tracking + model routing** for cost optimization |
28
+ | Black box | **Full session tracking**, verify reports, telemetry |
29
+ | Requires expensive API | Runs **fully local** with ShellWhisperer (1.5B) |
30
+
31
+ ## The Verification Loop
32
+
33
+ ```
34
+ ┌──────┐     ┌──────┐     ┌──────┐     ┌──────┐
34
+ │ PLAN │────▶│ EXEC │────▶│VERIFY│────▶│ DONE │
35
+ └──────┘     └──────┘     └──┬───┘     └──────┘
36
+                              │ Fail
37
+
38
+                           ┌──────┐
39
+                           │RECOVR│────▶ back to EXEC
40
+                           └──────┘
42
+ ```
43
+
44
+ Anvil doesn't just write code. It **verifies** every change:
45
+
46
+ 1. **Syntax check** — Does the code parse?
47
+ 2. **Test run** — Do the tests pass?
48
+ 3. **Lint check** — Is the code clean?
49
+ 4. **Import check** — Are dependencies valid?
50
+
51
+ If verification fails, Anvil **diagnoses the error, generates a fix, and re-verifies**. Up to 3 retry cycles. This isn't optional — it's the core loop.
52
+
53
+ ## Quick Start
54
+
55
+ ```bash
56
+ pip install fableforge-anvil-agent
57
+
58
+ # Run with local model (ollama)
59
+ anvil run "Add error handling to main.py"
60
+
61
+ # Run with API model
62
+ anvil run -m gpt-4o "Refactor the auth module"
63
+
64
+ # Interactive chat with verification
65
+ anvil chat
66
+
67
+ # Verify existing code
68
+ anvil verify src/
69
+
70
+ # Start as persistent daemon
71
+ anvil daemon --port 8765
72
+
73
+ # List past sessions
74
+ anvil sessions
75
+ ```
76
+
77
+ ## The Name
78
+
79
+ **Anvil** — where code gets forged, hammered, and tested until it holds.
80
+
81
+ Every blacksmith knows: you don't just shape metal on the anvil. You **test** it. You strike it, check it, and if it's not right, you heat it again and hammer it until it is. That's what this agent does with code.
82
+
83
+ **Other agents shape and ship. Anvil shapes, verifies, and only then ships.**
84
+
85
+ ## Architecture
86
+
87
+ ```
88
+ anvil/
89
+ ├── core/
90
+ │ ├── engine.py # Plan→Execute→Verify→Recover loop
91
+ │ ├── config.py # 7-layer configuration system
92
+ │ └── session.py # Full session tracking + persistence
93
+ ├── tools/
94
+ │ └── executor.py # Bash, Read, Write, Edit, Grep, Glob, LS
95
+ ├── verify/
96
+ │ └── pipeline.py # Syntax, test, lint, import verification
97
+ ├── models/
98
+ │ └── registry.py # Local (ollama), OpenAI, Anthropic + cost tracking
99
+ ├── integrations/
100
+ │ ├── verifyloop.py # VerifyLoop framework integration
101
+ │ ├── error_recovery.py # ErrorRecovery engine integration
102
+ │ ├── agent_swarm.py # AgentSwarm coordination integration
103
+ │ └── cost_optimizer.py # CostOptimizer routing integration
104
+ ├── daemon/
105
+ │ └── server.py # Persistent HTTP daemon mode
106
+ ├── tui/
107
+ │ └── dashboard.py # Rich terminal dashboard
108
+ └── cli.py # run, chat, verify, daemon, sessions, models
109
+ ```
110
+
111
+ ## The FableForge Ecosystem
112
+
113
+ Anvil is the flagship product of the **FableForge** ecosystem — 21 open-source projects built from 210K real agent traces:
114
+
115
+ | Project | What It Does |
116
+ |---|---|
117
+ | **Anvil** | Self-verified coding agent (this one) |
118
+ | VerifyLoop | Plan→Execute→Verify→Recover framework |
119
+ | ErrorRecovery | Self-healing middleware (3,725 error examples) |
120
+ | FableForge-14B | The fine-tuned model (4-stage training) |
121
+ | ShellWhisperer | 1.5B edge agent (phone/RPi, 50ms) |
122
+ | ReasonCritic | Verification model (130 benchmark tasks) |
123
+ | TraceCompiler | Compile traces → LoRA skills |
124
+ | AgentRuntime | Persistent agent daemon (systemd for AI) |
125
+ | AgentSwarm | Multi-agent from real trace transitions |
126
+ | AgentTelemetry | Datadog for agents (token tracking, costs) |
127
+ | BenchAgent | HumanEval for tool-use (107 tasks) |
128
+ | AgentDev | VSCode extension with verification |
129
+ | TraceViz | Trace replay visualizer (Next.js) |
130
+ | AgentSkills.org | npm for agent behaviors |
131
+ | AgentCurriculum | 5-stage progressive training |
132
+ | AgentFuzzer | Adversarial testing for agents |
133
+ | AgentConstitution | Safety guardrails from traces |
134
+ | CostOptimizer | Token cost reduction (50-80%) |
135
+ | AgentProfiler | Behavioral fingerprinting |
136
+ | TrajectoryDistiller | Trace→training data pipeline |
137
+ | Fable5-Dataset | HuggingFace dataset release |
138
+
139
+ ## Configuration
140
+
141
+ Create `.anvil.json` in your project root:
142
+
143
+ ```json
144
+ {
145
+ "model": {
146
+ "model": "local",
147
+ "temperature": 0.2,
148
+ "max_tokens": 4096
149
+ },
150
+ "verify": {
151
+ "enabled": true,
152
+ "auto_recover": true,
153
+ "max_retries": 3,
154
+ "check_syntax": true,
155
+ "check_tests": true,
156
+ "check_lint": true
157
+ },
158
+ "tools": {
159
+ "allow_shell": true,
160
+ "sandbox": false
161
+ },
162
+ "safety": {
163
+ "constitution_enabled": true,
164
+ "blocked_commands": ["rm -rf /", "mkfs"],
165
+ "require_confirmation_for": ["git push", "DROP TABLE"]
166
+ },
167
+ "cost": {
168
+ "max_cost_per_session_usd": 5.0,
169
+ "route_by_complexity": true,
170
+ "simple_model": "local",
171
+ "complex_model": "gpt-4o"
172
+ }
173
+ }
174
+ ```
175
+
176
+ ## Daemon Mode
177
+
178
+ Run Anvil as a persistent server:
179
+
180
+ ```bash
181
+ anvil daemon --port 8765
182
+ ```
183
+
184
+ ```bash
185
+ curl -X POST http://localhost:8765/run \
186
+ -H "Content-Type: application/json" \
187
+ -d '{"task": "Add input validation to all API endpoints"}'
188
+ ```
189
+
190
+ ## Model Backends
191
+
192
+ | Model | Type | Input $/1M | Output $/1M |
193
+ |---|---|---|---|
194
+ | local (fableforge-14b) | Local | Free | Free |
195
+ | gpt-4o | API | $2.50 | $10.00 |
196
+ | gpt-4o-mini | API | $0.15 | $0.60 |
197
+ | o3-mini | API | $1.10 | $4.40 |
198
+ | claude-3.5-sonnet | API | $3.00 | $15.00 |
199
+ | claude-3.5-haiku | API | $0.80 | $4.00 |
200
+
201
+ ## How It's Different
202
+
203
+ ### Trained on Real Behavior
204
+
205
+ The FableForge model was trained on 210K examples from real agent traces:
206
+ - **87.7% planning rate** — agents plan before they act
207
+ - **39.5% error recovery rate** — agents that hit errors and recover
208
+ - **1,311-step trace** — the Boeing 747 trace proves agents need persistent runtime
209
+ - **31 tools** mapped — transition matrices drive swarm coordination
210
+
211
+ ### Verification Is Not Optional
212
+
213
+ Other agents: "Here's the code, hope it works."
214
+
215
+ Anvil: "Here's the code. I ran it. Tests pass. Lint is clean. Imports resolve. Here's the proof."
216
+
217
+ ### Self-Healing
218
+
219
+ When verification fails, Anvil doesn't just report the error. It **reads the error, generates a fix, applies it, and re-verifies**. This is the ErrorRecovery engine with 3,725 real error examples baked in.
220
+
221
+ ### Ecosystem Integration
222
+
223
+ Anvil doesn't work alone. It's wired into the full FableForge stack:
224
+ - **VerifyLoop** → Sophisticated multi-step verification
225
+ - **ErrorRecovery** → Pattern-matched error resolution from real traces
226
+ - **AgentSwarm** → Multi-agent coordination via transition matrices
227
+ - **CostOptimizer** → Automatic model routing based on task complexity
228
+ - **AgentConstitution** → Safety guardrails from analysis of real traces
229
+
230
+ ## License
231
+
232
+ MIT
233
+
234
+ ## Built With
235
+
236
+ - 210,000+ real agent traces from the Fable-5 dataset collection
237
+ - 87.7% planning rate behavioral signal
238
+ - 39.5% error recovery success rate
239
+ - 303 tool calls in a single session (Boeing 747 trace)
240
+ - 5 specialized micro-models (ShellWhisperer, ReasonCritic, etc.)
241
+
242
+ ---
243
+
244
+ **Anvil: Forge your code. Verify it holds.**
@@ -0,0 +1,64 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "fableforge-anvil-agent"
7
+ version = "0.1.0"
8
+ description = "The open-source, self-verified coding agent. Generate → Execute → Verify → Recover."
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.10"
12
+ authors = [{name = "FableForge", email = "team@fableforge.ai"}]
13
+ keywords = ["agent", "coding", "verification", "self-healing", "llm", "open-source"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Environment :: Console",
17
+ "Intended Audience :: Developers",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Topic :: Software Development :: Code Generators",
21
+ "Topic :: Software Development :: Testing",
22
+ ]
23
+
24
+ dependencies = [
25
+ "rich>=13.0",
26
+ "click>=8.1",
27
+ "pydantic>=2.0",
28
+ "httpx>=0.25",
29
+ "tiktoken>=0.5",
30
+ "pathspec>=0.11",
31
+ "tree-sitter>=0.20",
32
+ "prompt-toolkit>=3.0",
33
+ "jsonschema>=4.0",
34
+ "python-slugify>=8.0",
35
+ ]
36
+
37
+ [project.optional-dependencies]
38
+ local = ["llama-cpp-python>=0.2", "onnxruntime>=1.16"]
39
+ api = ["openai>=1.0", "anthropic>=0.18"]
40
+ tui = ["textual>=0.47", "textual-dev>=0.4"]
41
+ all = ["fableforge-anvil-agent[local,api,tui]"]
42
+ dev = ["pytest>=7.0", "pytest-asyncio>=0.21", "pytest-cov>=4.0", "ruff>=0.1"]
43
+
44
+ [project.scripts]
45
+ anvil = "anvil.cli:main"
46
+
47
+ [project.entry-points."anvil.models"]
48
+ local = "anvil.models.local:LocalModel"
49
+ openai = "anvil.models.openai_model:OpenAIModel"
50
+ anthropic = "anvil.models.anthropic_model:AnthropicModel"
51
+
52
+ [tool.setuptools.packages.find]
53
+ where = ["src"]
54
+
55
+ [tool.pytest.ini_options]
56
+ testpaths = ["tests"]
57
+ asyncio_mode = "auto"
58
+
59
+ [tool.ruff]
60
+ target-version = "py310"
61
+ line-length = 100
62
+
63
+ [tool.ruff.lint]
64
+ select = ["E", "F", "I", "N", "W", "UP"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+