open-autoforge 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- open_autoforge-0.1.0/LICENSE +21 -0
- open_autoforge-0.1.0/PKG-INFO +266 -0
- open_autoforge-0.1.0/README.md +235 -0
- open_autoforge-0.1.0/pyproject.toml +52 -0
- open_autoforge-0.1.0/setup.cfg +4 -0
- open_autoforge-0.1.0/src/autoforge/__init__.py +3 -0
- open_autoforge-0.1.0/src/autoforge/__main__.py +327 -0
- open_autoforge-0.1.0/src/autoforge/adapters/__init__.py +1 -0
- open_autoforge-0.1.0/src/autoforge/adapters/base.py +42 -0
- open_autoforge-0.1.0/src/autoforge/adapters/complexity.py +124 -0
- open_autoforge-0.1.0/src/autoforge/adapters/test_quality.py +949 -0
- open_autoforge-0.1.0/src/autoforge/budget.py +102 -0
- open_autoforge-0.1.0/src/autoforge/git_manager.py +137 -0
- open_autoforge-0.1.0/src/autoforge/models.py +299 -0
- open_autoforge-0.1.0/src/autoforge/registry.py +113 -0
- open_autoforge-0.1.0/src/autoforge/regression.py +161 -0
- open_autoforge-0.1.0/src/autoforge/reporting.py +209 -0
- open_autoforge-0.1.0/src/autoforge/runner.py +432 -0
- open_autoforge-0.1.0/src/autoforge/workflows/__init__.py +1 -0
- open_autoforge-0.1.0/src/open_autoforge.egg-info/PKG-INFO +266 -0
- open_autoforge-0.1.0/src/open_autoforge.egg-info/SOURCES.txt +36 -0
- open_autoforge-0.1.0/src/open_autoforge.egg-info/dependency_links.txt +1 -0
- open_autoforge-0.1.0/src/open_autoforge.egg-info/entry_points.txt +2 -0
- open_autoforge-0.1.0/src/open_autoforge.egg-info/requires.txt +11 -0
- open_autoforge-0.1.0/src/open_autoforge.egg-info/top_level.txt +1 -0
- open_autoforge-0.1.0/tests/test___main__.py +261 -0
- open_autoforge-0.1.0/tests/test_adapters.py +207 -0
- open_autoforge-0.1.0/tests/test_base.py +53 -0
- open_autoforge-0.1.0/tests/test_budget.py +161 -0
- open_autoforge-0.1.0/tests/test_cli.py +316 -0
- open_autoforge-0.1.0/tests/test_complexity.py +133 -0
- open_autoforge-0.1.0/tests/test_git_manager.py +190 -0
- open_autoforge-0.1.0/tests/test_models.py +300 -0
- open_autoforge-0.1.0/tests/test_registry.py +162 -0
- open_autoforge-0.1.0/tests/test_regression.py +295 -0
- open_autoforge-0.1.0/tests/test_reporting.py +201 -0
- open_autoforge-0.1.0/tests/test_runner.py +661 -0
- open_autoforge-0.1.0/tests/test_test_quality_adapter.py +1484 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Liping Zhang
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: open-autoforge
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Autonomous metric-driven agentic coding framework
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/zhanglpg/autoforge
|
|
7
|
+
Project-URL: Repository, https://github.com/zhanglpg/autoforge
|
|
8
|
+
Keywords: code-quality,autonomous,agentic,metrics,refactoring
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Requires-Dist: pyyaml>=6.0
|
|
22
|
+
Requires-Dist: complexity-accounting>=1.6.0
|
|
23
|
+
Provides-Extra: test
|
|
24
|
+
Requires-Dist: pytest>=7.0; extra == "test"
|
|
25
|
+
Requires-Dist: pytest-cov>=4.0; extra == "test"
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
29
|
+
Requires-Dist: ruff>=0.4.0; extra == "dev"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# AutoForge
|
|
33
|
+
|
|
34
|
+
Autonomous metric-driven agentic coding framework. Generalizes the pattern:
|
|
35
|
+
|
|
36
|
+
**measure → agent acts → re-measure → iterate until target met**
|
|
37
|
+
|
|
38
|
+
AutoForge wraps any code-quality metric into an iterative improvement loop driven by an AI coding agent (e.g., Claude Code). Define a target metric, set budget limits, and let the framework orchestrate measurement, agent action, regression testing, and git management automatically.
|
|
39
|
+
|
|
40
|
+
## Features
|
|
41
|
+
|
|
42
|
+
- **Metric-driven iteration loop** — measure, act, validate, repeat until the target is reached or budget is exhausted.
|
|
43
|
+
- **Pluggable metric adapters** — bring your own measurement tool. Built-in adapters for code complexity (NCS via `complexity-accounting`) and test quality (coverage + assertion analysis).
|
|
44
|
+
- **Budget management** — hard limits on iterations, tokens, and wall-clock time, plus automatic stall detection when improvements plateau.
|
|
45
|
+
- **Regression guard** — runs your test suite between iterations and enforces constraint metrics so improvements never break existing behavior.
|
|
46
|
+
- **Git integration** — automatic branch creation, per-iteration commits, and rollback on failed iterations.
|
|
47
|
+
- **YAML workflow configs** — declarative workflow definitions that specify metrics, budgets, constraints, agent prompts, and language-specific tooling.
|
|
48
|
+
- **Reporting** — JSON and Markdown run reports with health dashboards.
|
|
49
|
+
|
|
50
|
+
## Prerequisites
|
|
51
|
+
|
|
52
|
+
AutoForge requires a **local AI coding agent** to perform the actual code modifications. The agent is configurable — it defaults to [Claude Code](https://docs.anthropic.com/en/docs/claude-code) but can be changed per workflow or at the command line.
|
|
53
|
+
|
|
54
|
+
- The configured agent binary must be installed and available on your `PATH`. AutoForge checks this at startup and **fails fast** with a clear error if the agent cannot be found.
|
|
55
|
+
- By default, AutoForge invokes `claude --print --output-format json -p "<prompt>"` as a subprocess each iteration.
|
|
56
|
+
- To use a different agent, set `agent.command` in your workflow YAML or use the `--agent-command` CLI flag (see [Agent Integration](#agent-integration)).
|
|
57
|
+
|
|
58
|
+
AutoForge itself does **not** call the Claude API directly — it orchestrates the iteration loop (measure, budget, git, regression) and delegates code changes to the local agent process.
|
|
59
|
+
|
|
60
|
+
## Installation
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install open-autoforge
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
For development:
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
git clone https://github.com/zhanglpg/autoforge.git
|
|
70
|
+
cd autoforge
|
|
71
|
+
pip install -e ".[dev]"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Requires Python 3.10+.
|
|
75
|
+
|
|
76
|
+
## Quick Start
|
|
77
|
+
|
|
78
|
+
### Run a workflow
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# Reduce code complexity in ./src to a target NCS of 3.0
|
|
82
|
+
autoforge run complexity_refactor --path ./src --target 3.0
|
|
83
|
+
|
|
84
|
+
# Improve the test quality score to 80%
|
|
85
|
+
autoforge run test_quality --path ./src --target 80.0
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Check project health
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
# Run all metric adapters and show a health dashboard
|
|
92
|
+
autoforge health --path ./src
|
|
93
|
+
|
|
94
|
+
# Output as JSON
|
|
95
|
+
autoforge health --path ./src --format json
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### List available workflows and adapters
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
autoforge list
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## CLI Reference
|
|
105
|
+
|
|
106
|
+
### `autoforge run <workflow>`
|
|
107
|
+
|
|
108
|
+
Execute a metric-driven improvement workflow.
|
|
109
|
+
|
|
110
|
+
| Flag | Description |
|
|
111
|
+
|---|---|
|
|
112
|
+
| `--path, -p` | Target path to improve (default: repo root) |
|
|
113
|
+
| `--repo, -r` | Repository root (default: `.`) |
|
|
114
|
+
| `--target, -t` | Target metric value to achieve |
|
|
115
|
+
| `--adapter, -a` | Metric adapter override |
|
|
116
|
+
| `--config, -c` | Path to a custom workflow YAML |
|
|
117
|
+
| `--max-iterations` | Override max iteration count |
|
|
118
|
+
| `--max-tokens` | Override max token budget |
|
|
119
|
+
| `--max-time` | Override max wall-clock time (minutes) |
|
|
120
|
+
| `--test-command` | Custom test command for regression guard |
|
|
121
|
+
| `--skip-tests` | Skip test validation between iterations |
|
|
122
|
+
| `--skip-git` | Skip git branch/commit management |
|
|
123
|
+
| `--dry-run` | Measure only, don't run the agent |
|
|
124
|
+
| `--agent-command` | Custom agent command (overrides workflow `agent.command`; used as-is) |
|
|
125
|
+
| `--output, -o` | Output directory for reports |
|
|
126
|
+
|
|
127
|
+
### `autoforge health`
|
|
128
|
+
|
|
129
|
+
Run all (or specified) metric adapters and produce a health dashboard.
|
|
130
|
+
|
|
131
|
+
| Flag | Description |
|
|
132
|
+
|---|---|
|
|
133
|
+
| `--path, -p` | Target path to analyze |
|
|
134
|
+
| `--repo, -r` | Repository root (default: `.`) |
|
|
135
|
+
| `--adapters` | Comma-separated adapter names |
|
|
136
|
+
| `--format, -f` | Output format: `text` or `json` |
|
|
137
|
+
| `--output, -o` | Save output to file |
|
|
138
|
+
|
|
139
|
+
### `autoforge list`
|
|
140
|
+
|
|
141
|
+
List all registered workflows and adapters.
|
|
142
|
+
|
|
143
|
+
## Architecture
|
|
144
|
+
|
|
145
|
+
```
|
|
146
|
+
┌─────────────────────────────────────────────────┐
|
|
147
|
+
│ WorkflowRunner │
|
|
148
|
+
│ (measure → act → validate loop) │
|
|
149
|
+
├────────────┬────────────┬───────────┬────────────┤
|
|
150
|
+
│ BudgetMgr │ GitMgr │ Regression│ Reporting │
|
|
151
|
+
│ (limits, │ (branch, │ Guard │ (JSON, │
|
|
152
|
+
│ stall │ commit, │ (tests, │ markdown,│
|
|
153
|
+
│ detect) │ rollback)│ checks) │ health) │
|
|
154
|
+
├────────────┴────────────┴───────────┴────────────┤
|
|
155
|
+
│ MetricAdapter (pluggable) │
|
|
156
|
+
│ complexity · test_quality · your own ... │
|
|
157
|
+
└──────────────────────────────────────────────────┘
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
- **WorkflowRunner** — orchestrates the iteration loop: measure baseline, invoke agent, re-measure, validate, commit or rollback.
|
|
161
|
+
- **Agent (Claude Code)** — the external coding agent invoked as a subprocess each iteration. The runner constructs a prompt with the current metric value, target, and priority files, then calls `claude --print --output-format json`. Use `--agent-command` to substitute a different agent.
|
|
162
|
+
- **BudgetManager** — enforces iteration/token/time limits and detects improvement stalls. Token usage is parsed best-effort from the agent's JSON output or stderr.
|
|
163
|
+
- **GitManager** — creates feature branches, commits per iteration, rolls back failed iterations.
|
|
164
|
+
- **RegressionGuard** — runs tests and checks constraint metrics between iterations.
|
|
165
|
+
- **MetricAdapter** — protocol for plugging in any measurement tool. Adapters normalize tool output into a standard `MetricResult` and identify priority files for the agent to focus on.
|
|
166
|
+
|
|
167
|
+
## Agent Integration
|
|
168
|
+
|
|
169
|
+
AutoForge follows a **subprocess-based agent model**: the framework handles everything except the actual code changes, which are delegated to a local coding agent.
|
|
170
|
+
|
|
171
|
+
### Default: Claude Code
|
|
172
|
+
|
|
173
|
+
Each iteration, the `WorkflowRunner`:
|
|
174
|
+
|
|
175
|
+
1. Calls `adapter.identify_targets()` to find priority files needing improvement.
|
|
176
|
+
2. Builds a structured prompt containing: iteration number, current metric value, target, direction (minimize/maximize), priority file list, and any `system_prompt_addendum` from the workflow YAML.
|
|
177
|
+
3. Writes the prompt to a temp file and invokes:
|
|
178
|
+
```
|
|
179
|
+
claude --print --output-format json -p "$(cat <prompt_file>)"
|
|
180
|
+
```
|
|
181
|
+
4. Parses token usage from the agent's JSON output (or stderr) for budget tracking.
|
|
182
|
+
|
|
183
|
+
The agent runs inside the repository working directory and directly modifies files on disk. After the agent returns, AutoForge re-measures, validates (tests + constraints), and commits or rolls back.
|
|
184
|
+
|
|
185
|
+
### Custom Agents
|
|
186
|
+
|
|
187
|
+
Use `--agent-command` to substitute any command that accepts a prompt on stdin or as an argument:
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
# Use a custom script
|
|
191
|
+
autoforge run complexity_refactor --path ./src --target 3.0 \
|
|
192
|
+
--agent-command "python my_agent.py"
|
|
193
|
+
|
|
194
|
+
# Use a different CLI tool
|
|
195
|
+
autoforge run complexity_refactor --path ./src --target 3.0 \
|
|
196
|
+
--agent-command "aider --message"
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### Workflow YAML Agent Config
|
|
200
|
+
|
|
201
|
+
Each workflow YAML can include an `agent` section to configure the agent binary and provide domain-specific instructions:
|
|
202
|
+
|
|
203
|
+
```yaml
|
|
204
|
+
agent:
|
|
205
|
+
command: "claude" # Agent binary (default: claude). Change to use a different agent.
|
|
206
|
+
skill: "refactor-complexity"
|
|
207
|
+
system_prompt_addendum: |
|
|
208
|
+
You are performing complexity-driven iterative refactoring.
|
|
209
|
+
Prioritize extracting helper functions and reducing nesting depth.
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
### Fail-Fast Validation
|
|
213
|
+
|
|
214
|
+
AutoForge verifies the agent binary exists on `PATH` before starting any iterations. If the agent is not found, the run fails immediately with a clear error message suggesting either installing the agent or using `--agent-command`.
|
|
215
|
+
|
|
216
|
+
## Built-in Workflows
|
|
217
|
+
|
|
218
|
+
### `complexity_refactor`
|
|
219
|
+
|
|
220
|
+
Reduces code complexity using [complexity-accounting](https://pypi.org/project/complexity-accounting/) to measure Net Complexity Score (NCS). The agent iteratively refactors mega-functions, dispatch chains, deep nesting, and duplicated logic.
|
|
221
|
+
|
|
222
|
+
### `test_quality`
|
|
223
|
+
|
|
224
|
+
Improves test suite quality by combining coverage measurement, function gap analysis, and assertion quality scoring. The agent generates missing tests and strengthens existing ones.
|
|
225
|
+
|
|
226
|
+
## Adding a New Adapter
|
|
227
|
+
|
|
228
|
+
1. Subclass `BaseMetricAdapter` in `src/autoforge/adapters/`
|
|
229
|
+
2. Implement `check_prerequisites()`, `measure()`, and `identify_targets()`
|
|
230
|
+
3. Register in `src/autoforge/registry.py`
|
|
231
|
+
4. Create a workflow YAML in `src/autoforge/workflows/`
|
|
232
|
+
|
|
233
|
+
See `src/autoforge/adapters/complexity.py` for a reference implementation.
|
|
234
|
+
|
|
235
|
+
## Project Structure
|
|
236
|
+
|
|
237
|
+
```
|
|
238
|
+
src/autoforge/
|
|
239
|
+
├── __init__.py # Package version
|
|
240
|
+
├── __main__.py # CLI entry point (run, health, list commands)
|
|
241
|
+
├── models.py # Core data models (MetricResult, WorkflowConfig, RunReport)
|
|
242
|
+
├── runner.py # Workflow runner (measure-act-validate loop)
|
|
243
|
+
├── budget.py # Budget manager (iteration/token/time limits, stall detection)
|
|
244
|
+
├── git_manager.py # Git operations (branch, commit, rollback per iteration)
|
|
245
|
+
├── regression.py # Regression guard (test runner, constraint checking)
|
|
246
|
+
├── reporting.py # Report generation (JSON, markdown, health dashboard)
|
|
247
|
+
├── registry.py # Workflow & adapter registry
|
|
248
|
+
├── adapters/
|
|
249
|
+
│ ├── base.py # BaseMetricAdapter ABC
|
|
250
|
+
│ ├── complexity.py # Complexity adapter (NCS)
|
|
251
|
+
│ └── test_quality.py # Test quality adapter (TQS)
|
|
252
|
+
└── workflows/
|
|
253
|
+
├── complexity_refactor.yaml
|
|
254
|
+
└── test_quality.yaml
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
## Development
|
|
258
|
+
|
|
259
|
+
```bash
|
|
260
|
+
pip install -e ".[dev]"
|
|
261
|
+
pytest
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
## License
|
|
265
|
+
|
|
266
|
+
MIT
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# AutoForge
|
|
2
|
+
|
|
3
|
+
Autonomous metric-driven agentic coding framework. Generalizes the pattern:
|
|
4
|
+
|
|
5
|
+
**measure → agent acts → re-measure → iterate until target met**
|
|
6
|
+
|
|
7
|
+
AutoForge wraps any code-quality metric into an iterative improvement loop driven by an AI coding agent (e.g., Claude Code). Define a target metric, set budget limits, and let the framework orchestrate measurement, agent action, regression testing, and git management automatically.
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- **Metric-driven iteration loop** — measure, act, validate, repeat until the target is reached or budget is exhausted.
|
|
12
|
+
- **Pluggable metric adapters** — bring your own measurement tool. Built-in adapters for code complexity (NCS via `complexity-accounting`) and test quality (coverage + assertion analysis).
|
|
13
|
+
- **Budget management** — hard limits on iterations, tokens, and wall-clock time, plus automatic stall detection when improvements plateau.
|
|
14
|
+
- **Regression guard** — runs your test suite between iterations and enforces constraint metrics so improvements never break existing behavior.
|
|
15
|
+
- **Git integration** — automatic branch creation, per-iteration commits, and rollback on failed iterations.
|
|
16
|
+
- **YAML workflow configs** — declarative workflow definitions that specify metrics, budgets, constraints, agent prompts, and language-specific tooling.
|
|
17
|
+
- **Reporting** — JSON and Markdown run reports with health dashboards.
|
|
18
|
+
|
|
19
|
+
## Prerequisites
|
|
20
|
+
|
|
21
|
+
AutoForge requires a **local AI coding agent** to perform the actual code modifications. The agent is configurable — it defaults to [Claude Code](https://docs.anthropic.com/en/docs/claude-code) but can be changed per workflow or at the command line.
|
|
22
|
+
|
|
23
|
+
- The configured agent binary must be installed and available on your `PATH`. AutoForge checks this at startup and **fails fast** with a clear error if the agent cannot be found.
|
|
24
|
+
- By default, AutoForge invokes `claude --print --output-format json -p "<prompt>"` as a subprocess each iteration.
|
|
25
|
+
- To use a different agent, set `agent.command` in your workflow YAML or use the `--agent-command` CLI flag (see [Agent Integration](#agent-integration)).
|
|
26
|
+
|
|
27
|
+
AutoForge itself does **not** call the Claude API directly — it orchestrates the iteration loop (measure, budget, git, regression) and delegates code changes to the local agent process.
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install open-autoforge
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
For development:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
git clone https://github.com/zhanglpg/autoforge.git
|
|
39
|
+
cd autoforge
|
|
40
|
+
pip install -e ".[dev]"
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Requires Python 3.10+.
|
|
44
|
+
|
|
45
|
+
## Quick Start
|
|
46
|
+
|
|
47
|
+
### Run a workflow
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
# Reduce code complexity in ./src to a target NCS of 3.0
|
|
51
|
+
autoforge run complexity_refactor --path ./src --target 3.0
|
|
52
|
+
|
|
53
|
+
# Improve the test quality score to 80%
|
|
54
|
+
autoforge run test_quality --path ./src --target 80.0
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Check project health
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# Run all metric adapters and show a health dashboard
|
|
61
|
+
autoforge health --path ./src
|
|
62
|
+
|
|
63
|
+
# Output as JSON
|
|
64
|
+
autoforge health --path ./src --format json
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### List available workflows and adapters
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
autoforge list
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## CLI Reference
|
|
74
|
+
|
|
75
|
+
### `autoforge run <workflow>`
|
|
76
|
+
|
|
77
|
+
Execute a metric-driven improvement workflow.
|
|
78
|
+
|
|
79
|
+
| Flag | Description |
|
|
80
|
+
|---|---|
|
|
81
|
+
| `--path, -p` | Target path to improve (default: repo root) |
|
|
82
|
+
| `--repo, -r` | Repository root (default: `.`) |
|
|
83
|
+
| `--target, -t` | Target metric value to achieve |
|
|
84
|
+
| `--adapter, -a` | Metric adapter override |
|
|
85
|
+
| `--config, -c` | Path to a custom workflow YAML |
|
|
86
|
+
| `--max-iterations` | Override max iteration count |
|
|
87
|
+
| `--max-tokens` | Override max token budget |
|
|
88
|
+
| `--max-time` | Override max wall-clock time (minutes) |
|
|
89
|
+
| `--test-command` | Custom test command for regression guard |
|
|
90
|
+
| `--skip-tests` | Skip test validation between iterations |
|
|
91
|
+
| `--skip-git` | Skip git branch/commit management |
|
|
92
|
+
| `--dry-run` | Measure only, don't run the agent |
|
|
93
|
+
| `--agent-command` | Custom agent command (overrides workflow `agent.command`; used as-is) |
|
|
94
|
+
| `--output, -o` | Output directory for reports |
|
|
95
|
+
|
|
96
|
+
### `autoforge health`
|
|
97
|
+
|
|
98
|
+
Run all (or specified) metric adapters and produce a health dashboard.
|
|
99
|
+
|
|
100
|
+
| Flag | Description |
|
|
101
|
+
|---|---|
|
|
102
|
+
| `--path, -p` | Target path to analyze |
|
|
103
|
+
| `--repo, -r` | Repository root (default: `.`) |
|
|
104
|
+
| `--adapters` | Comma-separated adapter names |
|
|
105
|
+
| `--format, -f` | Output format: `text` or `json` |
|
|
106
|
+
| `--output, -o` | Save output to file |
|
|
107
|
+
|
|
108
|
+
### `autoforge list`
|
|
109
|
+
|
|
110
|
+
List all registered workflows and adapters.
|
|
111
|
+
|
|
112
|
+
## Architecture
|
|
113
|
+
|
|
114
|
+
```
|
|
115
|
+
┌─────────────────────────────────────────────────┐
|
|
116
|
+
│ WorkflowRunner │
|
|
117
|
+
│ (measure → act → validate loop) │
|
|
118
|
+
├────────────┬────────────┬───────────┬────────────┤
|
|
119
|
+
│ BudgetMgr │ GitMgr │ Regression│ Reporting │
|
|
120
|
+
│ (limits, │ (branch, │ Guard │ (JSON, │
|
|
121
|
+
│ stall │ commit, │ (tests, │ markdown,│
|
|
122
|
+
│ detect) │ rollback)│ checks) │ health) │
|
|
123
|
+
├────────────┴────────────┴───────────┴────────────┤
|
|
124
|
+
│ MetricAdapter (pluggable) │
|
|
125
|
+
│ complexity · test_quality · your own ... │
|
|
126
|
+
└──────────────────────────────────────────────────┘
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
- **WorkflowRunner** — orchestrates the iteration loop: measure baseline, invoke agent, re-measure, validate, commit or rollback.
|
|
130
|
+
- **Agent (Claude Code)** — the external coding agent invoked as a subprocess each iteration. The runner constructs a prompt with the current metric value, target, and priority files, then calls `claude --print --output-format json`. Use `--agent-command` to substitute a different agent.
|
|
131
|
+
- **BudgetManager** — enforces iteration/token/time limits and detects improvement stalls. Token usage is parsed best-effort from the agent's JSON output or stderr.
|
|
132
|
+
- **GitManager** — creates feature branches, commits per iteration, rolls back failed iterations.
|
|
133
|
+
- **RegressionGuard** — runs tests and checks constraint metrics between iterations.
|
|
134
|
+
- **MetricAdapter** — protocol for plugging in any measurement tool. Adapters normalize tool output into a standard `MetricResult` and identify priority files for the agent to focus on.
|
|
135
|
+
|
|
136
|
+
## Agent Integration
|
|
137
|
+
|
|
138
|
+
AutoForge follows a **subprocess-based agent model**: the framework handles everything except the actual code changes, which are delegated to a local coding agent.
|
|
139
|
+
|
|
140
|
+
### Default: Claude Code
|
|
141
|
+
|
|
142
|
+
Each iteration, the `WorkflowRunner`:
|
|
143
|
+
|
|
144
|
+
1. Calls `adapter.identify_targets()` to find priority files needing improvement.
|
|
145
|
+
2. Builds a structured prompt containing: iteration number, current metric value, target, direction (minimize/maximize), priority file list, and any `system_prompt_addendum` from the workflow YAML.
|
|
146
|
+
3. Writes the prompt to a temp file and invokes:
|
|
147
|
+
```
|
|
148
|
+
claude --print --output-format json -p "$(cat <prompt_file>)"
|
|
149
|
+
```
|
|
150
|
+
4. Parses token usage from the agent's JSON output (or stderr) for budget tracking.
|
|
151
|
+
|
|
152
|
+
The agent runs inside the repository working directory and directly modifies files on disk. After the agent returns, AutoForge re-measures, validates (tests + constraints), and commits or rolls back.
|
|
153
|
+
|
|
154
|
+
### Custom Agents
|
|
155
|
+
|
|
156
|
+
Use `--agent-command` to substitute any command that accepts a prompt on stdin or as an argument:
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
# Use a custom script
|
|
160
|
+
autoforge run complexity_refactor --path ./src --target 3.0 \
|
|
161
|
+
--agent-command "python my_agent.py"
|
|
162
|
+
|
|
163
|
+
# Use a different CLI tool
|
|
164
|
+
autoforge run complexity_refactor --path ./src --target 3.0 \
|
|
165
|
+
--agent-command "aider --message"
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Workflow YAML Agent Config
|
|
169
|
+
|
|
170
|
+
Each workflow YAML can include an `agent` section to configure the agent binary and provide domain-specific instructions:
|
|
171
|
+
|
|
172
|
+
```yaml
|
|
173
|
+
agent:
|
|
174
|
+
command: "claude" # Agent binary (default: claude). Change to use a different agent.
|
|
175
|
+
skill: "refactor-complexity"
|
|
176
|
+
system_prompt_addendum: |
|
|
177
|
+
You are performing complexity-driven iterative refactoring.
|
|
178
|
+
Prioritize extracting helper functions and reducing nesting depth.
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### Fail-Fast Validation
|
|
182
|
+
|
|
183
|
+
AutoForge verifies the agent binary exists on `PATH` before starting any iterations. If the agent is not found, the run fails immediately with a clear error message suggesting either installing the agent or using `--agent-command`.
|
|
184
|
+
|
|
185
|
+
## Built-in Workflows
|
|
186
|
+
|
|
187
|
+
### `complexity_refactor`
|
|
188
|
+
|
|
189
|
+
Reduces code complexity using [complexity-accounting](https://pypi.org/project/complexity-accounting/) to measure Net Complexity Score (NCS). The agent iteratively refactors mega-functions, dispatch chains, deep nesting, and duplicated logic.
|
|
190
|
+
|
|
191
|
+
### `test_quality`
|
|
192
|
+
|
|
193
|
+
Improves test suite quality by combining coverage measurement, function gap analysis, and assertion quality scoring. The agent generates missing tests and strengthens existing ones.
|
|
194
|
+
|
|
195
|
+
## Adding a New Adapter
|
|
196
|
+
|
|
197
|
+
1. Subclass `BaseMetricAdapter` in `src/autoforge/adapters/`
|
|
198
|
+
2. Implement `check_prerequisites()`, `measure()`, and `identify_targets()`
|
|
199
|
+
3. Register in `src/autoforge/registry.py`
|
|
200
|
+
4. Create a workflow YAML in `src/autoforge/workflows/`
|
|
201
|
+
|
|
202
|
+
See `src/autoforge/adapters/complexity.py` for a reference implementation.
|
|
203
|
+
|
|
204
|
+
## Project Structure
|
|
205
|
+
|
|
206
|
+
```
|
|
207
|
+
src/autoforge/
|
|
208
|
+
├── __init__.py # Package version
|
|
209
|
+
├── __main__.py # CLI entry point (run, health, list commands)
|
|
210
|
+
├── models.py # Core data models (MetricResult, WorkflowConfig, RunReport)
|
|
211
|
+
├── runner.py # Workflow runner (measure-act-validate loop)
|
|
212
|
+
├── budget.py # Budget manager (iteration/token/time limits, stall detection)
|
|
213
|
+
├── git_manager.py # Git operations (branch, commit, rollback per iteration)
|
|
214
|
+
├── regression.py # Regression guard (test runner, constraint checking)
|
|
215
|
+
├── reporting.py # Report generation (JSON, markdown, health dashboard)
|
|
216
|
+
├── registry.py # Workflow & adapter registry
|
|
217
|
+
├── adapters/
|
|
218
|
+
│ ├── base.py # BaseMetricAdapter ABC
|
|
219
|
+
│ ├── complexity.py # Complexity adapter (NCS)
|
|
220
|
+
│ └── test_quality.py # Test quality adapter (TQS)
|
|
221
|
+
└── workflows/
|
|
222
|
+
├── complexity_refactor.yaml
|
|
223
|
+
└── test_quality.yaml
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
## Development
|
|
227
|
+
|
|
228
|
+
```bash
|
|
229
|
+
pip install -e ".[dev]"
|
|
230
|
+
pytest
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
## License
|
|
234
|
+
|
|
235
|
+
MIT
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=64", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "open-autoforge"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Autonomous metric-driven agentic coding framework"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 3 - Alpha",
|
|
14
|
+
"Intended Audience :: Developers",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.10",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Programming Language :: Python :: 3.13",
|
|
21
|
+
"Topic :: Software Development :: Quality Assurance",
|
|
22
|
+
]
|
|
23
|
+
keywords = ["code-quality", "autonomous", "agentic", "metrics", "refactoring"]
|
|
24
|
+
dependencies = [
|
|
25
|
+
"pyyaml>=6.0",
|
|
26
|
+
"complexity-accounting>=1.6.0",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.optional-dependencies]
|
|
30
|
+
test = ["pytest>=7.0", "pytest-cov>=4.0"]
|
|
31
|
+
dev = ["pytest>=7.0", "pytest-cov>=4.0", "ruff>=0.4.0"]
|
|
32
|
+
|
|
33
|
+
[project.scripts]
|
|
34
|
+
autoforge = "autoforge.__main__:main"
|
|
35
|
+
|
|
36
|
+
[project.urls]
|
|
37
|
+
Homepage = "https://github.com/zhanglpg/autoforge"
|
|
38
|
+
Repository = "https://github.com/zhanglpg/autoforge"
|
|
39
|
+
|
|
40
|
+
[tool.setuptools.packages.find]
|
|
41
|
+
where = ["src"]
|
|
42
|
+
|
|
43
|
+
[tool.pytest.ini_options]
|
|
44
|
+
testpaths = ["tests"]
|
|
45
|
+
python_files = ["test_*.py"]
|
|
46
|
+
python_functions = ["test_*"]
|
|
47
|
+
|
|
48
|
+
[tool.coverage.run]
|
|
49
|
+
source = ["autoforge"]
|
|
50
|
+
|
|
51
|
+
[tool.coverage.report]
|
|
52
|
+
show_missing = true
|