auto-re-agent 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- auto_re_agent-0.1.0/.github/workflows/ci.yml +31 -0
- auto_re_agent-0.1.0/.gitignore +12 -0
- auto_re_agent-0.1.0/LICENSE +21 -0
- auto_re_agent-0.1.0/PKG-INFO +209 -0
- auto_re_agent-0.1.0/README.md +175 -0
- auto_re_agent-0.1.0/docs/architecture.md +24 -0
- auto_re_agent-0.1.0/docs/configuration.md +71 -0
- auto_re_agent-0.1.0/docs/getting-started.md +36 -0
- auto_re_agent-0.1.0/pyproject.toml +70 -0
- auto_re_agent-0.1.0/src/re_agent/__init__.py +2 -0
- auto_re_agent-0.1.0/src/re_agent/__main__.py +4 -0
- auto_re_agent-0.1.0/src/re_agent/agents/__init__.py +3 -0
- auto_re_agent-0.1.0/src/re_agent/agents/checker.py +94 -0
- auto_re_agent-0.1.0/src/re_agent/agents/loop.py +162 -0
- auto_re_agent-0.1.0/src/re_agent/agents/prompts/checker_system.md +16 -0
- auto_re_agent-0.1.0/src/re_agent/agents/prompts/checker_task.md +21 -0
- auto_re_agent-0.1.0/src/re_agent/agents/prompts/fix_instructions.md +16 -0
- auto_re_agent-0.1.0/src/re_agent/agents/prompts/reverser_system.md +13 -0
- auto_re_agent-0.1.0/src/re_agent/agents/prompts/reverser_task.md +25 -0
- auto_re_agent-0.1.0/src/re_agent/agents/reverser.py +159 -0
- auto_re_agent-0.1.0/src/re_agent/agents/source_context.py +148 -0
- auto_re_agent-0.1.0/src/re_agent/backend/__init__.py +3 -0
- auto_re_agent-0.1.0/src/re_agent/backend/ghidra_bridge.py +330 -0
- auto_re_agent-0.1.0/src/re_agent/backend/protocol.py +96 -0
- auto_re_agent-0.1.0/src/re_agent/backend/registry.py +36 -0
- auto_re_agent-0.1.0/src/re_agent/backend/stub.py +84 -0
- auto_re_agent-0.1.0/src/re_agent/cli/__init__.py +0 -0
- auto_re_agent-0.1.0/src/re_agent/cli/cmd_init.py +43 -0
- auto_re_agent-0.1.0/src/re_agent/cli/cmd_parity.py +109 -0
- auto_re_agent-0.1.0/src/re_agent/cli/cmd_reverse.py +100 -0
- auto_re_agent-0.1.0/src/re_agent/cli/cmd_status.py +50 -0
- auto_re_agent-0.1.0/src/re_agent/cli/main.py +72 -0
- auto_re_agent-0.1.0/src/re_agent/config/__init__.py +4 -0
- auto_re_agent-0.1.0/src/re_agent/config/defaults.py +97 -0
- auto_re_agent-0.1.0/src/re_agent/config/loader.py +215 -0
- auto_re_agent-0.1.0/src/re_agent/config/schema.py +106 -0
- auto_re_agent-0.1.0/src/re_agent/core/__init__.py +26 -0
- auto_re_agent-0.1.0/src/re_agent/core/function_picker.py +46 -0
- auto_re_agent-0.1.0/src/re_agent/core/models.py +260 -0
- auto_re_agent-0.1.0/src/re_agent/core/session.py +91 -0
- auto_re_agent-0.1.0/src/re_agent/llm/__init__.py +4 -0
- auto_re_agent-0.1.0/src/re_agent/llm/claude.py +89 -0
- auto_re_agent-0.1.0/src/re_agent/llm/codex_cli.py +92 -0
- auto_re_agent-0.1.0/src/re_agent/llm/openai_compat.py +82 -0
- auto_re_agent-0.1.0/src/re_agent/llm/protocol.py +64 -0
- auto_re_agent-0.1.0/src/re_agent/llm/registry.py +53 -0
- auto_re_agent-0.1.0/src/re_agent/orchestrator/__init__.py +4 -0
- auto_re_agent-0.1.0/src/re_agent/orchestrator/class_runner.py +77 -0
- auto_re_agent-0.1.0/src/re_agent/orchestrator/single.py +118 -0
- auto_re_agent-0.1.0/src/re_agent/parity/__init__.py +4 -0
- auto_re_agent-0.1.0/src/re_agent/parity/cache.py +36 -0
- auto_re_agent-0.1.0/src/re_agent/parity/engine.py +219 -0
- auto_re_agent-0.1.0/src/re_agent/parity/rules.py +103 -0
- auto_re_agent-0.1.0/src/re_agent/parity/scoring.py +15 -0
- auto_re_agent-0.1.0/src/re_agent/parity/signals.py +167 -0
- auto_re_agent-0.1.0/src/re_agent/parity/source_indexer.py +447 -0
- auto_re_agent-0.1.0/src/re_agent/reports/__init__.py +0 -0
- auto_re_agent-0.1.0/src/re_agent/reports/formatter.py +88 -0
- auto_re_agent-0.1.0/src/re_agent/reports/tracker.py +50 -0
- auto_re_agent-0.1.0/src/re_agent/utils/__init__.py +0 -0
- auto_re_agent-0.1.0/src/re_agent/utils/address.py +34 -0
- auto_re_agent-0.1.0/src/re_agent/utils/process.py +57 -0
- auto_re_agent-0.1.0/src/re_agent/utils/templates.py +40 -0
- auto_re_agent-0.1.0/src/re_agent/utils/text.py +150 -0
- auto_re_agent-0.1.0/src/re_agent/verification/__init__.py +1 -0
- auto_re_agent-0.1.0/src/re_agent/verification/objective.py +98 -0
- auto_re_agent-0.1.0/tests/__init__.py +0 -0
- auto_re_agent-0.1.0/tests/conftest.py +28 -0
- auto_re_agent-0.1.0/tests/fixtures/sample_asm.txt +25 -0
- auto_re_agent-0.1.0/tests/fixtures/sample_config.yaml +38 -0
- auto_re_agent-0.1.0/tests/fixtures/sample_decompile.txt +17 -0
- auto_re_agent-0.1.0/tests/fixtures/sample_hooks.csv +5 -0
- auto_re_agent-0.1.0/tests/fixtures/sample_source.cpp +23 -0
- auto_re_agent-0.1.0/tests/test_agents/__init__.py +0 -0
- auto_re_agent-0.1.0/tests/test_agents/test_loop.py +152 -0
- auto_re_agent-0.1.0/tests/test_agents/test_source_context.py +105 -0
- auto_re_agent-0.1.0/tests/test_backend/__init__.py +0 -0
- auto_re_agent-0.1.0/tests/test_backend/test_ghidra_bridge.py +84 -0
- auto_re_agent-0.1.0/tests/test_cli/__init__.py +0 -0
- auto_re_agent-0.1.0/tests/test_cli/test_main.py +63 -0
- auto_re_agent-0.1.0/tests/test_config/__init__.py +0 -0
- auto_re_agent-0.1.0/tests/test_config/test_loader.py +39 -0
- auto_re_agent-0.1.0/tests/test_core/__init__.py +0 -0
- auto_re_agent-0.1.0/tests/test_core/test_function_picker.py +48 -0
- auto_re_agent-0.1.0/tests/test_core/test_models.py +84 -0
- auto_re_agent-0.1.0/tests/test_llm/__init__.py +0 -0
- auto_re_agent-0.1.0/tests/test_llm/test_protocol.py +54 -0
- auto_re_agent-0.1.0/tests/test_orchestrator/__init__.py +0 -0
- auto_re_agent-0.1.0/tests/test_orchestrator/test_single.py +17 -0
- auto_re_agent-0.1.0/tests/test_parity/__init__.py +0 -0
- auto_re_agent-0.1.0/tests/test_parity/test_engine.py +79 -0
- auto_re_agent-0.1.0/tests/test_parity/test_scoring.py +27 -0
- auto_re_agent-0.1.0/tests/test_parity/test_signals.py +113 -0
- auto_re_agent-0.1.0/tests/test_parity/test_source_indexer.py +150 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main, master]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
lint:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
- uses: actions/setup-python@v5
|
|
14
|
+
with:
|
|
15
|
+
python-version: "3.12"
|
|
16
|
+
- run: pip install -e ".[dev]"
|
|
17
|
+
- run: ruff check src/
|
|
18
|
+
- run: mypy src/re_agent/
|
|
19
|
+
|
|
20
|
+
test:
|
|
21
|
+
runs-on: ubuntu-latest
|
|
22
|
+
strategy:
|
|
23
|
+
matrix:
|
|
24
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
25
|
+
steps:
|
|
26
|
+
- uses: actions/checkout@v4
|
|
27
|
+
- uses: actions/setup-python@v5
|
|
28
|
+
with:
|
|
29
|
+
python-version: ${{ matrix.python-version }}
|
|
30
|
+
- run: pip install -e ".[dev]"
|
|
31
|
+
- run: pytest tests/ -m "not llm and not ghidra" -x --tb=short
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Dryxio
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: auto-re-agent
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Autonomous reverse-engineering agent with a source-aware reverser/checker loop, objective verification, parity checks, and a Ghidra backend
|
|
5
|
+
Project-URL: Homepage, https://github.com/dryxio/auto-re-agent
|
|
6
|
+
Project-URL: Repository, https://github.com/dryxio/auto-re-agent
|
|
7
|
+
Project-URL: Issues, https://github.com/dryxio/auto-re-agent/issues
|
|
8
|
+
Author: Dryxio
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: autonomous-agent,binary-analysis,ghidra,llm,reverse-engineering
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Security
|
|
21
|
+
Classifier: Topic :: Software Development :: Disassemblers
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: anthropic>=0.40
|
|
24
|
+
Requires-Dist: openai>=1.0
|
|
25
|
+
Requires-Dist: pyyaml>=6.0
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Requires-Dist: mypy>=1.13; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: ruff>=0.8; extra == 'dev'
|
|
31
|
+
Provides-Extra: ghidra-bridge
|
|
32
|
+
Requires-Dist: ghidra-ai-bridge>=0.1.0; extra == 'ghidra-bridge'
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
# re-agent
|
|
36
|
+
|
|
37
|
+
Autonomous reverse-engineering agent — source-aware reverser/checker loop, objective verifier, parity engine, and Ghidra backend.
|
|
38
|
+
|
|
39
|
+
## Overview
|
|
40
|
+
|
|
41
|
+
Demo: [YouTube](https://youtu.be/zBQJYMKmwAs?si=emi1kDsJ81-2-tc3)
|
|
42
|
+
|
|
43
|
+
re-agent automates a reverse-engineering workflow by combining a reverser/checker loop with Ghidra decompilation through [ghidra-ai-bridge](https://github.com/dryxio/ghidra-ai-bridge). The current pipeline also retrieves nearby project source context during generation and runs a conservative structural verifier before accepting checker passes.
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
re-agent reverse --class CTrain
|
|
47
|
+
│
|
|
48
|
+
├── Config (re-agent.yaml + env + CLI)
|
|
49
|
+
│ └── project_profile (stub_markers, hook_patterns, source_layout)
|
|
50
|
+
│
|
|
51
|
+
├── Orchestrator (single / class runner)
|
|
52
|
+
│ ├── Function Picker (ranks by caller count, filters completed)
|
|
53
|
+
│ ├── Context Gatherer (decompile + xrefs + structs + source retrieval)
|
|
54
|
+
│ │
|
|
55
|
+
│ ├── Agent Loop (reverser → checker → fix, max N rounds)
|
|
56
|
+
│ │ ├── LLM Providers: Claude | OpenAI-compatible APIs | Codex CLI
|
|
57
|
+
│ │ └── Prompt Templates (customizable .md files)
|
|
58
|
+
│ │
|
|
59
|
+
│ ├── Objective Verifier (call-count + control-flow sanity checks)
|
|
60
|
+
│ │
|
|
61
|
+
│ ├── Parity Engine (GREEN/YELLOW/RED verification gate)
|
|
62
|
+
│ │ ├── Source Indexer (C++ body parser)
|
|
63
|
+
│ │ ├── 11 Heuristic Signals (all configurable/toggleable)
|
|
64
|
+
│ │ └── Semantic Rules + Manual Approvals
|
|
65
|
+
│ │
|
|
66
|
+
│ └── Session State (JSON progress file)
|
|
67
|
+
│
|
|
68
|
+
└── RE Backend: ghidra-ai-bridge
|
|
69
|
+
└── Capability flags → graceful degradation
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Requirements
|
|
73
|
+
|
|
74
|
+
- Python 3.10+
|
|
75
|
+
- [ghidra-ai-bridge](https://github.com/Dryxio/ghidra-ai-bridge) — re-agent uses this as its backend to decompile functions, fetch xrefs, read structs/enums, and query Ghidra. Install it and point it at your Ghidra project before running `re-agent reverse`.
|
|
76
|
+
- One supported LLM setup:
|
|
77
|
+
- `ANTHROPIC_API_KEY` for Claude
|
|
78
|
+
- `OPENAI_API_KEY` for OpenAI-compatible APIs
|
|
79
|
+
- a local `codex` CLI login for the Codex provider
|
|
80
|
+
|
|
81
|
+
## Installation
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
pip install auto-re-agent
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Quick Start
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
# 1. Initialize project config
|
|
91
|
+
re-agent init
|
|
92
|
+
|
|
93
|
+
# 2. Edit re-agent.yaml with your project settings
|
|
94
|
+
|
|
95
|
+
# 3. Reverse a single function
|
|
96
|
+
re-agent reverse --address 0x6F86A0
|
|
97
|
+
|
|
98
|
+
# 4. Reverse all functions in a class
|
|
99
|
+
re-agent reverse --class CTrain --max-functions 10
|
|
100
|
+
|
|
101
|
+
# 5. Run parity checks
|
|
102
|
+
re-agent parity --address 0x6F86A0
|
|
103
|
+
|
|
104
|
+
# 6. Check progress
|
|
105
|
+
re-agent status
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Configuration
|
|
109
|
+
|
|
110
|
+
re-agent uses a layered configuration system (highest priority first): CLI flags > environment variables (`RE_AGENT_*`) > `re-agent.yaml` > defaults.
|
|
111
|
+
|
|
112
|
+
```yaml
|
|
113
|
+
llm:
|
|
114
|
+
provider: claude # claude | openai | openai-compat | codex
|
|
115
|
+
model: claude-sonnet-4-5-20250929
|
|
116
|
+
# api_key: set via RE_AGENT_LLM_API_KEY env var
|
|
117
|
+
timeout_s: 1800
|
|
118
|
+
|
|
119
|
+
backend:
|
|
120
|
+
type: ghidra-bridge
|
|
121
|
+
cli_path: ~/ghidra-tools/ghidra
|
|
122
|
+
|
|
123
|
+
orchestrator:
|
|
124
|
+
max_review_rounds: 4
|
|
125
|
+
max_functions_per_class: 10
|
|
126
|
+
objective_verifier_enabled: true
|
|
127
|
+
|
|
128
|
+
project_profile:
|
|
129
|
+
source_root: ./source/game_sa
|
|
130
|
+
hook_patterns:
|
|
131
|
+
- 'RH_ScopedInstall\s*\(\s*(\w+)\s*,\s*(0x[0-9A-Fa-f]+)'
|
|
132
|
+
stub_markers: ["NOTSA_UNREACHABLE"]
|
|
133
|
+
stub_call_prefix: "plugin::Call"
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
See [docs/configuration.md](docs/configuration.md) for all options.
|
|
137
|
+
|
|
138
|
+
## CLI Reference
|
|
139
|
+
|
|
140
|
+
| Command | Description |
|
|
141
|
+
|---------|-------------|
|
|
142
|
+
| `re-agent init` | Generate `re-agent.yaml` config file |
|
|
143
|
+
| `re-agent reverse --address ADDR` | Reverse a single function |
|
|
144
|
+
| `re-agent reverse --class CLASS` | Reverse all functions in a class |
|
|
145
|
+
| `re-agent reverse --dry-run` | Show what would be reversed |
|
|
146
|
+
| `re-agent parity --address ADDR` | Run parity checks on a function |
|
|
147
|
+
| `re-agent parity --filter REGEX` | Run parity checks matching pattern |
|
|
148
|
+
| `re-agent status` | Show reversal progress |
|
|
149
|
+
| `re-agent status --class CLASS` | Show progress for a specific class |
|
|
150
|
+
|
|
151
|
+
## LLM Providers
|
|
152
|
+
|
|
153
|
+
- **Claude** (Anthropic SDK) — set `ANTHROPIC_API_KEY`
|
|
154
|
+
- **OpenAI / OpenAI-compatible** — set `OPENAI_API_KEY`, optionally set `base_url`
|
|
155
|
+
- **Codex CLI** — uses local `codex exec` with ChatGPT login credentials; no API key required
|
|
156
|
+
|
|
157
|
+
## Parity Engine
|
|
158
|
+
|
|
159
|
+
The parity engine runs 11 configurable heuristic signals to verify reversed code matches the original binary:
|
|
160
|
+
|
|
161
|
+
| Signal | Level | Description |
|
|
162
|
+
|--------|-------|-------------|
|
|
163
|
+
| Missing source | RED | No source body found for hooked function |
|
|
164
|
+
| Stub markers | RED | Source contains stub markers (e.g., NOTSA_UNREACHABLE) |
|
|
165
|
+
| Trivial stub | RED | Plugin-call heavy with tiny body and no control flow |
|
|
166
|
+
| Large ASM tiny source | RED | ASM >= 80 instructions but source <= 12 lines |
|
|
167
|
+
| Plugin-call heavy | YELLOW | Plugin calls dominate the function body |
|
|
168
|
+
| Short body | YELLOW | Body has fewer than 6 lines |
|
|
169
|
+
| Low call count | YELLOW | Decompile shows many callees but source has few |
|
|
170
|
+
| FP sensitivity | YELLOW | ASM has floating-point ops but source doesn't |
|
|
171
|
+
| Call count mismatch | YELLOW | Source call count differs significantly from ASM |
|
|
172
|
+
| NaN logic | YELLOW | Decompile has NaN handling but source doesn't |
|
|
173
|
+
| Inline wrapper | INFO | Function is a thin inline wrapper |
|
|
174
|
+
|
|
175
|
+
## Objective Verifier
|
|
176
|
+
|
|
177
|
+
The reversal loop also runs a conservative structural verifier after the LLM checker passes. It only blocks acceptance on strong mismatches such as:
|
|
178
|
+
|
|
179
|
+
- call-count gaps between candidate code and decompile/ASM
|
|
180
|
+
- control-flow gaps where the candidate is clearly missing branches or loops
|
|
181
|
+
|
|
182
|
+
This is intentionally narrower than full equivalence checking, but it catches obvious false positives before they are recorded as successful reversals.
|
|
183
|
+
|
|
184
|
+
This matters in practice because an LLM checker can still produce false positives: it may accept code that looks plausible but is missing real branch or call structure from the binary.
|
|
185
|
+
|
|
186
|
+
## Safety
|
|
187
|
+
|
|
188
|
+
- **No auto-commit**: re-agent writes code but never commits or pushes
|
|
189
|
+
- **Bounded retries**: Hard cap on fix loop iterations (default: 4)
|
|
190
|
+
- **Deterministic logs**: Every LLM call logged with timestamps
|
|
191
|
+
- **No destructive ops**: Never deletes files, modifies git, or runs builds
|
|
192
|
+
- **Session isolation**: Progress appended, never overwritten
|
|
193
|
+
|
|
194
|
+
## Development
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
git clone https://github.com/dryxio/auto-re-agent.git
|
|
198
|
+
cd auto-re-agent
|
|
199
|
+
python -m venv .venv && source .venv/bin/activate
|
|
200
|
+
pip install -e ".[dev]"
|
|
201
|
+
|
|
202
|
+
pytest tests/
|
|
203
|
+
ruff check src/
|
|
204
|
+
mypy src/re_agent/
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## License
|
|
208
|
+
|
|
209
|
+
MIT
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
# re-agent
|
|
2
|
+
|
|
3
|
+
Autonomous reverse-engineering agent — source-aware reverser/checker loop, objective verifier, parity engine, and Ghidra backend.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
Demo: [YouTube](https://youtu.be/zBQJYMKmwAs?si=emi1kDsJ81-2-tc3)
|
|
8
|
+
|
|
9
|
+
re-agent automates a reverse-engineering workflow by combining a reverser/checker loop with Ghidra decompilation through [ghidra-ai-bridge](https://github.com/dryxio/ghidra-ai-bridge). The current pipeline also retrieves nearby project source context during generation and runs a conservative structural verifier before accepting checker passes.
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
re-agent reverse --class CTrain
|
|
13
|
+
│
|
|
14
|
+
├── Config (re-agent.yaml + env + CLI)
|
|
15
|
+
│ └── project_profile (stub_markers, hook_patterns, source_layout)
|
|
16
|
+
│
|
|
17
|
+
├── Orchestrator (single / class runner)
|
|
18
|
+
│ ├── Function Picker (ranks by caller count, filters completed)
|
|
19
|
+
│ ├── Context Gatherer (decompile + xrefs + structs + source retrieval)
|
|
20
|
+
│ │
|
|
21
|
+
│ ├── Agent Loop (reverser → checker → fix, max N rounds)
|
|
22
|
+
│ │ ├── LLM Providers: Claude | OpenAI-compatible APIs | Codex CLI
|
|
23
|
+
│ │ └── Prompt Templates (customizable .md files)
|
|
24
|
+
│ │
|
|
25
|
+
│ ├── Objective Verifier (call-count + control-flow sanity checks)
|
|
26
|
+
│ │
|
|
27
|
+
│ ├── Parity Engine (GREEN/YELLOW/RED verification gate)
|
|
28
|
+
│ │ ├── Source Indexer (C++ body parser)
|
|
29
|
+
│ │ ├── 11 Heuristic Signals (all configurable/toggleable)
|
|
30
|
+
│ │ └── Semantic Rules + Manual Approvals
|
|
31
|
+
│ │
|
|
32
|
+
│ └── Session State (JSON progress file)
|
|
33
|
+
│
|
|
34
|
+
└── RE Backend: ghidra-ai-bridge
|
|
35
|
+
└── Capability flags → graceful degradation
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Requirements
|
|
39
|
+
|
|
40
|
+
- Python 3.10+
|
|
41
|
+
- [ghidra-ai-bridge](https://github.com/Dryxio/ghidra-ai-bridge) — re-agent uses this as its backend to decompile functions, fetch xrefs, read structs/enums, and query Ghidra. Install it and point it at your Ghidra project before running `re-agent reverse`.
|
|
42
|
+
- One supported LLM setup:
|
|
43
|
+
- `ANTHROPIC_API_KEY` for Claude
|
|
44
|
+
- `OPENAI_API_KEY` for OpenAI-compatible APIs
|
|
45
|
+
- a local `codex` CLI login for the Codex provider
|
|
46
|
+
|
|
47
|
+
## Installation
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
pip install auto-re-agent
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Quick Start
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# 1. Initialize project config
|
|
57
|
+
re-agent init
|
|
58
|
+
|
|
59
|
+
# 2. Edit re-agent.yaml with your project settings
|
|
60
|
+
|
|
61
|
+
# 3. Reverse a single function
|
|
62
|
+
re-agent reverse --address 0x6F86A0
|
|
63
|
+
|
|
64
|
+
# 4. Reverse all functions in a class
|
|
65
|
+
re-agent reverse --class CTrain --max-functions 10
|
|
66
|
+
|
|
67
|
+
# 5. Run parity checks
|
|
68
|
+
re-agent parity --address 0x6F86A0
|
|
69
|
+
|
|
70
|
+
# 6. Check progress
|
|
71
|
+
re-agent status
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Configuration
|
|
75
|
+
|
|
76
|
+
re-agent uses a layered configuration system (highest priority first): CLI flags > environment variables (`RE_AGENT_*`) > `re-agent.yaml` > defaults.
|
|
77
|
+
|
|
78
|
+
```yaml
|
|
79
|
+
llm:
|
|
80
|
+
provider: claude # claude | openai | openai-compat | codex
|
|
81
|
+
model: claude-sonnet-4-5-20250929
|
|
82
|
+
# api_key: set via RE_AGENT_LLM_API_KEY env var
|
|
83
|
+
timeout_s: 1800
|
|
84
|
+
|
|
85
|
+
backend:
|
|
86
|
+
type: ghidra-bridge
|
|
87
|
+
cli_path: ~/ghidra-tools/ghidra
|
|
88
|
+
|
|
89
|
+
orchestrator:
|
|
90
|
+
max_review_rounds: 4
|
|
91
|
+
max_functions_per_class: 10
|
|
92
|
+
objective_verifier_enabled: true
|
|
93
|
+
|
|
94
|
+
project_profile:
|
|
95
|
+
source_root: ./source/game_sa
|
|
96
|
+
hook_patterns:
|
|
97
|
+
- 'RH_ScopedInstall\s*\(\s*(\w+)\s*,\s*(0x[0-9A-Fa-f]+)'
|
|
98
|
+
stub_markers: ["NOTSA_UNREACHABLE"]
|
|
99
|
+
stub_call_prefix: "plugin::Call"
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
See [docs/configuration.md](docs/configuration.md) for all options.
|
|
103
|
+
|
|
104
|
+
## CLI Reference
|
|
105
|
+
|
|
106
|
+
| Command | Description |
|
|
107
|
+
|---------|-------------|
|
|
108
|
+
| `re-agent init` | Generate `re-agent.yaml` config file |
|
|
109
|
+
| `re-agent reverse --address ADDR` | Reverse a single function |
|
|
110
|
+
| `re-agent reverse --class CLASS` | Reverse all functions in a class |
|
|
111
|
+
| `re-agent reverse --dry-run` | Show what would be reversed |
|
|
112
|
+
| `re-agent parity --address ADDR` | Run parity checks on a function |
|
|
113
|
+
| `re-agent parity --filter REGEX` | Run parity checks matching pattern |
|
|
114
|
+
| `re-agent status` | Show reversal progress |
|
|
115
|
+
| `re-agent status --class CLASS` | Show progress for a specific class |
|
|
116
|
+
|
|
117
|
+
## LLM Providers
|
|
118
|
+
|
|
119
|
+
- **Claude** (Anthropic SDK) — set `ANTHROPIC_API_KEY`
|
|
120
|
+
- **OpenAI / OpenAI-compatible** — set `OPENAI_API_KEY`, optionally set `base_url`
|
|
121
|
+
- **Codex CLI** — uses local `codex exec` with ChatGPT login credentials; no API key required
|
|
122
|
+
|
|
123
|
+
## Parity Engine
|
|
124
|
+
|
|
125
|
+
The parity engine runs 11 configurable heuristic signals to verify reversed code matches the original binary:
|
|
126
|
+
|
|
127
|
+
| Signal | Level | Description |
|
|
128
|
+
|--------|-------|-------------|
|
|
129
|
+
| Missing source | RED | No source body found for hooked function |
|
|
130
|
+
| Stub markers | RED | Source contains stub markers (e.g., NOTSA_UNREACHABLE) |
|
|
131
|
+
| Trivial stub | RED | Plugin-call heavy with tiny body and no control flow |
|
|
132
|
+
| Large ASM tiny source | RED | ASM >= 80 instructions but source <= 12 lines |
|
|
133
|
+
| Plugin-call heavy | YELLOW | Plugin calls dominate the function body |
|
|
134
|
+
| Short body | YELLOW | Body has fewer than 6 lines |
|
|
135
|
+
| Low call count | YELLOW | Decompile shows many callees but source has few |
|
|
136
|
+
| FP sensitivity | YELLOW | ASM has floating-point ops but source doesn't |
|
|
137
|
+
| Call count mismatch | YELLOW | Source call count differs significantly from ASM |
|
|
138
|
+
| NaN logic | YELLOW | Decompile has NaN handling but source doesn't |
|
|
139
|
+
| Inline wrapper | INFO | Function is a thin inline wrapper |
|
|
140
|
+
|
|
141
|
+
## Objective Verifier
|
|
142
|
+
|
|
143
|
+
The reversal loop also runs a conservative structural verifier after the LLM checker passes. It only blocks acceptance on strong mismatches such as:
|
|
144
|
+
|
|
145
|
+
- call-count gaps between candidate code and decompile/ASM
|
|
146
|
+
- control-flow gaps where the candidate is clearly missing branches or loops
|
|
147
|
+
|
|
148
|
+
This is intentionally narrower than full equivalence checking, but it catches obvious false positives before they are recorded as successful reversals.
|
|
149
|
+
|
|
150
|
+
This matters in practice because an LLM checker can still produce false positives: it may accept code that looks plausible but is missing real branch or call structure from the binary.
|
|
151
|
+
|
|
152
|
+
## Safety
|
|
153
|
+
|
|
154
|
+
- **No auto-commit**: re-agent writes code but never commits or pushes
|
|
155
|
+
- **Bounded retries**: Hard cap on fix loop iterations (default: 4)
|
|
156
|
+
- **Deterministic logs**: Every LLM call logged with timestamps
|
|
157
|
+
- **No destructive ops**: Never deletes files, modifies git, or runs builds
|
|
158
|
+
- **Session isolation**: Progress appended, never overwritten
|
|
159
|
+
|
|
160
|
+
## Development
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
git clone https://github.com/dryxio/auto-re-agent.git
|
|
164
|
+
cd auto-re-agent
|
|
165
|
+
python -m venv .venv && source .venv/bin/activate
|
|
166
|
+
pip install -e ".[dev]"
|
|
167
|
+
|
|
168
|
+
pytest tests/
|
|
169
|
+
ruff check src/
|
|
170
|
+
mypy src/re_agent/
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## License
|
|
174
|
+
|
|
175
|
+
MIT
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Architecture
|
|
2
|
+
|
|
3
|
+
re-agent is structured as a layered pipeline:
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
CLI -> Config -> Orchestrator -> Agent Loop -> LLM Providers
|
|
7
|
+
| |
|
|
8
|
+
v v
|
|
9
|
+
Function Picker RE Backend (Ghidra)
|
|
10
|
+
|
|
|
11
|
+
v
|
|
12
|
+
Parity Engine
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Layers
|
|
16
|
+
|
|
17
|
+
- **CLI**: argparse entry points (init, reverse, parity, status)
|
|
18
|
+
- **Config**: YAML + env + CLI overlay, project profiles
|
|
19
|
+
- **Orchestrator**: Single function or class-level auto-advance
|
|
20
|
+
- **Agents**: Reverser + Checker with fix loop
|
|
21
|
+
- **LLM**: Protocol-based providers (Claude, OpenAI-compatible APIs, Codex CLI)
|
|
22
|
+
- **Backend**: RE tool abstraction with capability flags
|
|
23
|
+
- **Parity**: 11-signal verification engine with scoring
|
|
24
|
+
- **Reports**: JSON/markdown output, session tracking
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# Configuration
|
|
2
|
+
|
|
3
|
+
re-agent is configured via `re-agent.yaml`, environment variables, and CLI flags.
|
|
4
|
+
|
|
5
|
+
## Priority Order
|
|
6
|
+
|
|
7
|
+
CLI flags > Environment variables > YAML config > Defaults
|
|
8
|
+
|
|
9
|
+
## Environment Variables
|
|
10
|
+
|
|
11
|
+
| Variable | Maps to |
|
|
12
|
+
|----------|---------|
|
|
13
|
+
| `RE_AGENT_LLM_PROVIDER` | `llm.provider` |
|
|
14
|
+
| `RE_AGENT_LLM_API_KEY` | `llm.api_key` |
|
|
15
|
+
| `RE_AGENT_LLM_MODEL` | `llm.model` |
|
|
16
|
+
| `RE_AGENT_LLM_BASE_URL` | `llm.base_url` |
|
|
17
|
+
| `RE_AGENT_BACKEND_CLI_PATH` | `backend.cli_path` |
|
|
18
|
+
| `RE_AGENT_BACKEND_TIMEOUT` | `backend.timeout_s` |
|
|
19
|
+
|
|
20
|
+
## LLM Config
|
|
21
|
+
|
|
22
|
+
```yaml
|
|
23
|
+
llm:
|
|
24
|
+
provider: "claude" # claude | openai | openai-compat | codex
|
|
25
|
+
model: "claude-sonnet-4-5-20250929"
|
|
26
|
+
api_key: null
|
|
27
|
+
base_url: null
|
|
28
|
+
max_tokens: 4096
|
|
29
|
+
temperature: 0.0
|
|
30
|
+
timeout_s: 1800
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Notes:
|
|
34
|
+
|
|
35
|
+
- `claude` uses the Anthropic SDK and typically reads `ANTHROPIC_API_KEY`
|
|
36
|
+
- `openai` and `openai-compat` use the OpenAI-compatible chat completions provider and typically read `OPENAI_API_KEY`
|
|
37
|
+
- `codex` uses the local `codex` CLI and ChatGPT login credentials instead of an API key
|
|
38
|
+
|
|
39
|
+
## Project Profile
|
|
40
|
+
|
|
41
|
+
The `project_profile` section makes re-agent work across different RE projects:
|
|
42
|
+
|
|
43
|
+
```yaml
|
|
44
|
+
project_profile:
|
|
45
|
+
hook_patterns:
|
|
46
|
+
- 'RH_ScopedInstall\s*\(\s*(\w+)\s*,\s*(0x[0-9A-Fa-f]+)'
|
|
47
|
+
stub_markers: ["NOTSA_UNREACHABLE"]
|
|
48
|
+
stub_call_prefix: "plugin::Call"
|
|
49
|
+
source_root: "./source/game_sa"
|
|
50
|
+
source_extensions: [".cpp", ".h", ".hpp"]
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Parity Config
|
|
54
|
+
|
|
55
|
+
```yaml
|
|
56
|
+
parity:
|
|
57
|
+
enabled: true
|
|
58
|
+
call_count_warn_diff: 3
|
|
59
|
+
inline_wrapper_autoskip: false
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Orchestrator Config
|
|
63
|
+
|
|
64
|
+
```yaml
|
|
65
|
+
orchestrator:
|
|
66
|
+
max_review_rounds: 4
|
|
67
|
+
max_functions_per_class: 10
|
|
68
|
+
objective_verifier_enabled: true
|
|
69
|
+
objective_call_count_tolerance: 3
|
|
70
|
+
objective_control_flow_tolerance: 2
|
|
71
|
+
```
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Getting Started
|
|
2
|
+
|
|
3
|
+
## Installation
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
pip install auto-re-agent
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
## Quick Start
|
|
10
|
+
|
|
11
|
+
1. Initialize configuration:
|
|
12
|
+
```bash
|
|
13
|
+
re-agent init
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
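2. Edit `re-agent.yaml` with your project settings and Ghidra bridge path (`backend.cli_path`), and provide your LLM API key either as `llm.api_key` or via the `RE_AGENT_LLM_API_KEY` environment variable.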
|
|
17
|
+
|
|
18
|
+
3. Reverse a single function:
|
|
19
|
+
```bash
|
|
20
|
+
re-agent reverse --address 0x6F86A0 --class CTrain
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
4. Reverse a full class:
|
|
24
|
+
```bash
|
|
25
|
+
re-agent reverse --class CTrain --max-functions 5
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
5. Run parity checks:
|
|
29
|
+
```bash
|
|
30
|
+
re-agent parity --limit 50
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
6. Check progress:
|
|
34
|
+
```bash
|
|
35
|
+
re-agent status
|
|
36
|
+
```
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "auto-re-agent"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Autonomous reverse-engineering agent with a source-aware reverser/checker loop, objective verification, parity checks, and a Ghidra backend"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [{ name = "Dryxio" }]
|
|
13
|
+
keywords = ["reverse-engineering", "ghidra", "llm", "autonomous-agent", "binary-analysis"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
"Topic :: Security",
|
|
24
|
+
"Topic :: Software Development :: Disassemblers",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"pyyaml>=6.0",
|
|
28
|
+
"anthropic>=0.40",
|
|
29
|
+
"openai>=1.0",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[project.optional-dependencies]
|
|
33
|
+
ghidra-bridge = ["ghidra-ai-bridge>=0.1.0"]
|
|
34
|
+
dev = ["pytest>=8.0", "pytest-cov>=5.0", "ruff>=0.8", "mypy>=1.13"]
|
|
35
|
+
|
|
36
|
+
[project.scripts]
|
|
37
|
+
re-agent = "re_agent.cli.main:main"
|
|
38
|
+
|
|
39
|
+
[project.urls]
|
|
40
|
+
Homepage = "https://github.com/dryxio/auto-re-agent"
|
|
41
|
+
Repository = "https://github.com/dryxio/auto-re-agent"
|
|
42
|
+
Issues = "https://github.com/dryxio/auto-re-agent/issues"
|
|
43
|
+
|
|
44
|
+
[tool.hatch.build.targets.wheel]
|
|
45
|
+
packages = ["src/re_agent"]
|
|
46
|
+
|
|
47
|
+
[tool.ruff]
|
|
48
|
+
target-version = "py310"
|
|
49
|
+
line-length = 120
|
|
50
|
+
src = ["src"]
|
|
51
|
+
|
|
52
|
+
[tool.ruff.lint]
|
|
53
|
+
select = ["E", "F", "W", "I", "UP", "B", "SIM"]
|
|
54
|
+
|
|
55
|
+
[tool.mypy]
|
|
56
|
+
python_version = "3.10"
|
|
57
|
+
strict = true
|
|
58
|
+
warn_return_any = true
|
|
59
|
+
warn_unused_configs = true
|
|
60
|
+
|
|
61
|
+
[[tool.mypy.overrides]]
|
|
62
|
+
module = ["anthropic.*", "openai.*"]
|
|
63
|
+
ignore_missing_imports = true
|
|
64
|
+
|
|
65
|
+
[tool.pytest.ini_options]
|
|
66
|
+
testpaths = ["tests"]
|
|
67
|
+
markers = [
|
|
68
|
+
"llm: tests that require LLM API keys",
|
|
69
|
+
"ghidra: tests that require ghidra-ai-bridge",
|
|
70
|
+
]
|