pyfix-agent 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- eval_dataset/AttributeError/patched_script1.py +5 -0
- eval_dataset/AttributeError/patched_script2.py +5 -0
- eval_dataset/AttributeError/patched_script3.py +6 -0
- eval_dataset/AttributeError/script1.py +6 -0
- eval_dataset/AttributeError/script2.py +6 -0
- eval_dataset/AttributeError/script3.py +8 -0
- eval_dataset/AttributeError/test_script1.py +9 -0
- eval_dataset/AttributeError/test_script2.py +5 -0
- eval_dataset/AttributeError/test_script3.py +5 -0
- eval_dataset/IndexError/patched_script1.py +7 -0
- eval_dataset/IndexError/patched_script2.py +7 -0
- eval_dataset/IndexError/patched_script3.py +5 -0
- eval_dataset/IndexError/script1.py +5 -0
- eval_dataset/IndexError/script2.py +9 -0
- eval_dataset/IndexError/script3.py +4 -0
- eval_dataset/IndexError/test_script1.py +5 -0
- eval_dataset/IndexError/test_script2.py +5 -0
- eval_dataset/IndexError/test_script3.py +5 -0
- eval_dataset/LogicBugs/patched_script1.py +6 -0
- eval_dataset/LogicBugs/patched_script2.py +4 -0
- eval_dataset/LogicBugs/patched_script3.py +6 -0
- eval_dataset/LogicBugs/script1.py +6 -0
- eval_dataset/LogicBugs/script2.py +4 -0
- eval_dataset/LogicBugs/script3.py +6 -0
- eval_dataset/LogicBugs/test_script1.py +5 -0
- eval_dataset/LogicBugs/test_script2.py +4 -0
- eval_dataset/LogicBugs/test_script3.py +5 -0
- eval_dataset/NameError/patched_script1.py +4 -0
- eval_dataset/NameError/patched_script2.py +4 -0
- eval_dataset/NameError/patched_script3.py +7 -0
- eval_dataset/NameError/script1.py +5 -0
- eval_dataset/NameError/script2.py +4 -0
- eval_dataset/NameError/script3.py +8 -0
- eval_dataset/NameError/test_script1.py +5 -0
- eval_dataset/NameError/test_script2.py +4 -0
- eval_dataset/NameError/test_script3.py +4 -0
- eval_dataset/TypeError/patched_script1.py +3 -0
- eval_dataset/TypeError/patched_script2.py +6 -0
- eval_dataset/TypeError/patched_script3.py +3 -0
- eval_dataset/TypeError/script1.py +5 -0
- eval_dataset/TypeError/script2.py +7 -0
- eval_dataset/TypeError/script3.py +4 -0
- eval_dataset/TypeError/test_script1.py +4 -0
- eval_dataset/TypeError/test_script2.py +5 -0
- eval_dataset/TypeError/test_script3.py +5 -0
- pyfix_agent-1.0.0.dist-info/METADATA +175 -0
- pyfix_agent-1.0.0.dist-info/RECORD +50 -0
- pyfix_agent-1.0.0.dist-info/WHEEL +5 -0
- pyfix_agent-1.0.0.dist-info/entry_points.txt +2 -0
- pyfix_agent-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyfix-agent
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: An autonomous, multi-turn AI debugging agent built from scratch using AST surgery.
|
|
5
|
+
Author: Jaswin Reddy
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Environment :: Console
|
|
11
|
+
Classifier: Topic :: Software Development :: Debuggers
|
|
12
|
+
Requires-Python: >=3.10
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
Requires-Dist: huggingface_hub>=0.20.0
|
|
15
|
+
|
|
16
|
+
# 🚀 PyFix Agent: Autonomous ReAct Debugging Loop
|
|
17
|
+
|
|
18
|
+
An autonomous, multi-turn AI debugging agent built entirely from scratch in Python.
|
|
19
|
+
|
|
20
|
+
Unlike standard wrappers that simply ask an LLM to "fix this code," **PyFix Agent** implements a custom **ReAct (Reasoning and Acting)** state machine and utilizes **Abstract Syntax Tree (AST)** manipulation to surgically patch Python files in real-time. It evaluates its own fixes by executing the code inside a sandboxed subprocess, iterating dynamically until the script passes or it reaches the maximum iteration limit.
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## 🧠 Core Architecture
|
|
25
|
+
|
|
26
|
+
This project deliberately avoids high-level agentic abstractions (such as LangChain or LlamaIndex) to build the core agentic loop from first principles.
|
|
27
|
+
|
|
28
|
+
```mermaid
|
|
29
|
+
graph TD
|
|
30
|
+
A[Start] --> B[Execute target script via Subprocess]
|
|
31
|
+
B --> C{Execution successful?}
|
|
32
|
+
C -- Yes --> D[Stop: Bug Fixed 🎉]
|
|
33
|
+
C -- No --> E[Extract Stack Trace & Error Message]
|
|
34
|
+
E --> F[Parse Stack Trace for last function name]
|
|
35
|
+
F --> G[Construct LLM Prompt with Context Memory]
|
|
36
|
+
G --> H[Query LLM for patch]
|
|
37
|
+
H --> I[Clean LLM markdown and parse AST]
|
|
38
|
+
I --> J{Function-level error?}
|
|
39
|
+
J -- Yes --> K[Use AST surgery to replace target function node]
|
|
40
|
+
J -- No --> L[Fallback: Replace entire file]
|
|
41
|
+
K --> M[Write patched script to disk]
|
|
42
|
+
L --> M
|
|
43
|
+
M --> N{Max iterations reached?}
|
|
44
|
+
N -- Yes --> O[Stop: Max iterations reached ❌]
|
|
45
|
+
N -- No --> B
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Key Architectural Pillars
|
|
49
|
+
|
|
50
|
+
1. **Execution Engine**: Runs the target script in a Python subprocess, capturing standard output, standard error, and stack traces, with a safety timeout so a hanging script cannot stall the loop.
|
|
51
|
+
2. **Context Memory**: Maintains a chronological conversation history array, allowing the LLM to learn from its previously failed patching attempts without losing the original code context.
|
|
52
|
+
3. **AST Surgery**: Parses the LLM's response and uses Python's native `ast.NodeTransformer` to swap out broken function nodes with the corrected logic, leaving the rest of the file entirely untouched.
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## ⚖️ Design Choices & Trade-offs
|
|
57
|
+
|
|
58
|
+
Building an autonomous agent requires balancing safety, context window limits, and real-world unpredictability.
|
|
59
|
+
|
|
60
|
+
### 1. AST Function Surgery vs. Full File Overwrites
|
|
61
|
+
* **The Problem**: Asking an LLM to rewrite an entire 1,000-line script to fix a single typo is slow, expensive, and risks the model "truncating" or getting lazy with existing, working code.
|
|
62
|
+
* **The Solution**: The agent extracts the specific `function_name` from the traceback. It prompts the LLM only for the corrected function. The `PythonSurgery` class (inheriting from `ast.NodeTransformer`) then traverses the syntax tree, finds the broken `ast.FunctionDef`, and seamlessly swaps it with the new node.
|
|
63
|
+
* **The Trade-off**: While this guarantees perfect preservation of unrelated code, it requires specialized routing logic for errors that occur at the top-level `<module>` scope, which bypass the AST function surgery and require full-file patching.
|
|
64
|
+
|
|
65
|
+
### 2. Execution-Based Evaluation vs. Exact String Matching
|
|
66
|
+
* **The Problem**: How do we benchmark whether the agent successfully fixed a bug? Traditional exact string matching fails because the LLM might produce a fix that is semantically equivalent but textually different (e.g., `x += 1` instead of `x = x + 1`, or different variable names), resulting in false negatives.
|
|
67
|
+
* **The Solution**: The evaluation suite uses **Execution-Based Benchmarking**. The benchmark dynamically runs automated unit tests or validation scripts containing assertion statements against the patched files. If the patched script exits with code 0, it is marked as a success.
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## 📊 Evaluation Benchmark
|
|
72
|
+
|
|
73
|
+
The agent is evaluated against a curated dataset of scripts spanning 5 distinct error categories:
|
|
74
|
+
* **NameError**: Undefined variables, scope issues, and missing imports.
|
|
75
|
+
* **IndexError**: Off-by-one loop conditions and bounds checking.
|
|
76
|
+
* **TypeError**: Data type mismatches and unsupported operations.
|
|
77
|
+
* **AttributeError**: Typographical errors in object methods or calling methods on `NoneType`.
|
|
78
|
+
* **Logic Bugs**: Silent errors that require execution-based assertions to detect.
|
|
79
|
+
|
|
80
|
+
*(Currently evaluated against an automated function-level testing benchmark suite inside `eval_dataset/`)*
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## 🛠️ Installation & Usage
|
|
85
|
+
|
|
86
|
+
### Option 1: Standard Pip Installation (Recommended)
|
|
87
|
+
|
|
88
|
+
To install PyFix Agent from a local clone in editable mode (which registers the `pyfix-agent` CLI command in your active Python environment):
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
# Clone the repository
|
|
92
|
+
git clone https://github.com/yourusername/agent-debugging-loop.git
|
|
93
|
+
cd agent-debugging-loop
|
|
94
|
+
|
|
95
|
+
# Install package in editable mode
|
|
96
|
+
pip install -e .
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
This registers the `pyfix-agent` CLI command, which can be run from any directory as long as the Python environment it was installed into is active.
|
|
100
|
+
|
|
101
|
+
### Option 2: Run as a Python Script
|
|
102
|
+
|
|
103
|
+
If you prefer to run it without installing the package:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
pip install huggingface_hub
|
|
107
|
+
python pyfix_agent.py --script <path_to_script>
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Configuration
|
|
111
|
+
|
|
112
|
+
Export your Hugging Face Hub token to your environment variables to ensure secure API access:
|
|
113
|
+
|
|
114
|
+
**Bash (Linux/macOS):**
|
|
115
|
+
```bash
|
|
116
|
+
export HF_TOKEN="your_huggingface_token_here"
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
**PowerShell (Windows):**
|
|
120
|
+
```powershell
|
|
121
|
+
$env:HF_TOKEN="your_huggingface_token_here"
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## 🚀 CLI Usage Guide
|
|
127
|
+
|
|
128
|
+
Point the agent at any broken Python script. Use the `--verbose` flag to watch the ReAct state machine's internal thought process.
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
# Run with default Qwen model
|
|
132
|
+
pyfix-agent --script my_broken_code.py --verbose --max_iter 5
|
|
133
|
+
|
|
134
|
+
# Run using a specific Hugging Face model
|
|
135
|
+
pyfix-agent --script my_broken_code.py --model "mistralai/Mixtral-8x7B-Instruct-v0.1" --max_iter 3
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### CLI Command Options
|
|
139
|
+
|
|
140
|
+
| Argument | Type | Default | Description |
|
|
141
|
+
|---|---|---|---|
|
|
142
|
+
| `--script` | `str` | *Required* | Path to the broken Python script to debug |
|
|
143
|
+
| `--max_iter` | `int` | `5` | Maximum number of debugging iterations |
|
|
144
|
+
| `--verbose` | `flag` | `False` | Enable logging of reasoning, tracebacks, and raw LLM responses |
|
|
145
|
+
| `--model` | `str` | `Qwen/Qwen2.5-72B-Instruct:cheapest` | Model endpoint ID on Hugging Face Serverless API |
|
|
146
|
+
|
|
147
|
+
### Running the Evaluation Benchmark
|
|
148
|
+
|
|
149
|
+
To run the full evaluation suite against the benchmark:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
python benchmark.py
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## 📦 Packaging & Release Recommendations
|
|
158
|
+
|
|
159
|
+
For releasing version 1.0.0 of **PyFix Agent** as a CLI tool:
|
|
160
|
+
|
|
161
|
+
### 1. Direct Python Package (Recommended for Python users)
|
|
162
|
+
Distribute PyFix Agent as a Python package via PyPI.
|
|
163
|
+
* **Build tool**: Use `build` (`pip install build`) to compile source distribution `.tar.gz` and wheel `.whl` files.
|
|
164
|
+
* **Upload tool**: Use `twine` to publish the artifacts to PyPI.
|
|
165
|
+
* **Install**: Users can install it directly with `pip install pyfix-agent` or run it in an isolated environment with `pipx run pyfix-agent`.
|
|
166
|
+
|
|
167
|
+
### 2. Standalone Binary Executable (Recommended for Non-Python users)
|
|
168
|
+
Compile the script into a standalone executable using `PyInstaller`.
|
|
169
|
+
* **Compile**:
|
|
170
|
+
```bash
|
|
171
|
+
pip install pyinstaller
|
|
172
|
+
pyinstaller --onefile --name pyfix-agent pyfix_agent.py
|
|
173
|
+
```
|
|
174
|
+
* **Release Artifact**: Upload the compiled executable (`dist/pyfix-agent` or `dist/pyfix-agent.exe`) directly as a release asset in your GitHub Releases.
|
|
175
|
+
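* *Note: The target environment still needs a Python interpreter installed to execute target scripts. In a PyInstaller-frozen build, `sys.executable` points to the bundled executable itself rather than to a Python interpreter, so the interpreter must be located another way (for example, by looking up `python` on `PATH`).*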
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
eval_dataset/AttributeError/patched_script1.py,sha256=lH2owaBK062iOSgiQXoRixSgWfKPBpQ0f-fpF6kONgY,118
|
|
2
|
+
eval_dataset/AttributeError/patched_script2.py,sha256=IAiV4-pcqygm0GiqavPL1-IPCYTftcncr46U9BNRtXg,188
|
|
3
|
+
eval_dataset/AttributeError/patched_script3.py,sha256=ncDuden57gaXoSOtkntXhhz_lFJtELgoEHtb3JrLriQ,132
|
|
4
|
+
eval_dataset/AttributeError/script1.py,sha256=E7FKoS1PhGRakn_4zqHJ_gE-5LfjE27L34UaeO6JeWw,120
|
|
5
|
+
eval_dataset/AttributeError/script2.py,sha256=KVmNnHU179hyz3qcW8s0KigI9jnIUD41uijZolIRYjs,171
|
|
6
|
+
eval_dataset/AttributeError/script3.py,sha256=kjye14exdmKMIXW-xGBBw4_zkLfAxC4O8rn7dOp98ZI,165
|
|
7
|
+
eval_dataset/AttributeError/test_script1.py,sha256=GIi3NVw677UkA2tiCHDNRLuixcYwgYO-9IwGBbU_Nw0,186
|
|
8
|
+
eval_dataset/AttributeError/test_script2.py,sha256=z1PcCvnPIyZYTQdN7qrkBIf4t2HejCLsUheVwTUoE9c,217
|
|
9
|
+
eval_dataset/AttributeError/test_script3.py,sha256=eKG6oHUXfdtAEFq9pqhBckzc-9-p0P9w8lZMfRkO6Ik,190
|
|
10
|
+
eval_dataset/IndexError/patched_script1.py,sha256=v1FEjwLRs-xhIDoMM7sIEDvmG3l-6Fga8tFJ2ZFssxU,182
|
|
11
|
+
eval_dataset/IndexError/patched_script2.py,sha256=JEIPSUOLlv5fgDeJR2HTacWarI2lXDQAwM44gTBopK8,173
|
|
12
|
+
eval_dataset/IndexError/patched_script3.py,sha256=Bz72gx70jSsmcrV1wMBI46mtAq5IEt1rnL2xtmie76Y,103
|
|
13
|
+
eval_dataset/IndexError/script1.py,sha256=YBi1tGjrIUuQDzQ3gDhnn_bP2QuWmrwGoOo6dEQABH4,130
|
|
14
|
+
eval_dataset/IndexError/script2.py,sha256=gGR3euXQdlg1Ps_9nBsk9lr1BBx-tzweULvqWTIDccU,183
|
|
15
|
+
eval_dataset/IndexError/script3.py,sha256=lRIFNInmgXq9NM-ycjJ9Hx8nP4AIm02FKhKVskhR2Ao,65
|
|
16
|
+
eval_dataset/IndexError/test_script1.py,sha256=2HhbFPge2VJVZuFQeWBtL5qUOctSGwE4CbYRG3u8RSc,159
|
|
17
|
+
eval_dataset/IndexError/test_script2.py,sha256=DvWhP5nT0X5AR3QcakSj0f31yZJOF6h8HiOZRHuRYmM,168
|
|
18
|
+
eval_dataset/IndexError/test_script3.py,sha256=VQoVWGSgRUUbXTjG0UV-VPVsKs_Un7hWaiu7OUs3Tyw,127
|
|
19
|
+
eval_dataset/LogicBugs/patched_script1.py,sha256=wpX21nt4rXlDkjswLRkoTiGwZ1bEIgo_fOqbgvFWTjQ,107
|
|
20
|
+
eval_dataset/LogicBugs/patched_script2.py,sha256=wsGm-n7FVpUfdxkyRoGEqhqFuajqLOvL9h6dLk11v00,60
|
|
21
|
+
eval_dataset/LogicBugs/patched_script3.py,sha256=GToKyMIWF4n3v57QoEjVTYB_jxv6CfkntMfnEmEIN2E,92
|
|
22
|
+
eval_dataset/LogicBugs/script1.py,sha256=1eM6eYRiMEcY2ZW_y7IdykYz9IoYjCx92Yyv-SFJr4M,107
|
|
23
|
+
eval_dataset/LogicBugs/script2.py,sha256=DB9fdGCY3lsZQt4wyx4mqVavWTNCf6EZKCoz1y9JuV0,60
|
|
24
|
+
eval_dataset/LogicBugs/script3.py,sha256=x-fjTsaeva8W6LdbG-bNOwHLAHJtZ_VCESH779wUleY,92
|
|
25
|
+
eval_dataset/LogicBugs/test_script1.py,sha256=zIejCUs5wUedqju5NhvP_CZRUmBc6PWHqOrXTbHip9Y,119
|
|
26
|
+
eval_dataset/LogicBugs/test_script2.py,sha256=l08Ztlz7p5KYh2ZyLqpBYMerkCj-1CI3_gJufq3_YjY,93
|
|
27
|
+
eval_dataset/LogicBugs/test_script3.py,sha256=HqEOCMR3NdZ7EyqBuvfSMxDWi-CnGTkcBIxbdx_2aC8,126
|
|
28
|
+
eval_dataset/NameError/patched_script1.py,sha256=wBkN0NNaNLN53OgZpYFRSoOHso9xk4lNnE1jr87gcdU,126
|
|
29
|
+
eval_dataset/NameError/patched_script2.py,sha256=ClPn0Z9b_K9Lx6LEdDI6hHHGuHoZ39Dqn8KZo2KZTc4,93
|
|
30
|
+
eval_dataset/NameError/patched_script3.py,sha256=v0XYvRFw0RUNegYThJ9j0-NpBjfzjq0_v9XkPAy5ef0,186
|
|
31
|
+
eval_dataset/NameError/script1.py,sha256=G2oulAeKfBqWzwkR2bHjeCXp9XREyHVxSiaqYPT-DvU,129
|
|
32
|
+
eval_dataset/NameError/script2.py,sha256=TlRG3GYQ1uMsHL1u6zDUJZ-stV2xSvAORyJHrhrtYpg,78
|
|
33
|
+
eval_dataset/NameError/script3.py,sha256=7DeRoXXxoYUo6Qn6jZ4SqnlFhSCqq7-dV2Ee9ryEy1w,187
|
|
34
|
+
eval_dataset/NameError/test_script1.py,sha256=AWYfB4JaVg8w9kqlYxaqNNI5XNM1JDfzoWJYeWDyMcY,182
|
|
35
|
+
eval_dataset/NameError/test_script2.py,sha256=uVlxiYujHUA-Zlzo84jJRhQiTpInKPqjNDXZgnNqyWk,109
|
|
36
|
+
eval_dataset/NameError/test_script3.py,sha256=ykcZ5sDcuGbqUmCD67j3SphyfV2xnthkXhNNZNhrdBE,173
|
|
37
|
+
eval_dataset/TypeError/patched_script1.py,sha256=-B2vcea6MrvtF6ueaS1UuwZ2GvHh8HSQRpxwnV5awSk,87
|
|
38
|
+
eval_dataset/TypeError/patched_script2.py,sha256=YC2cTII3Ldj6gH8azD4aFu7hlfDsCt0XoSiZqeQbn30,145
|
|
39
|
+
eval_dataset/TypeError/patched_script3.py,sha256=zIUw1N0_eQTGso4hNSVQzueglXELWIAV6BNj2zqin-k,86
|
|
40
|
+
eval_dataset/TypeError/script1.py,sha256=gwaoA0186U2JTO2J5S77_zyVb0Rosy-5kv2887mj8B0,88
|
|
41
|
+
eval_dataset/TypeError/script2.py,sha256=TpPHxCBBlKOfwB2Z_hHKWef5oKXcVuJIuF0ehfVpK4c,144
|
|
42
|
+
eval_dataset/TypeError/script3.py,sha256=xPWU0pBfBTpf3MMdNo52dACEY83CVwoTeexpoO0h9U0,86
|
|
43
|
+
eval_dataset/TypeError/test_script1.py,sha256=x6bCVLWJict7MQAHGbbKkA7gALkcwtJz2n3ZKaygZTQ,129
|
|
44
|
+
eval_dataset/TypeError/test_script2.py,sha256=_pe32MB-Nj79vnRnbX4n_Spx-FMmKP9olZkS-GLuBag,122
|
|
45
|
+
eval_dataset/TypeError/test_script3.py,sha256=7iGiaij8vf-2okd4tp-5_PqDk680kgBq0J0jcttXRXs,167
|
|
46
|
+
pyfix_agent-1.0.0.dist-info/METADATA,sha256=5DKJb0dhN2nbGTv5HWG-xecIwf4MuFnkdNge3kk1IgY,7899
|
|
47
|
+
pyfix_agent-1.0.0.dist-info/WHEEL,sha256=K260EYznzXsJYBQGqmI8VTxEdiZYNvDZwW9cBh9-_MA,91
|
|
48
|
+
pyfix_agent-1.0.0.dist-info/entry_points.txt,sha256=cOrLCVV3N52-aRYIPPgpX2xPA_UyCrunCwOheeKpLCM,49
|
|
49
|
+
pyfix_agent-1.0.0.dist-info/top_level.txt,sha256=iZTUFeaLUxAhukvYAgg-T4827Qd8Ovhc17hd9pZ8TgM,13
|
|
50
|
+
pyfix_agent-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
eval_dataset
|