pyfix-agent 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. pyfix_agent-1.0.0/PKG-INFO +175 -0
  2. pyfix_agent-1.0.0/README.md +160 -0
  3. pyfix_agent-1.0.0/eval_dataset/AttributeError/patched_script1.py +5 -0
  4. pyfix_agent-1.0.0/eval_dataset/AttributeError/patched_script2.py +5 -0
  5. pyfix_agent-1.0.0/eval_dataset/AttributeError/patched_script3.py +6 -0
  6. pyfix_agent-1.0.0/eval_dataset/AttributeError/script1.py +6 -0
  7. pyfix_agent-1.0.0/eval_dataset/AttributeError/script2.py +6 -0
  8. pyfix_agent-1.0.0/eval_dataset/AttributeError/script3.py +8 -0
  9. pyfix_agent-1.0.0/eval_dataset/AttributeError/test_script1.py +9 -0
  10. pyfix_agent-1.0.0/eval_dataset/AttributeError/test_script2.py +5 -0
  11. pyfix_agent-1.0.0/eval_dataset/AttributeError/test_script3.py +5 -0
  12. pyfix_agent-1.0.0/eval_dataset/IndexError/patched_script1.py +7 -0
  13. pyfix_agent-1.0.0/eval_dataset/IndexError/patched_script2.py +7 -0
  14. pyfix_agent-1.0.0/eval_dataset/IndexError/patched_script3.py +5 -0
  15. pyfix_agent-1.0.0/eval_dataset/IndexError/script1.py +5 -0
  16. pyfix_agent-1.0.0/eval_dataset/IndexError/script2.py +9 -0
  17. pyfix_agent-1.0.0/eval_dataset/IndexError/script3.py +4 -0
  18. pyfix_agent-1.0.0/eval_dataset/IndexError/test_script1.py +5 -0
  19. pyfix_agent-1.0.0/eval_dataset/IndexError/test_script2.py +5 -0
  20. pyfix_agent-1.0.0/eval_dataset/IndexError/test_script3.py +5 -0
  21. pyfix_agent-1.0.0/eval_dataset/LogicBugs/patched_script1.py +6 -0
  22. pyfix_agent-1.0.0/eval_dataset/LogicBugs/patched_script2.py +4 -0
  23. pyfix_agent-1.0.0/eval_dataset/LogicBugs/patched_script3.py +6 -0
  24. pyfix_agent-1.0.0/eval_dataset/LogicBugs/script1.py +6 -0
  25. pyfix_agent-1.0.0/eval_dataset/LogicBugs/script2.py +4 -0
  26. pyfix_agent-1.0.0/eval_dataset/LogicBugs/script3.py +6 -0
  27. pyfix_agent-1.0.0/eval_dataset/LogicBugs/test_script1.py +5 -0
  28. pyfix_agent-1.0.0/eval_dataset/LogicBugs/test_script2.py +4 -0
  29. pyfix_agent-1.0.0/eval_dataset/LogicBugs/test_script3.py +5 -0
  30. pyfix_agent-1.0.0/eval_dataset/NameError/patched_script1.py +4 -0
  31. pyfix_agent-1.0.0/eval_dataset/NameError/patched_script2.py +4 -0
  32. pyfix_agent-1.0.0/eval_dataset/NameError/patched_script3.py +7 -0
  33. pyfix_agent-1.0.0/eval_dataset/NameError/script1.py +5 -0
  34. pyfix_agent-1.0.0/eval_dataset/NameError/script2.py +4 -0
  35. pyfix_agent-1.0.0/eval_dataset/NameError/script3.py +8 -0
  36. pyfix_agent-1.0.0/eval_dataset/NameError/test_script1.py +5 -0
  37. pyfix_agent-1.0.0/eval_dataset/NameError/test_script2.py +4 -0
  38. pyfix_agent-1.0.0/eval_dataset/NameError/test_script3.py +4 -0
  39. pyfix_agent-1.0.0/eval_dataset/TypeError/patched_script1.py +3 -0
  40. pyfix_agent-1.0.0/eval_dataset/TypeError/patched_script2.py +6 -0
  41. pyfix_agent-1.0.0/eval_dataset/TypeError/patched_script3.py +3 -0
  42. pyfix_agent-1.0.0/eval_dataset/TypeError/script1.py +5 -0
  43. pyfix_agent-1.0.0/eval_dataset/TypeError/script2.py +7 -0
  44. pyfix_agent-1.0.0/eval_dataset/TypeError/script3.py +4 -0
  45. pyfix_agent-1.0.0/eval_dataset/TypeError/test_script1.py +4 -0
  46. pyfix_agent-1.0.0/eval_dataset/TypeError/test_script2.py +5 -0
  47. pyfix_agent-1.0.0/eval_dataset/TypeError/test_script3.py +5 -0
  48. pyfix_agent-1.0.0/pyfix_agent.egg-info/PKG-INFO +175 -0
  49. pyfix_agent-1.0.0/pyfix_agent.egg-info/SOURCES.txt +53 -0
  50. pyfix_agent-1.0.0/pyfix_agent.egg-info/dependency_links.txt +1 -0
  51. pyfix_agent-1.0.0/pyfix_agent.egg-info/entry_points.txt +2 -0
  52. pyfix_agent-1.0.0/pyfix_agent.egg-info/requires.txt +1 -0
  53. pyfix_agent-1.0.0/pyfix_agent.egg-info/top_level.txt +1 -0
  54. pyfix_agent-1.0.0/pyproject.toml +27 -0
  55. pyfix_agent-1.0.0/setup.cfg +4 -0
@@ -0,0 +1,175 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyfix-agent
3
+ Version: 1.0.0
4
+ Summary: An autonomous, multi-turn AI debugging agent built from scratch using AST surgery.
5
+ Author: Jaswin Reddy
6
+ License: MIT
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Environment :: Console
11
+ Classifier: Topic :: Software Development :: Debuggers
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: huggingface_hub>=0.20.0
15
+
16
+ # 🚀 PyFix Agent: Autonomous ReAct Debugging Loop
17
+
18
+ An autonomous, multi-turn AI debugging agent built entirely from scratch in Python.
19
+
20
+ Unlike standard wrappers that simply ask an LLM to "fix this code," **PyFix Agent** implements a custom **ReAct (Reasoning and Acting)** state machine and utilizes **Abstract Syntax Tree (AST)** manipulation to surgically patch Python files in real-time. It evaluates its own fixes by executing the code inside a sandboxed subprocess, iterating dynamically until the script passes or it reaches the maximum iteration limit.
21
+
22
+ ---
23
+
24
+ ## 🧠 Core Architecture
25
+
26
+ This project deliberately avoids high-level agentic abstractions (such as LangChain or LlamaIndex) to build the core agentic loop from first principles.
27
+
28
+ ```mermaid
29
+ graph TD
30
+ A[Start] --> B[Execute target script via Subprocess]
31
+ B --> C{Execution successful?}
32
+ C -- Yes --> D[Stop: Bug Fixed 🎉]
33
+ C -- No --> E[Extract Stack Trace & Error Message]
34
+ E --> F[Parse Stack Trace for last function name]
35
+ F --> G[Construct LLM Prompt with Context Memory]
36
+ G --> H[Query LLM for patch]
37
+ H --> I[Clean LLM markdown and parse AST]
38
+ I --> J{Function-level error?}
39
+ J -- Yes --> K[Use AST surgery to replace target function node]
40
+ J -- No --> L[Fallback: Replace entire file]
41
+ K --> M[Write patched script to disk]
42
+ L --> M
43
+ M --> N{Max iterations reached?}
44
+ N -- Yes --> O[Stop: Max iterations reached ❌]
45
+ N -- No --> B
46
+ ```
47
+
48
+ ### Key Architectural Pillars
49
+
50
+ 1. **Execution Engine**: Runs the target script via Python subprocesses, capturing standard outputs, standard errors, and stack traces with safety timeout thresholds.
51
+ 2. **Context Memory**: Maintains a chronological conversation history array, allowing the LLM to learn from its previously failed patching attempts without losing the original code context.
52
+ 3. **AST Surgery**: Parses the LLM's response and uses Python's native `ast.NodeTransformer` to swap out broken function nodes with the corrected logic, leaving the rest of the file entirely untouched.
53
+
54
+ ---
55
+
56
+ ## ⚖️ Design Choices & Trade-offs
57
+
58
+ Building an autonomous agent requires balancing safety, context window limits, and real-world unpredictability.
59
+
60
+ ### 1. AST Function Surgery vs. Full File Overwrites
61
+ * **The Problem**: Asking an LLM to rewrite an entire 1,000-line script to fix a single typo is slow, expensive, and risks the model "truncating" or getting lazy with existing, working code.
62
+ * **The Solution**: The agent extracts the specific `function_name` from the traceback. It prompts the LLM only for the corrected function. The `PythonSurgery` class (inheriting from `ast.NodeTransformer`) then traverses the syntax tree, finds the broken `ast.FunctionDef`, and seamlessly swaps it with the new node.
63
+ * **The Trade-off**: While this guarantees perfect preservation of unrelated code, it requires specialized routing logic for errors that occur at the top-level `<module>` scope, which bypass the AST function surgery and require full-file patching.
64
+
65
+ ### 2. Execution-Based Evaluation vs. Exact String Matching
66
+ * **The Problem**: How do we benchmark whether the agent successfully fixed a bug? Traditional exact string matching fails because the LLM might produce a functionally equivalent fix that uses different syntax or variable names (e.g., `x += 1` instead of `x = x + 1`), resulting in false negatives.
67
+ * **The Solution**: The evaluation suite uses **Execution-Based Benchmarking**. The benchmark dynamically runs automated unit tests or validation scripts containing assertion statements against the patched files. If the test run exits with code 0, the patch is marked as a success.
68
+
69
+ ---
70
+
71
+ ## 📊 Evaluation Benchmark
72
+
73
+ The agent is evaluated against a curated dataset of scripts spanning 5 distinct error categories:
74
+ * **NameError**: Undefined variables, scope issues, and missing imports.
75
+ * **IndexError**: Off-by-one loop conditions and bounds checking.
76
+ * **TypeError**: Data type mismatches and unsupported operations.
77
+ * **AttributeError**: Typographical errors in object methods or calling methods on `NoneType`.
78
+ * **Logic Bugs**: Silent errors that require execution-based assertions to detect.
79
+
80
+ *(Currently evaluated against an automated function-level testing benchmark suite inside `eval_dataset/`)*
81
+
82
+ ---
83
+
84
+ ## 🛠️ Installation & Usage
85
+
86
+ ### Option 1: Standard Pip Installation (Recommended)
87
+
88
+ To install PyFix Agent locally in editable mode (which registers the `pyfix-agent` CLI command in the active Python environment):
89
+
90
+ ```bash
91
+ # Clone the repository
92
+ git clone https://github.com/yourusername/agent-debugging-loop.git
93
+ cd agent-debugging-loop
94
+
95
+ # Install package in editable mode
96
+ pip install -e .
97
+ ```
98
+
99
+ This registers the `pyfix-agent` CLI command, which can be run from any directory while that Python environment is active.
100
+
101
+ ### Option 2: Run as a Python Script
102
+
103
+ If you prefer to run it without installing the package:
104
+
105
+ ```bash
106
+ pip install huggingface_hub
107
+ python pyfix_agent.py --script <path_to_script>
108
+ ```
109
+
110
+ ### Configuration
111
+
112
+ Export your Hugging Face Hub token to your environment variables to ensure secure API access:
113
+
114
+ **Bash (Linux/macOS):**
115
+ ```bash
116
+ export HF_TOKEN="your_huggingface_token_here"
117
+ ```
118
+
119
+ **PowerShell (Windows):**
120
+ ```powershell
121
+ $env:HF_TOKEN="your_huggingface_token_here"
122
+ ```
123
+
124
+ ---
125
+
126
+ ## 🚀 CLI Usage Guide
127
+
128
+ Point the agent at any broken Python script. Use the `--verbose` flag to watch the ReAct state machine's internal thought process.
129
+
130
+ ```bash
131
+ # Run with default Qwen model
132
+ pyfix-agent --script my_broken_code.py --verbose --max_iter 5
133
+
134
+ # Run using a specific Hugging Face model
135
+ pyfix-agent --script my_broken_code.py --model "mistralai/Mixtral-8x7B-Instruct-v0.1" --max_iter 3
136
+ ```
137
+
138
+ ### CLI Command Options
139
+
140
+ | Argument | Type | Default | Description |
141
+ |---|---|---|---|
142
+ | `--script` | `str` | *Required* | Path to the broken Python script to debug |
143
+ | `--max_iter` | `int` | `5` | Maximum number of debugging iterations |
144
+ | `--verbose` | `flag` | `False` | Enable logging of reasoning, tracebacks, and raw LLM responses |
145
+ | `--model` | `str` | `Qwen/Qwen2.5-72B-Instruct:cheapest` | Model endpoint ID on Hugging Face Serverless API |
146
+
147
+ ### Running the Evaluation Benchmark
148
+
149
+ To run the full evaluation suite against the benchmark:
150
+
151
+ ```bash
152
+ python benchmark.py
153
+ ```
154
+
155
+ ---
156
+
157
+ ## 📦 Packaging & Release Recommendations
158
+
159
+ For releasing version 1.0.0 of **PyFix Agent** as a CLI tool:
160
+
161
+ ### 1. Direct Python Package (Recommended for Python users)
162
+ Distribute PyFix Agent as a Python package via PyPI.
163
+ * **Build tool**: Use `build` (`pip install build`) to compile source distribution `.tar.gz` and wheel `.whl` files.
164
+ * **Upload tool**: Use `twine` to publish the artifacts to PyPI.
165
+ * **Install**: Users can install it directly with `pip install pyfix-agent` or run it in an isolated environment using `pipx run pyfix-agent`.
166
+
167
+ ### 2. Standalone Binary Executable (Recommended for Non-Python users)
168
+ Compile the script into a standalone executable using `PyInstaller`.
169
+ * **Compile**:
170
+ ```bash
171
+ pip install pyinstaller
172
+ pyinstaller --onefile --name pyfix-agent pyfix_agent.py
173
+ ```
174
+ * **Release Artifact**: Upload the compiled executable (`dist/pyfix-agent` or `dist/pyfix-agent.exe`) directly as a release asset in your GitHub Releases.
175
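+ * *Note: The target environment still needs a Python interpreter installed to execute target scripts. Inside a PyInstaller bundle, `sys.executable` points to the bundled executable itself rather than to a Python interpreter, so a frozen build must locate a system interpreter (for example via `shutil.which("python")`) instead of relying on `sys.executable`.*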
@@ -0,0 +1,160 @@
1
+ # 🚀 PyFix Agent: Autonomous ReAct Debugging Loop
2
+
3
+ An autonomous, multi-turn AI debugging agent built entirely from scratch in Python.
4
+
5
+ Unlike standard wrappers that simply ask an LLM to "fix this code," **PyFix Agent** implements a custom **ReAct (Reasoning and Acting)** state machine and utilizes **Abstract Syntax Tree (AST)** manipulation to surgically patch Python files in real-time. It evaluates its own fixes by executing the code inside a sandboxed subprocess, iterating dynamically until the script passes or it reaches the maximum iteration limit.
6
+
7
+ ---
8
+
9
+ ## 🧠 Core Architecture
10
+
11
+ This project deliberately avoids high-level agentic abstractions (such as LangChain or LlamaIndex) to build the core agentic loop from first principles.
12
+
13
+ ```mermaid
14
+ graph TD
15
+ A[Start] --> B[Execute target script via Subprocess]
16
+ B --> C{Execution successful?}
17
+ C -- Yes --> D[Stop: Bug Fixed 🎉]
18
+ C -- No --> E[Extract Stack Trace & Error Message]
19
+ E --> F[Parse Stack Trace for last function name]
20
+ F --> G[Construct LLM Prompt with Context Memory]
21
+ G --> H[Query LLM for patch]
22
+ H --> I[Clean LLM markdown and parse AST]
23
+ I --> J{Function-level error?}
24
+ J -- Yes --> K[Use AST surgery to replace target function node]
25
+ J -- No --> L[Fallback: Replace entire file]
26
+ K --> M[Write patched script to disk]
27
+ L --> M
28
+ M --> N{Max iterations reached?}
29
+ N -- Yes --> O[Stop: Max iterations reached ❌]
30
+ N -- No --> B
31
+ ```
32
+
33
+ ### Key Architectural Pillars
34
+
35
+ 1. **Execution Engine**: Runs the target script via Python subprocesses, capturing standard outputs, standard errors, and stack traces with safety timeout thresholds.
36
+ 2. **Context Memory**: Maintains a chronological conversation history array, allowing the LLM to learn from its previously failed patching attempts without losing the original code context.
37
+ 3. **AST Surgery**: Parses the LLM's response and uses Python's native `ast.NodeTransformer` to swap out broken function nodes with the corrected logic, leaving the rest of the file entirely untouched.
38
+
39
+ ---
40
+
41
+ ## ⚖️ Design Choices & Trade-offs
42
+
43
+ Building an autonomous agent requires balancing safety, context window limits, and real-world unpredictability.
44
+
45
+ ### 1. AST Function Surgery vs. Full File Overwrites
46
+ * **The Problem**: Asking an LLM to rewrite an entire 1,000-line script to fix a single typo is slow, expensive, and risks the model "truncating" or getting lazy with existing, working code.
47
+ * **The Solution**: The agent extracts the specific `function_name` from the traceback. It prompts the LLM only for the corrected function. The `PythonSurgery` class (inheriting from `ast.NodeTransformer`) then traverses the syntax tree, finds the broken `ast.FunctionDef`, and seamlessly swaps it with the new node.
48
+ * **The Trade-off**: While this guarantees perfect preservation of unrelated code, it requires specialized routing logic for errors that occur at the top-level `<module>` scope, which bypass the AST function surgery and require full-file patching.
49
+
50
+ ### 2. Execution-Based Evaluation vs. Exact String Matching
51
+ * **The Problem**: How do we benchmark whether the agent successfully fixed a bug? Traditional exact string matching fails because the LLM might produce a functionally equivalent fix that uses different syntax or variable names (e.g., `x += 1` instead of `x = x + 1`), resulting in false negatives.
52
+ * **The Solution**: The evaluation suite uses **Execution-Based Benchmarking**. The benchmark dynamically runs automated unit tests or validation scripts containing assertion statements against the patched files. If the test run exits with code 0, the patch is marked as a success.
53
+
54
+ ---
55
+
56
+ ## 📊 Evaluation Benchmark
57
+
58
+ The agent is evaluated against a curated dataset of scripts spanning 5 distinct error categories:
59
+ * **NameError**: Undefined variables, scope issues, and missing imports.
60
+ * **IndexError**: Off-by-one loop conditions and bounds checking.
61
+ * **TypeError**: Data type mismatches and unsupported operations.
62
+ * **AttributeError**: Typographical errors in object methods or calling methods on `NoneType`.
63
+ * **Logic Bugs**: Silent errors that require execution-based assertions to detect.
64
+
65
+ *(Currently evaluated against an automated function-level testing benchmark suite inside `eval_dataset/`)*
66
+
67
+ ---
68
+
69
+ ## 🛠️ Installation & Usage
70
+
71
+ ### Option 1: Standard Pip Installation (Recommended)
72
+
73
+ To install PyFix Agent locally in editable mode (which registers the `pyfix-agent` CLI command in the active Python environment):
74
+
75
+ ```bash
76
+ # Clone the repository
77
+ git clone https://github.com/yourusername/agent-debugging-loop.git
78
+ cd agent-debugging-loop
79
+
80
+ # Install package in editable mode
81
+ pip install -e .
82
+ ```
83
+
84
+ This registers the `pyfix-agent` CLI command, which can be run from any directory while that Python environment is active.
85
+
86
+ ### Option 2: Run as a Python Script
87
+
88
+ If you prefer to run it without installing the package:
89
+
90
+ ```bash
91
+ pip install huggingface_hub
92
+ python pyfix_agent.py --script <path_to_script>
93
+ ```
94
+
95
+ ### Configuration
96
+
97
+ Export your Hugging Face Hub token to your environment variables to ensure secure API access:
98
+
99
+ **Bash (Linux/macOS):**
100
+ ```bash
101
+ export HF_TOKEN="your_huggingface_token_here"
102
+ ```
103
+
104
+ **PowerShell (Windows):**
105
+ ```powershell
106
+ $env:HF_TOKEN="your_huggingface_token_here"
107
+ ```
108
+
109
+ ---
110
+
111
+ ## 🚀 CLI Usage Guide
112
+
113
+ Point the agent at any broken Python script. Use the `--verbose` flag to watch the ReAct state machine's internal thought process.
114
+
115
+ ```bash
116
+ # Run with default Qwen model
117
+ pyfix-agent --script my_broken_code.py --verbose --max_iter 5
118
+
119
+ # Run using a specific Hugging Face model
120
+ pyfix-agent --script my_broken_code.py --model "mistralai/Mixtral-8x7B-Instruct-v0.1" --max_iter 3
121
+ ```
122
+
123
+ ### CLI Command Options
124
+
125
+ | Argument | Type | Default | Description |
126
+ |---|---|---|---|
127
+ | `--script` | `str` | *Required* | Path to the broken Python script to debug |
128
+ | `--max_iter` | `int` | `5` | Maximum number of debugging iterations |
129
+ | `--verbose` | `flag` | `False` | Enable logging of reasoning, tracebacks, and raw LLM responses |
130
+ | `--model` | `str` | `Qwen/Qwen2.5-72B-Instruct:cheapest` | Model endpoint ID on Hugging Face Serverless API |
131
+
132
+ ### Running the Evaluation Benchmark
133
+
134
+ To run the full evaluation suite against the benchmark:
135
+
136
+ ```bash
137
+ python benchmark.py
138
+ ```
139
+
140
+ ---
141
+
142
+ ## 📦 Packaging & Release Recommendations
143
+
144
+ For releasing version 1.0.0 of **PyFix Agent** as a CLI tool:
145
+
146
+ ### 1. Direct Python Package (Recommended for Python users)
147
+ Distribute PyFix Agent as a Python package via PyPI.
148
+ * **Build tool**: Use `build` (`pip install build`) to compile source distribution `.tar.gz` and wheel `.whl` files.
149
+ * **Upload tool**: Use `twine` to publish the artifacts to PyPI.
150
+ * **Install**: Users can install it directly with `pip install pyfix-agent` or run it in an isolated environment using `pipx run pyfix-agent`.
151
+
152
+ ### 2. Standalone Binary Executable (Recommended for Non-Python users)
153
+ Compile the script into a standalone executable using `PyInstaller`.
154
+ * **Compile**:
155
+ ```bash
156
+ pip install pyinstaller
157
+ pyinstaller --onefile --name pyfix-agent pyfix_agent.py
158
+ ```
159
+ * **Release Artifact**: Upload the compiled executable (`dist/pyfix-agent` or `dist/pyfix-agent.exe`) directly as a release asset in your GitHub Releases.
160
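+ * *Note: The target environment still needs a Python interpreter installed to execute target scripts. Inside a PyInstaller bundle, `sys.executable` points to the bundled executable itself rather than to a Python interpreter, so a frozen build must locate a system interpreter (for example via `shutil.which("python")`) instead of relying on `sys.executable`.*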
@@ -0,0 +1,5 @@
1
+ def buy_item(inventory, item):
2
+ inventory.append(item)
3
+ inventory = [1, 2, 3]
4
+ item = 4
5
+ buy_item(inventory, item)
@@ -0,0 +1,5 @@
1
+ words_list = ['python', 'programming', 'is', 'fun']
2
+
3
+ def capitalize_all_words(words_list):
4
+ return [word.capitalize() for word in words_list]
5
+ print(capitalize_all_words(words_list))
@@ -0,0 +1,6 @@
1
+ arr = [4, 2, 1, 3]
2
+
3
+ def decreasing_order_list(arr):
4
+ arr.sort(reverse=True)
5
+ return arr
6
+ print(decreasing_order_list(arr))
@@ -0,0 +1,6 @@
1
+ def buy_item(inventory, item):
2
+ inventory.appned(item)
3
+
4
+ inventory = [1, 2, 3]
5
+ item = 4
6
+ buy_item(inventory, item)
@@ -0,0 +1,6 @@
1
+ words_list = ["python", "programming", "is", "fun"]
2
+
3
+ def capitalize_all_words(words_list):
4
+ return words_list.capitalize()
5
+
6
+ print(capitalize_all_words(words_list))
@@ -0,0 +1,8 @@
1
+ arr = [4,2,1,3]
2
+
3
+ def decreasing_order_list(arr):
4
+ sorted_arr = arr.sort()
5
+ sorted_arr.reverse()
6
+ return sorted_arr
7
+
8
+ print(decreasing_order_list(arr))
@@ -0,0 +1,9 @@
1
+ from patched_script1 import buy_item
2
+
3
+ inventory = [1,3,4]
4
+ buy_item(inventory, 2)
5
+ assert inventory == [1, 3, 4, 2]
6
+
7
+ inventory = []
8
+ buy_item(inventory, 5)
9
+ assert inventory == [5]
@@ -0,0 +1,5 @@
1
+ from patched_script2 import capitalize_all_words
2
+
3
+ assert capitalize_all_words(["hello", "world"]) == ["Hello", "World"]
4
+ assert capitalize_all_words([]) == []
5
+ assert capitalize_all_words(["python"]) == ["Python"]
@@ -0,0 +1,5 @@
1
+ from patched_script3 import decreasing_order_list
2
+
3
+ assert decreasing_order_list([4,2,1,3]) == [4, 3, 2, 1]
4
+ assert decreasing_order_list([]) == []
5
+ assert decreasing_order_list([1]) == [1]
@@ -0,0 +1,7 @@
1
+ def get_last_element(arr):
2
+ if len(arr) == 0:
3
+ return None
4
+ return arr[len(arr) - 1]
5
+ arr = [1, 2, 3, 4, 5]
6
+ print(get_last_element(arr))
7
+ print(get_last_element([]))
@@ -0,0 +1,7 @@
1
+ def sum_array_elements(arr):
2
+ total = 0
3
+ for i in range(len(arr)):
4
+ total += arr[i]
5
+ return total
6
+ arr = [10, 20, 30, 40]
7
+ print(sum_array_elements(arr))
@@ -0,0 +1,5 @@
1
+ def get_tail(items):
2
+ if not items:
3
+ return None
4
+ return items[-1]
5
+ print(get_tail([]))
@@ -0,0 +1,5 @@
1
+ def get_last_element(arr):
2
+ return arr[len(arr)]
3
+ arr = [1,2,3,4,5]
4
+ print(get_last_element(arr))
5
+ print(get_last_element([]))
@@ -0,0 +1,9 @@
1
+ def sum_array_elements(arr):
2
+ total = 0
3
+ for i in range(len(arr) + 1):
4
+ total += arr[i]
5
+ return total
6
+
7
+
8
+ arr = [10, 20, 30, 40]
9
+ print(sum_array_elements(arr))
@@ -0,0 +1,4 @@
1
+ def get_tail(items):
2
+ return items[-1]
3
+
4
+ print(get_tail([]))
@@ -0,0 +1,5 @@
1
+ from patched_script1 import get_last_element
2
+
3
+ assert get_last_element([1, 2, 3]) == 3
4
+ assert get_last_element([1]) == 1
5
+ assert get_last_element([]) == None
@@ -0,0 +1,5 @@
1
+ from patched_script2 import sum_array_elements
2
+
3
+ assert sum_array_elements([1, 2, 3, 4]) == 10
4
+ assert sum_array_elements([]) == 0
5
+ assert sum_array_elements([1]) == 1
@@ -0,0 +1,5 @@
1
+ from patched_script3 import get_tail
2
+
3
+ assert get_tail([1, 2, 3]) == 3
4
+ assert get_tail([]) == None
5
+ assert get_tail([1]) == 1
@@ -0,0 +1,6 @@
1
+ def factorial(n):
2
+ if n == 0:
3
+ return 1
4
+ return n * factorial(n - 1)
5
+
6
+ print(factorial(5))
@@ -0,0 +1,4 @@
1
+ def is_even(n):
2
+ return n % 2 == 0
3
+
4
+ print(is_even(10))
@@ -0,0 +1,6 @@
1
+ def find_max(a, b):
2
+ if a < b:
3
+ return b
4
+ return a
5
+
6
+ print(find_max(10, 5))
@@ -0,0 +1,6 @@
1
+ def factorial(n):
2
+ if n == 0:
3
+ return 0
4
+ return n * factorial(n - 1)
5
+
6
+ print(factorial(5))
@@ -0,0 +1,4 @@
1
+ def is_even(n):
2
+ return n % 2 == 1
3
+
4
+ print(is_even(10))
@@ -0,0 +1,6 @@
1
+ def find_max(a, b):
2
+ if a < b:
3
+ return a
4
+ return b
5
+
6
+ print(find_max(10, 5))
@@ -0,0 +1,5 @@
1
+ from patched_script1 import factorial
2
+
3
+ assert factorial(5) == 120
4
+ assert factorial(0) == 1
5
+ assert factorial(1) == 1
@@ -0,0 +1,4 @@
1
+ from patched_script2 import is_even
2
+
3
+ assert is_even(10) == True
4
+ assert is_even(9) == False
@@ -0,0 +1,5 @@
1
+ from patched_script3 import find_max
2
+
3
+ assert find_max(5, 9) == 9
4
+ assert find_max(-10, -5) == -5
5
+ assert find_max(7, 3) == 7
@@ -0,0 +1,4 @@
1
+ def calculate_circle_area(radius):
2
+ pi_value = 3.14159
3
+ return pi_value * radius ** 2
4
+ print(calculate_circle_area(5))
@@ -0,0 +1,4 @@
1
+ def get_square_root(n):
2
+ import math
3
+ return math.sqrt(n)
4
+ print(get_square_root(25))
@@ -0,0 +1,7 @@
1
+ n1 = 'David'
2
+ n2 = 'Smith'
3
+
4
+ def format_greeting(first_name, last_name):
5
+ full_name = first_name + ' ' + last_name
6
+ return f'Hello, {full_name}!'
7
+ print(format_greeting(n1, n2))
@@ -0,0 +1,5 @@
1
+ def calculate_circle_area(radius):
2
+ pi_value = 3.14159
3
+ return p_value * (radius ** 2)
4
+
5
+ print(calculate_circle_area(5))
@@ -0,0 +1,4 @@
1
+ def get_square_root(n):
2
+ return math.sqrt(n)
3
+
4
+ print(get_square_root(25))
@@ -0,0 +1,8 @@
1
+ n1 = "David"
2
+ n2 = "Smith"
3
+
4
+ def format_greeting(first_name, last_name):
5
+ full_name = first_name + " " + last_name
6
+ return f"Hello, {fullname}!"
7
+
8
+ print(format_greeting(n1, n2))
@@ -0,0 +1,5 @@
1
+ from patched_script1 import calculate_circle_area
2
+
3
+ assert calculate_circle_area(5) == 3.14159 * 25
4
+ assert calculate_circle_area(0) == 0
5
+ assert calculate_circle_area(1) == 3.14159
@@ -0,0 +1,4 @@
1
+ from patched_script2 import get_square_root
2
+
3
+ assert get_square_root(4) == 2
4
+ assert get_square_root(9) == 3
@@ -0,0 +1,4 @@
1
+ from patched_script3 import format_greeting
2
+
3
+ assert format_greeting("David", "Smith") == "Hello, David Smith!"
4
+ assert format_greeting("John", "Doe") == "Hello, John Doe!"
@@ -0,0 +1,3 @@
1
+ def message(age):
2
+ return 'You are ' + str(age) + ' years old.'
3
+ print(message(10))
@@ -0,0 +1,6 @@
1
+ def multiply_three_numbers(a, b, c):
2
+ return a * b * c
3
+
4
+ def calculate(a):
5
+ return multiply_three_numbers(5, 10, a)
6
+ print(calculate(1))
@@ -0,0 +1,3 @@
1
+ def append_value(arr, val):
2
+ return arr + [val]
3
+ print(append_value([1, 2, 3], 4))
@@ -0,0 +1,5 @@
1
+ def message(age):
2
+ return "You are " + age + " years old."
3
+
4
+ print(message(10))
5
+
@@ -0,0 +1,7 @@
1
+ def multiply_three_numbers(a, b, c):
2
+ return a * b * c
3
+
4
+ def calculate(a):
5
+ return multiply_three_numbers(5, 10)
6
+
7
+ print(calculate(1))
@@ -0,0 +1,4 @@
1
+ def append_value(arr, val):
2
+ return arr + val
3
+
4
+ print(append_value([1, 2, 3], 4))
@@ -0,0 +1,4 @@
1
+ from patched_script1 import message
2
+
3
+ assert message(10) == "You are 10 years old."
4
+ assert message(0) == "You are 0 years old."
@@ -0,0 +1,5 @@
1
+ from patched_script2 import calculate
2
+
3
+ assert calculate(1) == 50
4
+ assert calculate(2) == 100
5
+ assert calculate(3) == 150
@@ -0,0 +1,5 @@
1
+ from patched_script3 import append_value
2
+
3
+ assert append_value([1, 2, 3], 4) == [1, 2, 3, 4]
4
+ assert append_value([], 1) == [1]
5
+ assert append_value([1], 2) == [1, 2]
@@ -0,0 +1,175 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyfix-agent
3
+ Version: 1.0.0
4
+ Summary: An autonomous, multi-turn AI debugging agent built from scratch using AST surgery.
5
+ Author: Jaswin Reddy
6
+ License: MIT
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Environment :: Console
11
+ Classifier: Topic :: Software Development :: Debuggers
12
+ Requires-Python: >=3.10
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: huggingface_hub>=0.20.0
15
+
16
+ # 🚀 PyFix Agent: Autonomous ReAct Debugging Loop
17
+
18
+ An autonomous, multi-turn AI debugging agent built entirely from scratch in Python.
19
+
20
+ Unlike standard wrappers that simply ask an LLM to "fix this code," **PyFix Agent** implements a custom **ReAct (Reasoning and Acting)** state machine and utilizes **Abstract Syntax Tree (AST)** manipulation to surgically patch Python files in real-time. It evaluates its own fixes by executing the code inside a sandboxed subprocess, iterating dynamically until the script passes or it reaches the maximum iteration limit.
21
+
22
+ ---
23
+
24
+ ## 🧠 Core Architecture
25
+
26
+ This project deliberately avoids high-level agentic abstractions (such as LangChain or LlamaIndex) to build the core agentic loop from first principles.
27
+
28
+ ```mermaid
29
+ graph TD
30
+ A[Start] --> B[Execute target script via Subprocess]
31
+ B --> C{Execution successful?}
32
+ C -- Yes --> D[Stop: Bug Fixed 🎉]
33
+ C -- No --> E[Extract Stack Trace & Error Message]
34
+ E --> F[Parse Stack Trace for last function name]
35
+ F --> G[Construct LLM Prompt with Context Memory]
36
+ G --> H[Query LLM for patch]
37
+ H --> I[Clean LLM markdown and parse AST]
38
+ I --> J{Function-level error?}
39
+ J -- Yes --> K[Use AST surgery to replace target function node]
40
+ J -- No --> L[Fallback: Replace entire file]
41
+ K --> M[Write patched script to disk]
42
+ L --> M
43
+ M --> N{Max iterations reached?}
44
+ N -- Yes --> O[Stop: Max iterations reached ❌]
45
+ N -- No --> B
46
+ ```
47
+
48
+ ### Key Architectural Pillars
49
+
50
+ 1. **Execution Engine**: Runs the target script in a Python subprocess, capturing standard output, standard error, and stack traces, with a timeout as a safety threshold.
51
+ 2. **Context Memory**: Maintains a chronological conversation history array, allowing the LLM to learn from its previously failed patching attempts without losing the original code context.
52
+ 3. **AST Surgery**: Parses the LLM's response and uses Python's native `ast.NodeTransformer` to swap out broken function nodes with the corrected logic, leaving the rest of the file entirely untouched.
53
+
54
+ ---
55
+
56
+ ## ⚖️ Design Choices & Trade-offs
57
+
58
+ Building an autonomous agent requires balancing safety, context window limits, and real-world unpredictability.
59
+
60
+ ### 1. AST Function Surgery vs. Full File Overwrites
61
+ * **The Problem**: Asking an LLM to rewrite an entire 1,000-line script to fix a single typo is slow, expensive, and risks the model "truncating" or getting lazy with existing, working code.
62
+ * **The Solution**: The agent extracts the specific `function_name` from the traceback. It prompts the LLM only for the corrected function. The `PythonSurgery` class (inheriting from `ast.NodeTransformer`) then traverses the syntax tree, finds the broken `ast.FunctionDef`, and seamlessly swaps it with the new node.
63
+ * **The Trade-off**: While this guarantees perfect preservation of unrelated code, it requires specialized routing logic for errors that occur at the top-level `<module>` scope, which bypass the AST function surgery and require full-file patching.
64
+
65
+ ### 2. Execution-Based Evaluation vs. Exact String Matching
66
+ * **The Problem**: How do we benchmark whether the agent successfully fixed a bug? Traditional exact string matching fails because the LLM might produce different but equivalent code (e.g., `x += 1` instead of `x = x + 1`, or different variable names), resulting in false negatives.
67
+ * **The Solution**: The evaluation suite uses **Execution-Based Benchmarking**. The benchmark dynamically runs automated unit tests or validation scripts containing assertion statements against the patched files. If the patched script exits with code 0, it is marked as a success.
68
+
69
+ ---
70
+
71
+ ## 📊 Evaluation Benchmark
72
+
73
+ The agent is evaluated against a curated dataset of scripts spanning 5 distinct error categories:
74
+ * **NameError**: Undefined variables, scope issues, and missing imports.
75
+ * **IndexError**: Off-by-one loop conditions and bounds checking.
76
+ * **TypeError**: Data type mismatches and unsupported operations.
77
+ * **AttributeError**: Typographical errors in object methods or calling methods on `NoneType`.
78
+ * **Logic Bugs**: Silent errors that require execution-based assertions to detect.
79
+
80
+ *(Currently evaluated against an automated function-level testing benchmark suite inside `eval_dataset/`)*
81
+
82
+ ---
83
+
84
+ ## 🛠️ Installation & Usage
85
+
86
+ ### Option 1: Standard Pip Installation (Recommended)
87
+
88
+ To install PyFix Agent from a local clone in editable mode (which registers the `pyfix-agent` CLI command in the active Python environment):
89
+
90
+ ```bash
91
+ # Clone the repository
92
+ git clone https://github.com/yourusername/agent-debugging-loop.git
93
+ cd agent-debugging-loop
94
+
95
+ # Install package in editable mode
96
+ pip install -e .
97
+ ```
98
+
99
+ This registers the `pyfix-agent` command in the active Python environment, so it can be run from any directory while that environment is active.
100
+
101
+ ### Option 2: Run as a Python Script
102
+
103
+ If you prefer to run it without installing the package:
104
+
105
+ ```bash
106
+ pip install huggingface_hub
107
+ python pyfix_agent.py --script <path_to_script>
108
+ ```
109
+
110
+ ### Configuration
111
+
112
+ Export your Hugging Face Hub token to your environment variables to ensure secure API access:
113
+
114
+ **Bash (Linux/macOS):**
115
+ ```bash
116
+ export HF_TOKEN="your_huggingface_token_here"
117
+ ```
118
+
119
+ **PowerShell (Windows):**
120
+ ```powershell
121
+ $env:HF_TOKEN="your_huggingface_token_here"
122
+ ```
123
+
124
+ ---
125
+
126
+ ## 🚀 CLI Usage Guide
127
+
128
+ Point the agent at any broken Python script. Use the `--verbose` flag to watch the ReAct state machine's internal thought process.
129
+
130
+ ```bash
131
+ # Run with default Qwen model
132
+ pyfix-agent --script my_broken_code.py --verbose --max_iter 5
133
+
134
+ # Run using a specific Hugging Face model
135
+ pyfix-agent --script my_broken_code.py --model "mistralai/Mixtral-8x7B-Instruct-v0.1" --max_iter 3
136
+ ```
137
+
138
+ ### CLI Command Options
139
+
140
+ | Argument | Type | Default | Description |
141
+ |---|---|---|---|
142
+ | `--script` | `str` | *Required* | Path to the broken Python script to debug |
143
+ | `--max_iter` | `int` | `5` | Maximum number of debugging iterations |
144
+ | `--verbose` | `flag` | `False` | Enable logging of reasoning, tracebacks, and raw LLM responses |
145
+ | `--model` | `str` | `Qwen/Qwen2.5-72B-Instruct:cheapest` | Model endpoint ID on Hugging Face Serverless API |
146
+
147
+ ### Running the Evaluation Benchmark
148
+
149
+ To run the full evaluation suite against the benchmark:
150
+
151
+ ```bash
152
+ python benchmark.py
153
+ ```
154
+
155
+ ---
156
+
157
+ ## 📦 Packaging & Release Recommendations
158
+
159
+ For releasing version 1.0.0 of **PyFix Agent** as a CLI tool:
160
+
161
+ ### 1. Direct Python Package (Recommended for Python users)
162
+ Distribute PyFix Agent as a Python package via PyPI.
163
+ * **Build tool**: Use `build` (`pip install build`) to compile source distribution `.tar.gz` and wheel `.whl` files.
164
+ * **Upload tool**: Use `twine` to publish the artifacts to PyPI.
165
+ * **Install**: Users can install it directly with `pip install pyfix-agent` or run it in an isolated environment using `pipx run pyfix-agent`.
166
+
167
+ ### 2. Standalone Binary Executable (Recommended for Non-Python users)
168
+ Compile the script into a standalone executable using `PyInstaller`.
169
+ * **Compile**:
170
+ ```bash
171
+ pip install pyinstaller
172
+ pyinstaller --onefile --name pyfix-agent pyfix_agent.py
173
+ ```
174
+ * **Release Artifact**: Upload the compiled executable (`dist/pyfix-agent` or `dist/pyfix-agent.exe`) directly as a release asset in your GitHub Releases.
175
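+ * *Note: The target environment still needs a Python interpreter installed to execute target scripts. In a PyInstaller build, `sys.executable` points to the frozen executable itself rather than to a Python interpreter, so the interpreter must be located some other way (e.g., on `PATH`).*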
@@ -0,0 +1,53 @@
1
+ README.md
2
+ pyproject.toml
3
+ eval_dataset/AttributeError/patched_script1.py
4
+ eval_dataset/AttributeError/patched_script2.py
5
+ eval_dataset/AttributeError/patched_script3.py
6
+ eval_dataset/AttributeError/script1.py
7
+ eval_dataset/AttributeError/script2.py
8
+ eval_dataset/AttributeError/script3.py
9
+ eval_dataset/AttributeError/test_script1.py
10
+ eval_dataset/AttributeError/test_script2.py
11
+ eval_dataset/AttributeError/test_script3.py
12
+ eval_dataset/IndexError/patched_script1.py
13
+ eval_dataset/IndexError/patched_script2.py
14
+ eval_dataset/IndexError/patched_script3.py
15
+ eval_dataset/IndexError/script1.py
16
+ eval_dataset/IndexError/script2.py
17
+ eval_dataset/IndexError/script3.py
18
+ eval_dataset/IndexError/test_script1.py
19
+ eval_dataset/IndexError/test_script2.py
20
+ eval_dataset/IndexError/test_script3.py
21
+ eval_dataset/LogicBugs/patched_script1.py
22
+ eval_dataset/LogicBugs/patched_script2.py
23
+ eval_dataset/LogicBugs/patched_script3.py
24
+ eval_dataset/LogicBugs/script1.py
25
+ eval_dataset/LogicBugs/script2.py
26
+ eval_dataset/LogicBugs/script3.py
27
+ eval_dataset/LogicBugs/test_script1.py
28
+ eval_dataset/LogicBugs/test_script2.py
29
+ eval_dataset/LogicBugs/test_script3.py
30
+ eval_dataset/NameError/patched_script1.py
31
+ eval_dataset/NameError/patched_script2.py
32
+ eval_dataset/NameError/patched_script3.py
33
+ eval_dataset/NameError/script1.py
34
+ eval_dataset/NameError/script2.py
35
+ eval_dataset/NameError/script3.py
36
+ eval_dataset/NameError/test_script1.py
37
+ eval_dataset/NameError/test_script2.py
38
+ eval_dataset/NameError/test_script3.py
39
+ eval_dataset/TypeError/patched_script1.py
40
+ eval_dataset/TypeError/patched_script2.py
41
+ eval_dataset/TypeError/patched_script3.py
42
+ eval_dataset/TypeError/script1.py
43
+ eval_dataset/TypeError/script2.py
44
+ eval_dataset/TypeError/script3.py
45
+ eval_dataset/TypeError/test_script1.py
46
+ eval_dataset/TypeError/test_script2.py
47
+ eval_dataset/TypeError/test_script3.py
48
+ pyfix_agent.egg-info/PKG-INFO
49
+ pyfix_agent.egg-info/SOURCES.txt
50
+ pyfix_agent.egg-info/dependency_links.txt
51
+ pyfix_agent.egg-info/entry_points.txt
52
+ pyfix_agent.egg-info/requires.txt
53
+ pyfix_agent.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ pyfix-agent = pyfix_agent:main
@@ -0,0 +1 @@
1
+ huggingface_hub>=0.20.0
@@ -0,0 +1 @@
1
+ eval_dataset
@@ -0,0 +1,27 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "pyfix-agent"
7
+ version = "1.0.0"
8
+ description = "An autonomous, multi-turn AI debugging agent built from scratch using AST surgery."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Jaswin Reddy"}
14
+ ]
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ "Environment :: Console",
20
+ "Topic :: Software Development :: Debuggers",
21
+ ]
22
+ dependencies = [
23
+ "huggingface_hub>=0.20.0",
24
+ ]
25
+
26
+ [project.scripts]
27
+ pyfix-agent = "pyfix_agent:main"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+