repgen-ai 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- repgen_ai-0.1.0/MANIFEST.in +3 -0
- repgen_ai-0.1.0/PKG-INFO +199 -0
- repgen_ai-0.1.0/README.md +160 -0
- repgen_ai-0.1.0/pyproject.toml +45 -0
- repgen_ai-0.1.0/repgen/__init__.py +51 -0
- repgen_ai-0.1.0/repgen/__pycache__/__init__.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/__pycache__/cli.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/__pycache__/core.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/__pycache__/server.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/__pycache__/utils.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/cli.py +375 -0
- repgen_ai-0.1.0/repgen/core.py +239 -0
- repgen_ai-0.1.0/repgen/retrieval/__init__.py +4 -0
- repgen_ai-0.1.0/repgen/retrieval/__pycache__/__init__.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/retrieval/__pycache__/config.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/retrieval/__pycache__/pipeline.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/retrieval/config.py +53 -0
- repgen_ai-0.1.0/repgen/retrieval/core/__init__.py +0 -0
- repgen_ai-0.1.0/repgen/retrieval/core/__pycache__/__init__.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/retrieval/core/__pycache__/code_indexer.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/retrieval/core/__pycache__/dependency_analyzer.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/retrieval/core/__pycache__/module_analyzer.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/retrieval/core/__pycache__/training_code_detector.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/retrieval/core/__pycache__/utils.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/retrieval/core/code_indexer.py +138 -0
- repgen_ai-0.1.0/repgen/retrieval/core/dependency_analyzer.py +121 -0
- repgen_ai-0.1.0/repgen/retrieval/core/module_analyzer.py +65 -0
- repgen_ai-0.1.0/repgen/retrieval/core/training_code_detector.py +240 -0
- repgen_ai-0.1.0/repgen/retrieval/core/utils.py +52 -0
- repgen_ai-0.1.0/repgen/retrieval/models/__init__.py +0 -0
- repgen_ai-0.1.0/repgen/retrieval/models/__pycache__/__init__.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/retrieval/models/__pycache__/hybrid_search.cpython-313.pyc +0 -0
- repgen_ai-0.1.0/repgen/retrieval/models/hybrid_search.py +151 -0
- repgen_ai-0.1.0/repgen/retrieval/pipeline.py +166 -0
- repgen_ai-0.1.0/repgen/server.py +111 -0
- repgen_ai-0.1.0/repgen/utils.py +550 -0
- repgen_ai-0.1.0/repgen_ai.egg-info/PKG-INFO +199 -0
- repgen_ai-0.1.0/repgen_ai.egg-info/SOURCES.txt +42 -0
- repgen_ai-0.1.0/repgen_ai.egg-info/dependency_links.txt +1 -0
- repgen_ai-0.1.0/repgen_ai.egg-info/requires.txt +21 -0
- repgen_ai-0.1.0/repgen_ai.egg-info/top_level.txt +1 -0
- repgen_ai-0.1.0/requirements.txt +21 -0
- repgen_ai-0.1.0/setup.cfg +4 -0
- repgen_ai-0.1.0/setup.py +35 -0
repgen_ai-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: repgen-ai
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Automated reproduction generation for bug reports using LLMs
|
|
5
|
+
Home-page: https://github.com/mehilshah/RepGen
|
|
6
|
+
Author: Mehil B. Shah
|
|
7
|
+
Author-email: Mehil Shah <shahmehil@dal.ca>
|
|
8
|
+
Project-URL: Homepage, https://github.com/mehilshah/RepGen
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/mehilshah/RepGen/issues
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: annoy
|
|
16
|
+
Requires-Dist: numpy
|
|
17
|
+
Requires-Dist: pylint
|
|
18
|
+
Requires-Dist: rank_bm25
|
|
19
|
+
Requires-Dist: requests
|
|
20
|
+
Requires-Dist: scikit_learn
|
|
21
|
+
Requires-Dist: sentence_transformers
|
|
22
|
+
Requires-Dist: torch
|
|
23
|
+
Requires-Dist: transformers
|
|
24
|
+
Requires-Dist: pandas
|
|
25
|
+
Requires-Dist: openai
|
|
26
|
+
Requires-Dist: rich
|
|
27
|
+
Requires-Dist: mkdocs
|
|
28
|
+
Requires-Dist: mkdocs-material
|
|
29
|
+
Requires-Dist: watchfiles
|
|
30
|
+
Requires-Dist: black
|
|
31
|
+
Requires-Dist: isort
|
|
32
|
+
Requires-Dist: mypy
|
|
33
|
+
Requires-Dist: flake8
|
|
34
|
+
Requires-Dist: pre-commit
|
|
35
|
+
Requires-Dist: rich-argparse
|
|
36
|
+
Dynamic: author
|
|
37
|
+
Dynamic: home-page
|
|
38
|
+
Dynamic: requires-python
|
|
39
|
+
|
|
40
|
+
# RepGen - Automated Bug Reproduction
|
|
41
|
+
|
|
42
|
+
<div align="center">
|
|
43
|
+
|
|
44
|
+
<p align="center">
|
|
45
|
+
<img src="https://img.shields.io/badge/Status-Active%20Research-4CAF50?style=flat-square" />
|
|
46
|
+
<img src="https://img.shields.io/badge/Python-3.12-3776AB?style=flat-square&logo=python&logoColor=white" />
|
|
47
|
+
<img src="https://img.shields.io/github/actions/workflow/status/mehilshah/RepGen/ci.yml?branch=main&label=Build&style=flat-square&logo=github" />
|
|
48
|
+
<img src="https://img.shields.io/badge/Execution-Dockerized-2496ED?style=flat-square&logo=docker&logoColor=white" />
|
|
49
|
+
<img src="https://img.shields.io/badge/License-MIT-lightgrey?style=flat-square" />
|
|
50
|
+
</p>
|
|
51
|
+
|
|
52
|
+
[Features](#features) • [Quick Start](#quick-start) • [VS Code](#vs-code-extension) • [Documentation](#documentation) • [Paper](https://arxiv.org/abs/2512.14990)
|
|
53
|
+
|
|
54
|
+
</div>
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
## Overview
|
|
58
|
+
|
|
59
|
+
**RepGen** is a production-grade tool that leverages state-of-the-art Large Language Models (LLMs) to automatically reproduce bugs in software libraries. By analyzing bug reports and repository context, RepGen plans a reproduction strategy and generates executable Python scripts to replicate the issue.
|
|
60
|
+
|
|
61
|
+
Ideally suited for Deep Learning libraries, RepGen automates the tedious first step of debugging: creating a Minimal Reproducible Example (MRE).
|
|
62
|
+
|
|
63
|
+
## Features
|
|
64
|
+
|
|
65
|
+
- **Smart Retrieval**: Uses hybrid search (BM25 + Semantic) to find relevant code snippets and training loops for context.
|
|
66
|
+
- **Multi-Backend Support**: Seamlessly switch between LLM providers:
|
|
67
|
+
- **Ollama**: Run locally with models like `qwen2.5-coder`, `llama3`, or `mistral`.
|
|
68
|
+
- **OpenAI**: Utilize `gpt-4o` and `gpt-3.5-turbo` for high-precision generation.
|
|
69
|
+
- **Gemini** & **Claude**: Leverage multimodal and reasoning capabilities.
|
|
70
|
+
- **Remote Input Handling**:
|
|
71
|
+
- Direct support for **GitHub Issue URLs** (fetches content via API).
|
|
72
|
+
- Direct support for **Git Repository URLs** (clones automatically to temp workspace).
|
|
73
|
+
- **Professional VS Code Extension**: A fully integrated sidebar to run reproductions directly in your editor.
|
|
74
|
+
- **Docker Ready**: Full containerization for easy deployment.
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Quick Start
|
|
79
|
+
|
|
80
|
+
The easiest way to run RepGen is using **Docker**. This ensures a consistent environment with all dependencies pre-installed.
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
# 1. Build the image
|
|
84
|
+
cd RepGen
|
|
85
|
+
docker build -t repgen .
|
|
86
|
+
|
|
87
|
+
# 2. Run the server (mounting the volume for development if needed)
|
|
88
|
+
docker run -p 8000:8000 repgen
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
The API will be available at `http://localhost:8000`.
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Developer Setup
|
|
96
|
+
|
|
97
|
+
If you prefer to run locally or contribute to the core logic:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
# 1. Create a virtual environment
|
|
101
|
+
python3 -m venv venv
|
|
102
|
+
source venv/bin/activate # On Windows: .\venv\Scripts\activate
|
|
103
|
+
|
|
104
|
+
# 2. Install RepGen in editable mode
|
|
105
|
+
pip install -e .
|
|
106
|
+
|
|
107
|
+
# 3. Run the CLI
|
|
108
|
+
repgen
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## VS Code Extension
|
|
114
|
+
|
|
115
|
+
RepGen includes a premium VS Code extension for a seamless workflow.
|
|
116
|
+
|
|
117
|
+
### Features
|
|
118
|
+
- **Persistent Sidebar**: Stays active while you work.
|
|
119
|
+
- **Direct Code Display**: Generated reproduction scripts appear directly in the sidebar.
|
|
120
|
+
- **"Open in Editor"**: One-click to open generated code in a new editor tab for review.
|
|
121
|
+
- **Status Tracking**: Real-time progress indicators.
|
|
122
|
+
|
|
123
|
+
### Installation
|
|
124
|
+
1. Navigate to `extensions/vscode`.
|
|
125
|
+
2. Run `npm install && npm run compile`.
|
|
126
|
+
3. Open the folder in VS Code and press `F5` to launch the Extension Development Host.
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Ecosystem
|
|
131
|
+
|
|
132
|
+
### REST API
|
|
133
|
+
The core engine runs as a FastAPI service.
|
|
134
|
+
- **Start**: `uvicorn repgen.server:app --reload --reload-dir repgen`
|
|
135
|
+
- **Docs**: `http://localhost:8000/docs`
|
|
136
|
+
|
|
137
|
+
### Web Dashboard
|
|
138
|
+
A modern, React-based UI to manage reproduction tasks visually.
|
|
139
|
+
- **Location**: `ui/`
|
|
140
|
+
- **Start**: `cd ui && npm install && npm run dev`
|
|
141
|
+
|
|
142
|
+
### Browser Extension
|
|
143
|
+
Injects a "Reproduce" button directly into GitHub Issues.
|
|
144
|
+
- **Location**: `extensions/chrome/`
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
148
|
+
## Usage Examples
|
|
149
|
+
|
|
150
|
+
### CLI Automation
|
|
151
|
+
```bash
|
|
152
|
+
repgen \
|
|
153
|
+
--bug-report https://github.com/owner/repo/issues/123 \
|
|
154
|
+
--repo-path https://github.com/owner/repo.git \
|
|
155
|
+
--backend openai \
|
|
156
|
+
--model gpt-4o
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Configuration
|
|
160
|
+
Set API keys via environment variables:
|
|
161
|
+
```bash
|
|
162
|
+
export OPENAI_API_KEY="sk-..."
|
|
163
|
+
export GEMINI_API_KEY="AIza..."
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
## Contributing
|
|
169
|
+
|
|
170
|
+
Contributions are welcome! Please check out the issues tab or submit a PR.
|
|
171
|
+
|
|
172
|
+
## Issues
|
|
173
|
+
|
|
174
|
+
If you encounter any issues, please open a GitHub issue or contact Mehil Shah at [shahmehil@dal.ca](mailto:shahmehil@dal.ca).
|
|
175
|
+
|
|
176
|
+
## License
|
|
177
|
+
|
|
178
|
+
MIT © [Mehil Shah](https://github.com/mehilshah)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
## Future Work and Research Directions
|
|
182
|
+
|
|
183
|
+
1. **Scalable Execution via Cluster Schedulers**
|
|
184
|
+
Integrate RepGen with workload managers such as **SLURM** to offload reproduction tasks to HPC or GPU clusters. This would enable asynchronous execution, queue-based scheduling, and automated user notifications once reproduction artifacts are ready.
|
|
185
|
+
|
|
186
|
+
2. **Automated Verification in Isolated Sandboxes**
|
|
187
|
+
Extend RepGen with sandboxed execution environments that automatically validate whether a generated reproduction script successfully triggers the reported bug. This would close the loop between generation and confirmation.
|
|
188
|
+
|
|
189
|
+
3. **Bug-Type–Aware Verification Strategies**
|
|
190
|
+
Develop specialized verification mechanisms tailored to different bug classes (e.g., crashes, numerical instability, performance regressions, nondeterministic failures). Each class may require distinct success criteria and instrumentation.
|
|
191
|
+
|
|
192
|
+
4. **Fine-Grained Bug Localization and Understanding**
|
|
193
|
+
Move beyond reproduction toward **bug comprehension**, including identifying the most likely fault-inducing components, APIs, or configuration parameters involved in the failure.
|
|
194
|
+
|
|
195
|
+
5. **Understanding Practitioner Adoption Barriers**
|
|
196
|
+
Survey the existing literature on why practitioners underutilize automated debugging tools; insights from these studies can guide usability improvements and feature prioritization.
|
|
197
|
+
|
|
198
|
+
6. **CI/CD and GitHub Actions Integration**
|
|
199
|
+
Integrate RepGen directly into **GitHub Actions** and other CI pipelines, enabling automated bug reproduction as part of issue triage, regression testing, or pull request validation workflows.
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# RepGen - Automated Bug Reproduction
|
|
2
|
+
|
|
3
|
+
<div align="center">
|
|
4
|
+
|
|
5
|
+
<p align="center">
|
|
6
|
+
<img src="https://img.shields.io/badge/Status-Active%20Research-4CAF50?style=flat-square" />
|
|
7
|
+
<img src="https://img.shields.io/badge/Python-3.12-3776AB?style=flat-square&logo=python&logoColor=white" />
|
|
8
|
+
<img src="https://img.shields.io/github/actions/workflow/status/mehilshah/RepGen/ci.yml?branch=main&label=Build&style=flat-square&logo=github" />
|
|
9
|
+
<img src="https://img.shields.io/badge/Execution-Dockerized-2496ED?style=flat-square&logo=docker&logoColor=white" />
|
|
10
|
+
<img src="https://img.shields.io/badge/License-MIT-lightgrey?style=flat-square" />
|
|
11
|
+
</p>
|
|
12
|
+
|
|
13
|
+
[Features](#features) • [Quick Start](#quick-start) • [VS Code](#vs-code-extension) • [Documentation](#documentation) • [Paper](https://arxiv.org/abs/2512.14990)
|
|
14
|
+
|
|
15
|
+
</div>
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
## Overview
|
|
19
|
+
|
|
20
|
+
**RepGen** is a production-grade tool that leverages state-of-the-art Large Language Models (LLMs) to automatically reproduce bugs in software libraries. By analyzing bug reports and repository context, RepGen plans a reproduction strategy and generates executable Python scripts to replicate the issue.
|
|
21
|
+
|
|
22
|
+
Ideally suited for Deep Learning libraries, RepGen automates the tedious first step of debugging: creating a Minimal Reproducible Example (MRE).
|
|
23
|
+
|
|
24
|
+
## Features
|
|
25
|
+
|
|
26
|
+
- **Smart Retrieval**: Uses hybrid search (BM25 + Semantic) to find relevant code snippets and training loops for context.
|
|
27
|
+
- **Multi-Backend Support**: Seamlessly switch between LLM providers:
|
|
28
|
+
- **Ollama**: Run locally with models like `qwen2.5-coder`, `llama3`, or `mistral`.
|
|
29
|
+
- **OpenAI**: Utilize `gpt-4o` and `gpt-3.5-turbo` for high-precision generation.
|
|
30
|
+
- **Gemini** & **Claude**: Leverage multimodal and reasoning capabilities.
|
|
31
|
+
- **Remote Input Handling**:
|
|
32
|
+
- Direct support for **GitHub Issue URLs** (fetches content via API).
|
|
33
|
+
- Direct support for **Git Repository URLs** (clones automatically to temp workspace).
|
|
34
|
+
- **Professional VS Code Extension**: A fully integrated sidebar to run reproductions directly in your editor.
|
|
35
|
+
- **Docker Ready**: Full containerization for easy deployment.
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Quick Start
|
|
40
|
+
|
|
41
|
+
The easiest way to run RepGen is using **Docker**. This ensures a consistent environment with all dependencies pre-installed.
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# 1. Build the image
|
|
45
|
+
cd RepGen
|
|
46
|
+
docker build -t repgen .
|
|
47
|
+
|
|
48
|
+
# 2. Run the server (mounting the volume for development if needed)
|
|
49
|
+
docker run -p 8000:8000 repgen
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
The API will be available at `http://localhost:8000`.
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Developer Setup
|
|
57
|
+
|
|
58
|
+
If you prefer to run locally or contribute to the core logic:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
# 1. Create a virtual environment
|
|
62
|
+
python3 -m venv venv
|
|
63
|
+
source venv/bin/activate # On Windows: .\venv\Scripts\activate
|
|
64
|
+
|
|
65
|
+
# 2. Install RepGen in editable mode
|
|
66
|
+
pip install -e .
|
|
67
|
+
|
|
68
|
+
# 3. Run the CLI
|
|
69
|
+
repgen
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## VS Code Extension
|
|
75
|
+
|
|
76
|
+
RepGen includes a premium VS Code extension for a seamless workflow.
|
|
77
|
+
|
|
78
|
+
### Features
|
|
79
|
+
- **Persistent Sidebar**: Stays active while you work.
|
|
80
|
+
- **Direct Code Display**: Generated reproduction scripts appear directly in the sidebar.
|
|
81
|
+
- **"Open in Editor"**: One-click to open generated code in a new editor tab for review.
|
|
82
|
+
- **Status Tracking**: Real-time progress indicators.
|
|
83
|
+
|
|
84
|
+
### Installation
|
|
85
|
+
1. Navigate to `extensions/vscode`.
|
|
86
|
+
2. Run `npm install && npm run compile`.
|
|
87
|
+
3. Open the folder in VS Code and press `F5` to launch the Extension Development Host.
|
|
88
|
+
|
|
89
|
+
---
|
|
90
|
+
|
|
91
|
+
## Ecosystem
|
|
92
|
+
|
|
93
|
+
### REST API
|
|
94
|
+
The core engine runs as a FastAPI service.
|
|
95
|
+
- **Start**: `uvicorn repgen.server:app --reload --reload-dir repgen`
|
|
96
|
+
- **Docs**: `http://localhost:8000/docs`
|
|
97
|
+
|
|
98
|
+
### Web Dashboard
|
|
99
|
+
A modern, React-based UI to manage reproduction tasks visually.
|
|
100
|
+
- **Location**: `ui/`
|
|
101
|
+
- **Start**: `cd ui && npm install && npm run dev`
|
|
102
|
+
|
|
103
|
+
### Browser Extension
|
|
104
|
+
Injects a "Reproduce" button directly into GitHub Issues.
|
|
105
|
+
- **Location**: `extensions/chrome/`
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Usage Examples
|
|
110
|
+
|
|
111
|
+
### CLI Automation
|
|
112
|
+
```bash
|
|
113
|
+
repgen \
|
|
114
|
+
--bug-report https://github.com/owner/repo/issues/123 \
|
|
115
|
+
--repo-path https://github.com/owner/repo.git \
|
|
116
|
+
--backend openai \
|
|
117
|
+
--model gpt-4o
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Configuration
|
|
121
|
+
Set API keys via environment variables:
|
|
122
|
+
```bash
|
|
123
|
+
export OPENAI_API_KEY="sk-..."
|
|
124
|
+
export GEMINI_API_KEY="AIza..."
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## Contributing
|
|
130
|
+
|
|
131
|
+
Contributions are welcome! Please check out the issues tab or submit a PR.
|
|
132
|
+
|
|
133
|
+
## Issues
|
|
134
|
+
|
|
135
|
+
If you encounter any issues, please open a GitHub issue or contact Mehil Shah at [shahmehil@dal.ca](mailto:shahmehil@dal.ca).
|
|
136
|
+
|
|
137
|
+
## License
|
|
138
|
+
|
|
139
|
+
MIT © [Mehil Shah](https://github.com/mehilshah)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
## Future Work and Research Directions
|
|
143
|
+
|
|
144
|
+
1. **Scalable Execution via Cluster Schedulers**
|
|
145
|
+
Integrate RepGen with workload managers such as **SLURM** to offload reproduction tasks to HPC or GPU clusters. This would enable asynchronous execution, queue-based scheduling, and automated user notifications once reproduction artifacts are ready.
|
|
146
|
+
|
|
147
|
+
2. **Automated Verification in Isolated Sandboxes**
|
|
148
|
+
Extend RepGen with sandboxed execution environments that automatically validate whether a generated reproduction script successfully triggers the reported bug. This would close the loop between generation and confirmation.
|
|
149
|
+
|
|
150
|
+
3. **Bug-Type–Aware Verification Strategies**
|
|
151
|
+
Develop specialized verification mechanisms tailored to different bug classes (e.g., crashes, numerical instability, performance regressions, nondeterministic failures). Each class may require distinct success criteria and instrumentation.
|
|
152
|
+
|
|
153
|
+
4. **Fine-Grained Bug Localization and Understanding**
|
|
154
|
+
Move beyond reproduction toward **bug comprehension**, including identifying the most likely fault-inducing components, APIs, or configuration parameters involved in the failure.
|
|
155
|
+
|
|
156
|
+
5. **Understanding Practitioner Adoption Barriers**
|
|
157
|
+
Survey the existing literature on why practitioners underutilize automated debugging tools; insights from these studies can guide usability improvements and feature prioritization.
|
|
158
|
+
|
|
159
|
+
6. **CI/CD and GitHub Actions Integration**
|
|
160
|
+
Integrate RepGen directly into **GitHub Actions** and other CI pipelines, enabling automated bug reproduction as part of issue triage, regression testing, or pull request validation workflows.
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "repgen-ai"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Automated reproduction generation for bug reports using LLMs"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
authors = [
|
|
11
|
+
{ name = "Mehil Shah", email = "shahmehil@dal.ca" },
|
|
12
|
+
]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Operating System :: OS Independent",
|
|
17
|
+
]
|
|
18
|
+
requires-python = ">=3.8"
|
|
19
|
+
dependencies = [
|
|
20
|
+
"annoy",
|
|
21
|
+
"numpy",
|
|
22
|
+
"pylint",
|
|
23
|
+
"rank_bm25",
|
|
24
|
+
"requests",
|
|
25
|
+
"scikit_learn",
|
|
26
|
+
"sentence_transformers",
|
|
27
|
+
"torch",
|
|
28
|
+
"transformers",
|
|
29
|
+
"pandas",
|
|
30
|
+
"openai",
|
|
31
|
+
"rich",
|
|
32
|
+
"mkdocs",
|
|
33
|
+
"mkdocs-material",
|
|
34
|
+
"watchfiles",
|
|
35
|
+
"black",
|
|
36
|
+
"isort",
|
|
37
|
+
"mypy",
|
|
38
|
+
"flake8",
|
|
39
|
+
"pre-commit",
|
|
40
|
+
"rich-argparse",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
[project.urls]
|
|
44
|
+
"Homepage" = "https://github.com/mehilshah/RepGen"
|
|
45
|
+
"Bug Tracker" = "https://github.com/mehilshah/RepGen/issues"
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from .core import RepGenService
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def reproduce(
    bug_report_source: str,
    repo_source: str,
    backend: str = "openai",
    model: str = "gpt-4o",
    api_key: Optional[str] = None,
    commit: Optional[str] = None,
    output_dir: str = "./repgen_results",
) -> str:
    """
    Run a single reproduction and return the generated script text.

    Builds a ``RepGenService`` bound to ``output_dir``, forwards the remaining
    arguments to its ``run_reproduction`` method, and unwraps the result.

    Args:
        bug_report_source: URL or path to the bug report.
        repo_source: URL or path to the repository.
        backend: The LLM backend to use (default: "openai").
        model: The model name to use (default: "gpt-4o").
        api_key: API key for the backend (optional).
        commit: Specific commit hash to checkout (optional).
        output_dir: Directory to store intermediate artifacts
            (default: "./repgen_results").

    Returns:
        The ``"content"`` entry of the first item in the result's ``"files"``
        list (usually the reproduction script).

    Raises:
        RuntimeError: If the result is not flagged successful or lists no
            files. The message carries the result's ``"error"`` entry when
            one is present.
    """
    outcome = RepGenService(output_dir=output_dir).run_reproduction(
        bug_report_source=bug_report_source,
        repo_source=repo_source,
        backend=backend,
        model=model,
        commit=commit,
        api_key=api_key,
    )

    # Guard clause: handle the failure path first so the success path reads flat.
    if not (outcome["success"] and outcome["files"]):
        reason = outcome.get("error", "Unknown error during reproduction")
        raise RuntimeError(f"Reproduction failed: {reason}")

    # The first generated file is usually the reproduction script.
    return outcome["files"][0]["content"]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
__all__ = ["reproduce", "RepGenService"]
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|