repgen-ai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. repgen_ai-0.1.0/MANIFEST.in +3 -0
  2. repgen_ai-0.1.0/PKG-INFO +199 -0
  3. repgen_ai-0.1.0/README.md +160 -0
  4. repgen_ai-0.1.0/pyproject.toml +45 -0
  5. repgen_ai-0.1.0/repgen/__init__.py +51 -0
  6. repgen_ai-0.1.0/repgen/__pycache__/__init__.cpython-313.pyc +0 -0
  7. repgen_ai-0.1.0/repgen/__pycache__/cli.cpython-313.pyc +0 -0
  8. repgen_ai-0.1.0/repgen/__pycache__/core.cpython-313.pyc +0 -0
  9. repgen_ai-0.1.0/repgen/__pycache__/server.cpython-313.pyc +0 -0
  10. repgen_ai-0.1.0/repgen/__pycache__/utils.cpython-313.pyc +0 -0
  11. repgen_ai-0.1.0/repgen/cli.py +375 -0
  12. repgen_ai-0.1.0/repgen/core.py +239 -0
  13. repgen_ai-0.1.0/repgen/retrieval/__init__.py +4 -0
  14. repgen_ai-0.1.0/repgen/retrieval/__pycache__/__init__.cpython-313.pyc +0 -0
  15. repgen_ai-0.1.0/repgen/retrieval/__pycache__/config.cpython-313.pyc +0 -0
  16. repgen_ai-0.1.0/repgen/retrieval/__pycache__/pipeline.cpython-313.pyc +0 -0
  17. repgen_ai-0.1.0/repgen/retrieval/config.py +53 -0
  18. repgen_ai-0.1.0/repgen/retrieval/core/__init__.py +0 -0
  19. repgen_ai-0.1.0/repgen/retrieval/core/__pycache__/__init__.cpython-313.pyc +0 -0
  20. repgen_ai-0.1.0/repgen/retrieval/core/__pycache__/code_indexer.cpython-313.pyc +0 -0
  21. repgen_ai-0.1.0/repgen/retrieval/core/__pycache__/dependency_analyzer.cpython-313.pyc +0 -0
  22. repgen_ai-0.1.0/repgen/retrieval/core/__pycache__/module_analyzer.cpython-313.pyc +0 -0
  23. repgen_ai-0.1.0/repgen/retrieval/core/__pycache__/training_code_detector.cpython-313.pyc +0 -0
  24. repgen_ai-0.1.0/repgen/retrieval/core/__pycache__/utils.cpython-313.pyc +0 -0
  25. repgen_ai-0.1.0/repgen/retrieval/core/code_indexer.py +138 -0
  26. repgen_ai-0.1.0/repgen/retrieval/core/dependency_analyzer.py +121 -0
  27. repgen_ai-0.1.0/repgen/retrieval/core/module_analyzer.py +65 -0
  28. repgen_ai-0.1.0/repgen/retrieval/core/training_code_detector.py +240 -0
  29. repgen_ai-0.1.0/repgen/retrieval/core/utils.py +52 -0
  30. repgen_ai-0.1.0/repgen/retrieval/models/__init__.py +0 -0
  31. repgen_ai-0.1.0/repgen/retrieval/models/__pycache__/__init__.cpython-313.pyc +0 -0
  32. repgen_ai-0.1.0/repgen/retrieval/models/__pycache__/hybrid_search.cpython-313.pyc +0 -0
  33. repgen_ai-0.1.0/repgen/retrieval/models/hybrid_search.py +151 -0
  34. repgen_ai-0.1.0/repgen/retrieval/pipeline.py +166 -0
  35. repgen_ai-0.1.0/repgen/server.py +111 -0
  36. repgen_ai-0.1.0/repgen/utils.py +550 -0
  37. repgen_ai-0.1.0/repgen_ai.egg-info/PKG-INFO +199 -0
  38. repgen_ai-0.1.0/repgen_ai.egg-info/SOURCES.txt +42 -0
  39. repgen_ai-0.1.0/repgen_ai.egg-info/dependency_links.txt +1 -0
  40. repgen_ai-0.1.0/repgen_ai.egg-info/requires.txt +21 -0
  41. repgen_ai-0.1.0/repgen_ai.egg-info/top_level.txt +1 -0
  42. repgen_ai-0.1.0/requirements.txt +21 -0
  43. repgen_ai-0.1.0/setup.cfg +4 -0
  44. repgen_ai-0.1.0/setup.py +35 -0
@@ -0,0 +1,3 @@
1
+ include requirements.txt
2
+ include README.md
3
+ recursive-include repgen *
@@ -0,0 +1,199 @@
1
+ Metadata-Version: 2.4
2
+ Name: repgen-ai
3
+ Version: 0.1.0
4
+ Summary: Automated reproduction generation for bug reports using LLMs
5
+ Home-page: https://github.com/mehilshah/RepGen
6
+ Author: Mehil B. Shah
7
+ Author-email: Mehil Shah <shahmehil@dal.ca>
8
+ Project-URL: Homepage, https://github.com/mehilshah/RepGen
9
+ Project-URL: Bug Tracker, https://github.com/mehilshah/RepGen/issues
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Operating System :: OS Independent
13
+ Requires-Python: >=3.8
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: annoy
16
+ Requires-Dist: numpy
17
+ Requires-Dist: pylint
18
+ Requires-Dist: rank_bm25
19
+ Requires-Dist: requests
20
+ Requires-Dist: scikit_learn
21
+ Requires-Dist: sentence_transformers
22
+ Requires-Dist: torch
23
+ Requires-Dist: transformers
24
+ Requires-Dist: pandas
25
+ Requires-Dist: openai
26
+ Requires-Dist: rich
27
+ Requires-Dist: mkdocs
28
+ Requires-Dist: mkdocs-material
29
+ Requires-Dist: watchfiles
30
+ Requires-Dist: black
31
+ Requires-Dist: isort
32
+ Requires-Dist: mypy
33
+ Requires-Dist: flake8
34
+ Requires-Dist: pre-commit
35
+ Requires-Dist: rich-argparse
36
+ Dynamic: author
37
+ Dynamic: home-page
38
+ Dynamic: requires-python
39
+
40
+ # RepGen - Automated Bug Reproduction
41
+
42
+ <div align="center">
43
+
44
+ <p align="center">
45
+ <img src="https://img.shields.io/badge/Status-Active%20Research-4CAF50?style=flat-square" />
46
+ <img src="https://img.shields.io/badge/Python-3.12-3776AB?style=flat-square&logo=python&logoColor=white" />
47
+ <img src="https://img.shields.io/github/actions/workflow/status/mehilshah/RepGen/ci.yml?branch=main&label=Build&style=flat-square&logo=github" />
48
+ <img src="https://img.shields.io/badge/Execution-Dockerized-2496ED?style=flat-square&logo=docker&logoColor=white" />
49
+ <img src="https://img.shields.io/badge/License-MIT-lightgrey?style=flat-square" />
50
+ </p>
51
+
52
+ [Features](#features) • [Quick Start](#quick-start) • [VS Code](#vs-code-extension) • [Documentation](#documentation) • [Paper](https://arxiv.org/abs/2512.14990)
53
+
54
+ </div>
55
+
56
+
57
+ ## Overview
58
+
59
+ **RepGen** is a production-grade tool that leverages state-of-the-art Large Language Models (LLMs) to automatically reproduce bugs in software libraries. By analyzing bug reports and repository context, RepGen plans a reproduction strategy and generates executable Python scripts to replicate the issue.
60
+
61
+ Ideally suited for Deep Learning libraries, RepGen automates the tedious first step of debugging: creating a Minimum Reproducible Example (MRE).
62
+
63
+ ## Features
64
+
65
+ - **Smart Retrieval**: Uses hybrid search (BM25 + Semantic) to find relevant code snippets and training loops for context.
66
+ - **Multi-Backend Support**: Seamlessly switch between LLM providers:
67
+ - **Ollama**: Run locally with models like `qwen2.5-coder`, `llama3`, or `mistral`.
68
+ - **OpenAI**: Utilize `gpt-4o` and `gpt-3.5-turbo` for high-precision generation.
69
+ - **Gemini** & **Claude**: Leverage multimodal and reasoning capabilities.
70
+ - **Remote Input Handling**:
71
+ - Direct support for **GitHub Issue URLs** (fetches content via API).
72
+ - Direct support for **Git Repository URLs** (clones automatically to temp workspace).
73
+ - **Professional VS Code Extension**: A fully integrated sidebar to run reproductions directly in your editor.
74
+ - **Docker Ready**: Full containerization for easy deployment.
75
+
76
+ ---
77
+
78
+ ## Quick Start
79
+
80
+ The easiest way to run RepGen is using **Docker**. This ensures a consistent environment with all dependencies pre-installed.
81
+
82
+ ```bash
83
+ # 1. Build the image
84
+ cd RepGen
85
+ docker build -t repgen .
86
+
87
+ # 2. Run the server (mounting the volume for development if needed)
88
+ docker run -p 8000:8000 repgen
89
+ ```
90
+
91
+ The API will be available at `http://localhost:8000`.
92
+
93
+ ---
94
+
95
+ ## Developer Setup
96
+
97
+ If you prefer to run locally or contribute to the core logic:
98
+
99
+ ```bash
100
+ # 1. Create a virtual environment
101
+ python3 -m venv venv
102
+ source venv/bin/activate # On Windows: .\venv\Scripts\activate
103
+
104
+ # 2. Install RepGen in editable mode
105
+ pip install -e .
106
+
107
+ # 3. Run the CLI
108
+ repgen
109
+ ```
110
+
111
+ ---
112
+
113
+ ## VS Code Extension
114
+
115
+ RepGen includes a premium VS Code extension for a seamless workflow.
116
+
117
+ ### Features
118
+ - **Persistent Sidebar**: Stays active while you work.
119
+ - **Direct Code Display**: Generated reproduction scripts appear directly in the sidebar.
120
+ - **"Open in Editor"**: One-click to open generated code in a new editor tab for review.
121
+ - **Status Tracking**: Real-time progress indicators.
122
+
123
+ ### Installation
124
+ 1. Navigate to `extensions/vscode`.
125
+ 2. Run `npm install && npm run compile`.
126
+ 3. Open the folder in VS Code and press `F5` to launch the Extension Development Host.
127
+
128
+ ---
129
+
130
+ ## Ecosystem
131
+
132
+ ### REST API
133
+ The core engine runs as a FastAPI service.
134
+ - **Start**: `uvicorn repgen.server:app --reload --reload-dir repgen`
135
+ - **Docs**: `http://localhost:8000/docs`
136
+
137
+ ### Web Dashboard
138
+ A modern, React-based UI to manage reproduction tasks visually.
139
+ - **Location**: `ui/`
140
+ - **Start**: `cd ui && npm install && npm run dev`
141
+
142
+ ### Browser Extension
143
+ Injects a "Reproduce" button directly into GitHub Issues.
144
+ - **Location**: `extensions/chrome/`
145
+
146
+ ---
147
+
148
+ ## Usage Examples
149
+
150
+ ### CLI Automation
151
+ ```bash
152
+ repgen \
153
+ --bug-report https://github.com/owner/repo/issues/123 \
154
+ --repo-path https://github.com/owner/repo.git \
155
+ --backend openai \
156
+ --model gpt-4o
157
+ ```
158
+
159
+ ### Configuration
160
+ Set API keys via environment variables:
161
+ ```bash
162
+ export OPENAI_API_KEY="sk-..."
163
+ export GEMINI_API_KEY="AIza..."
164
+ ```
165
+
166
+ ---
167
+
168
+ ## Contributing
169
+
170
+ Contributions are welcome! Please check out the issues tab or submit a PR.
171
+
172
+ ## Issues
173
+
174
+ If you encounter any issues, please open a GitHub issue or contact Mehil Shah at [shahmehil@dal.ca](mailto:shahmehil@dal.ca).
175
+
176
+ ## License
177
+
178
+ MIT © [Mehil Shah](https://github.com/mehilshah)
179
+
180
+
181
+ ## Future Work and Research Directions
182
+
183
+ 1. **Scalable Execution via Cluster Schedulers**
184
+ Integrate RepGen with workload managers such as **SLURM** to offload reproduction tasks to HPC or GPU clusters. This would enable asynchronous execution, queue-based scheduling, and automated user notifications once reproduction artifacts are ready.
185
+
186
+ 2. **Automated Verification in Isolated Sandboxes**
187
+ Extend RepGen with sandboxed execution environments that automatically validate whether a generated reproduction script successfully triggers the reported bug. This would close the loop between generation and confirmation.
188
+
189
+ 3. **Bug-Type–Aware Verification Strategies**
190
+ Develop specialized verification mechanisms tailored to different bug classes (e.g., crashes, numerical instability, performance regressions, nondeterministic failures). Each class may require distinct success criteria and instrumentation.
191
+
192
+ 4. **Fine-Grained Bug Localization and Understanding**
193
+ Move beyond reproduction toward **bug comprehension**, including identifying the most likely fault-inducing components, APIs, or configuration parameters involved in the failure.
194
+
195
+ 5. **Understanding Practitioner Adoption Barriers**
196
+ Survey existing research on why practitioners underutilize automated debugging tools; insights from this literature can guide usability improvements and feature prioritization.
197
+
198
+ 6. **CI/CD and GitHub Actions Integration**
199
+ Integrate RepGen directly into **GitHub Actions** and other CI pipelines, enabling automated bug reproduction as part of issue triage, regression testing, or pull request validation workflows.
@@ -0,0 +1,160 @@
1
+ # RepGen - Automated Bug Reproduction
2
+
3
+ <div align="center">
4
+
5
+ <p align="center">
6
+ <img src="https://img.shields.io/badge/Status-Active%20Research-4CAF50?style=flat-square" />
7
+ <img src="https://img.shields.io/badge/Python-3.12-3776AB?style=flat-square&logo=python&logoColor=white" />
8
+ <img src="https://img.shields.io/github/actions/workflow/status/mehilshah/RepGen/ci.yml?branch=main&label=Build&style=flat-square&logo=github" />
9
+ <img src="https://img.shields.io/badge/Execution-Dockerized-2496ED?style=flat-square&logo=docker&logoColor=white" />
10
+ <img src="https://img.shields.io/badge/License-MIT-lightgrey?style=flat-square" />
11
+ </p>
12
+
13
+ [Features](#features) • [Quick Start](#quick-start) • [VS Code](#vs-code-extension) • [Documentation](#documentation) • [Paper](https://arxiv.org/abs/2512.14990)
14
+
15
+ </div>
16
+
17
+
18
+ ## Overview
19
+
20
+ **RepGen** is a production-grade tool that leverages state-of-the-art Large Language Models (LLMs) to automatically reproduce bugs in software libraries. By analyzing bug reports and repository context, RepGen plans a reproduction strategy and generates executable Python scripts to replicate the issue.
21
+
22
+ Ideally suited for Deep Learning libraries, RepGen automates the tedious first step of debugging: creating a Minimum Reproducible Example (MRE).
23
+
24
+ ## Features
25
+
26
+ - **Smart Retrieval**: Uses hybrid search (BM25 + Semantic) to find relevant code snippets and training loops for context.
27
+ - **Multi-Backend Support**: Seamlessly switch between LLM providers:
28
+ - **Ollama**: Run locally with models like `qwen2.5-coder`, `llama3`, or `mistral`.
29
+ - **OpenAI**: Utilize `gpt-4o` and `gpt-3.5-turbo` for high-precision generation.
30
+ - **Gemini** & **Claude**: leverage multimodal and reasoning capabilities.
31
+ - **Remote Input Handling**:
32
+ - Direct support for **GitHub Issue URLs** (fetches content via API).
33
+ - Direct support for **Git Repository URLs** (clones automatically to temp workspace).
34
+ - **Professional VS Code Extension**: A fully integrated sidebar to run reproductions directly in your editor.
35
+ - **Docker Ready**: Full containerization for easy deployment.
36
+
37
+ ---
38
+
39
+ ## Quick Start
40
+
41
+ The easiest way to run RepGen is using **Docker**. This ensures a consistent environment with all dependencies pre-installed.
42
+
43
+ ```bash
44
+ # 1. Build the image
45
+ cd RepGen
46
+ docker build -t repgen .
47
+
48
+ # 2. Run the server (mounting the volume for development if needed)
49
+ docker run -p 8000:8000 repgen
50
+ ```
51
+
52
+ The API will be available at `http://localhost:8000`.
53
+
54
+ ---
55
+
56
+ ## Developer Setup
57
+
58
+ If you prefer to run locally or contribute to the core logic:
59
+
60
+ ```bash
61
+ # 1. Create a virtual environment
62
+ python3 -m venv venv
63
+ source venv/bin/activate # On Windows: .\venv\Scripts\activate
64
+
65
+ # 2. Install RepGen in editable mode
66
+ pip install -e .
67
+
68
+ # 3. Run the CLI
69
+ repgen
70
+ ```
71
+
72
+ ---
73
+
74
+ ## VS Code Extension
75
+
76
+ RepGen includes a premium VS Code extension for a seamless workflow.
77
+
78
+ ### Features
79
+ - **Persistent Sidebar**: Stays active while you work.
80
+ - **Direct Code Display**: Generated reproduction scripts appear directly in the sidebar.
81
+ - **"Open in Editor"**: One-click to open generated code in a new editor tab for review.
82
+ - **Status Tracking**: Real-time progress indicators.
83
+
84
+ ### Installation
85
+ 1. Navigate to `extensions/vscode`.
86
+ 2. Run `npm install` && `npm run compile`.
87
+ 3. Open the folder in VS Code and press `F5` to launch the Extension Development Host.
88
+
89
+ ---
90
+
91
+ ## Ecosystem
92
+
93
+ ### REST API
94
+ The core engine runs as a FastAPI service.
95
+ - **Start**: `uvicorn repgen.server:app --reload --reload-dir repgen`
96
+ - **Docs**: `http://localhost:8000/docs`
97
+
98
+ ### Web Dashboard
99
+ A modern, React-based UI to manage reproduction tasks visually.
100
+ - **Location**: `ui/`
101
+ - **Start**: `cd ui && npm install && npm run dev`
102
+
103
+ ### Browser Extension
104
+ Injects a "Reproduce" button directly into GitHub Issues.
105
+ - **Location**: `extensions/chrome/`
106
+
107
+ ---
108
+
109
+ ## Usage Examples
110
+
111
+ ### CLI Automation
112
+ ```bash
113
+ repgen \
114
+ --bug-report https://github.com/owner/repo/issues/123 \
115
+ --repo-path https://github.com/owner/repo.git \
116
+ --backend openai \
117
+ --model gpt-4o
118
+ ```
119
+
120
+ ### Configuration
121
+ Set API keys via environment variables:
122
+ ```bash
123
+ export OPENAI_API_KEY="sk-..."
124
+ export GEMINI_API_KEY="AIza..."
125
+ ```
126
+
127
+ ---
128
+
129
+ ## Contributing
130
+
131
+ Contributions are welcome! Please check out the issues tab or submit a PR.
132
+
133
+ ## Issues
134
+
135
+ If you encounter any issues, please open a GitHub issue or contact Mehil Shah at [shahmehil@dal.ca](mailto:shahmehil@dal.ca).
136
+
137
+ ## License
138
+
139
+ MIT © [Mehil Shah](https://github.com/mehilshah)
140
+
141
+
142
+ ## Future Work and Research Directions
143
+
144
+ 1. **Scalable Execution via Cluster Schedulers**
145
+ Integrate RepGen with workload managers such as **SLURM** to offload reproduction tasks to HPC or GPU clusters. This would enable asynchronous execution, queue-based scheduling, and automated user notifications once reproduction artifacts are ready.
146
+
147
+ 2. **Automated Verification in Isolated Sandboxes**
148
+ Extend RepGen with sandboxed execution environments that automatically validate whether a generated reproduction script successfully triggers the reported bug. This would close the loop between generation and confirmation.
149
+
150
+ 3. **Bug-Type–Aware Verification Strategies**
151
+ Develop specialized verification mechanisms tailored to different bug classes (e.g., crashes, numerical instability, performance regressions, nondeterministic failures). Each class may require distinct success criteria and instrumentation.
152
+
153
+ 4. **Fine-Grained Bug Localization and Understanding**
154
+ Move beyond reproduction toward **bug comprehension**, including identifying the most likely fault-inducing components, APIs, or configuration parameters involved in the failure.
155
+
156
+ 5. **Understanding Practitioner Adoption Barriers**
157
+ Read papers about why practitioners underutilize automated debugging tools, and insights from these papers can guide usability improvements and feature prioritization.
158
+
159
+ 6. **CI/CD and GitHub Actions Integration**
160
+ Integrate RepGen directly into **GitHub Actions** and other CI pipelines, enabling automated bug reproduction as part of issue triage, regression testing, or pull request validation workflows.
@@ -0,0 +1,45 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "repgen-ai"
7
+ version = "0.1.0"
8
+ description = "Automated reproduction generation for bug reports using LLMs"
9
+ readme = "README.md"
10
+ authors = [
11
+ { name = "Mehil Shah", email = "shahmehil@dal.ca" },
12
+ ]
13
+ classifiers = [
14
+ "Programming Language :: Python :: 3",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Operating System :: OS Independent",
17
+ ]
18
+ requires-python = ">=3.8"
19
+ dependencies = [
20
+ "annoy",
21
+ "numpy",
22
+ "pylint",
23
+ "rank_bm25",
24
+ "requests",
25
+ "scikit_learn",
26
+ "sentence_transformers",
27
+ "torch",
28
+ "transformers",
29
+ "pandas",
30
+ "openai",
31
+ "rich",
32
+ "mkdocs",
33
+ "mkdocs-material",
34
+ "watchfiles",
35
+ "black",
36
+ "isort",
37
+ "mypy",
38
+ "flake8",
39
+ "pre-commit",
40
+ "rich-argparse",
41
+ ]
42
+
43
+ [project.urls]
44
+ "Homepage" = "https://github.com/mehilshah/RepGen"
45
+ "Bug Tracker" = "https://github.com/mehilshah/RepGen/issues"
@@ -0,0 +1,51 @@
1
+ from typing import Optional
2
+
3
+ from .core import RepGenService
4
+
5
+
6
def reproduce(
    bug_report_source: str,
    repo_source: str,
    backend: str = "openai",
    model: str = "gpt-4o",
    api_key: Optional[str] = None,
    commit: Optional[str] = None,
    output_dir: str = "./repgen_results",
) -> str:
    """
    Generate a reproduction script for a given bug report and repository.

    Convenience wrapper that builds a ``RepGenService`` with ``output_dir``
    and forwards the remaining arguments to its ``run_reproduction`` method.

    Args:
        bug_report_source: URL or path to the bug report.
        repo_source: URL or path to the repository.
        backend: The LLM backend to use (default: "openai").
        model: The model name to use (default: "gpt-4o").
        api_key: API key for the backend (optional).
        commit: Specific commit hash to checkout (optional).
        output_dir: Directory to store intermediate artifacts
            (default: "./repgen_results").

    Returns:
        The ``content`` entry of the first file in the result's ``files``
        list (usually the reproduction script) as a string.

    Raises:
        RuntimeError: If the run is not reported as successful, or if it is
            reported as successful but produced no files.
    """
    service = RepGenService(output_dir=output_dir)
    result = service.run_reproduction(
        bug_report_source=bug_report_source,
        repo_source=repo_source,
        backend=backend,
        model=model,
        commit=commit,
        api_key=api_key,
    )

    # NOTE(review): the result is assumed to be a dict-like mapping with
    # "success", "files" and optionally "error" keys -- confirm against
    # RepGenService.run_reproduction.
    files = result.get("files")
    if result.get("success") and files:
        # The first generated file is usually the reproduction script.
        return files[0]["content"]

    # Prefer the error reported by the service; fall back to a message that
    # distinguishes "succeeded but produced nothing" from an unknown failure,
    # and never surface a literal "None" to the caller.
    error_msg = result.get("error")
    if not error_msg:
        if result.get("success"):
            error_msg = "No reproduction files were generated"
        else:
            error_msg = "Unknown error during reproduction"
    raise RuntimeError(f"Reproduction failed: {error_msg}")
49
+
50
+
51
+ __all__ = ["reproduce", "RepGenService"]