repgen_ai-0.1.0-py3-none-any.whl
This diff shows the contents of publicly released versions of this package as they appear in the supported public registries, and is provided for informational purposes only.
- repgen/__init__.py +51 -0
- repgen/__pycache__/__init__.cpython-313.pyc +0 -0
- repgen/__pycache__/cli.cpython-313.pyc +0 -0
- repgen/__pycache__/core.cpython-313.pyc +0 -0
- repgen/__pycache__/server.cpython-313.pyc +0 -0
- repgen/__pycache__/utils.cpython-313.pyc +0 -0
- repgen/cli.py +375 -0
- repgen/core.py +239 -0
- repgen/retrieval/__init__.py +4 -0
- repgen/retrieval/__pycache__/__init__.cpython-313.pyc +0 -0
- repgen/retrieval/__pycache__/config.cpython-313.pyc +0 -0
- repgen/retrieval/__pycache__/pipeline.cpython-313.pyc +0 -0
- repgen/retrieval/config.py +53 -0
- repgen/retrieval/core/__init__.py +0 -0
- repgen/retrieval/core/__pycache__/__init__.cpython-313.pyc +0 -0
- repgen/retrieval/core/__pycache__/code_indexer.cpython-313.pyc +0 -0
- repgen/retrieval/core/__pycache__/dependency_analyzer.cpython-313.pyc +0 -0
- repgen/retrieval/core/__pycache__/module_analyzer.cpython-313.pyc +0 -0
- repgen/retrieval/core/__pycache__/training_code_detector.cpython-313.pyc +0 -0
- repgen/retrieval/core/__pycache__/utils.cpython-313.pyc +0 -0
- repgen/retrieval/core/code_indexer.py +138 -0
- repgen/retrieval/core/dependency_analyzer.py +121 -0
- repgen/retrieval/core/module_analyzer.py +65 -0
- repgen/retrieval/core/training_code_detector.py +240 -0
- repgen/retrieval/core/utils.py +52 -0
- repgen/retrieval/models/__init__.py +0 -0
- repgen/retrieval/models/__pycache__/__init__.cpython-313.pyc +0 -0
- repgen/retrieval/models/__pycache__/hybrid_search.cpython-313.pyc +0 -0
- repgen/retrieval/models/hybrid_search.py +151 -0
- repgen/retrieval/pipeline.py +166 -0
- repgen/server.py +111 -0
- repgen/utils.py +550 -0
- repgen_ai-0.1.0.dist-info/METADATA +199 -0
- repgen_ai-0.1.0.dist-info/RECORD +36 -0
- repgen_ai-0.1.0.dist-info/WHEEL +5 -0
- repgen_ai-0.1.0.dist-info/top_level.txt +1 -0
repgen/__init__.py
ADDED
@@ -0,0 +1,51 @@
from typing import Optional

from .core import RepGenService


def reproduce(
    bug_report_source: str,
    repo_source: str,
    backend: str = "openai",
    model: str = "gpt-4o",
    api_key: Optional[str] = None,
    commit: Optional[str] = None,
    output_dir: str = "./repgen_results",
) -> str:
    """
    Generates a reproduction script for a given bug report and repository.

    Args:
        bug_report_source: URL or path to the bug report.
        repo_source: URL or path to the repository.
        backend: The LLM backend to use (default: "openai").
        model: The model name to use (default: "gpt-4o").
        api_key: API key for the backend (optional).
        commit: Specific commit hash to checkout (optional).
        output_dir: Directory to store intermediate artifacts (default: "./repgen_results").

    Returns:
        The generated reproduction script as a string.

    Raises:
        RuntimeError: If reproduction fails.
    """
    service = RepGenService(output_dir=output_dir)
    result = service.run_reproduction(
        bug_report_source=bug_report_source,
        repo_source=repo_source,
        backend=backend,
        model=model,
        commit=commit,
        api_key=api_key,
    )

    if result["success"] and result["files"]:
        # Return the content of the first generated file (usually the reproduction script)
        return result["files"][0]["content"]
    else:
        error_msg = result.get("error", "Unknown error during reproduction")
        raise RuntimeError(f"Reproduction failed: {error_msg}")


__all__ = ["reproduce", "RepGenService"]
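For reference, a minimal usage sketch of the reproduce API exported above. The bug-report URL and repository path below are placeholders, not values shipped with the package, and the example assumes the OPENAI_API_KEY environment variable is set (api_key may also be passed explicitly):

# Hypothetical caller of the public API above; every input value is a placeholder.
from repgen import reproduce

script = reproduce(
    bug_report_source="https://example.com/issues/1234",  # placeholder URL
    repo_source="https://github.com/example/project",     # placeholder repo
    backend="openai",
    model="gpt-4o",
)
print(script)  # the reproduction script is returned as a plain string

On failure, reproduce raises RuntimeError carrying the pipeline's error message, so callers only need to handle one exception type.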
Binary files (5 compiled .pyc caches under repgen/__pycache__/), contents not shown.
repgen/cli.py
ADDED
@@ -0,0 +1,375 @@
import argparse
import os
import subprocess
import sys
import time

# Rich Imports
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
from rich.prompt import Confirm, IntPrompt, Prompt
from rich.table import Table
from rich_argparse import RichHelpFormatter

from .core import RepGenService
from .utils import console

# Import from utils


# ==========================================
# INTERACTIVE UI HELPERS
# ==========================================


def check_ollama_status():
    try:
        subprocess.run(["ollama", "--version"], capture_output=True, check=True)
        return True
    except Exception:
        return False


def ensure_ollama_model(model: str):
    try:
        result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
        if model not in result.stdout:
            console.print(f"[yellow]Model {model} not found. Pulling...[/yellow]")
            subprocess.run(["ollama", "pull", model], check=True)
            console.print(f"[green]Successfully pulled {model}[/green]")
    except Exception as e:
        console.print(f"[red]Error checking/pulling model: {e}[/red]")


def get_interactive_config():
    """Interactive prompt to gather missing configurations via Rich."""
    console.print(
        Panel.fit(
            "[bold cyan]RepGen Interactive Setup[/bold cyan]", border_style="cyan"
        )
    )

    # 1. Backend Selection
    backends = ["ollama", "openai", "gemini", "claude"]
    console.print("\n[bold]Select LLM Backend:[/bold]")
    for i, b in enumerate(backends, 1):
        console.print(f"  {i}) [green]{b.capitalize()}[/green]")

    choice = IntPrompt.ask(
        "Choice",
        choices=[str(i) for i in range(1, len(backends) + 1)],
        show_choices=False,
    )
    backend = backends[choice - 1]

    # 2. Environment & Key Setup
    model = ""
    if backend == "ollama":
        with console.status("[bold green]Checking Ollama status...[/bold green]"):
            is_installed = check_ollama_status()

        if not is_installed:
            console.print(
                "[bold red]Error: Ollama is not installed or not running.[/bold red]"
            )
            if Confirm.ask("Would you like me to try installing it via Homebrew?"):
                try:
                    subprocess.run(["brew", "install", "ollama"], check=True)
                    console.print(
                        "[green]Ollama installed. Please start it and try again.[/green]"
                    )
                except Exception:
                    console.print(
                        "[red]Installation failed. Please install manually.[/red]"
                    )
                sys.exit(1)
            else:
                sys.exit(1)

        model = Prompt.ask("Ollama Model", default="qwen2.5-coder:7b")
        with console.status(
            f"[bold green]Ensuring model {model} is available...[/bold green]"
        ):
            ensure_ollama_model(model)

    else:
        env_keys = {
            "openai": "OPENAI_API_KEY",
            "gemini": "GEMINI_API_KEY",
            "claude": "ANTHROPIC_API_KEY",
        }
        env_var = env_keys[backend]

        if not os.getenv(env_var):
            console.print(f"\n[yellow]{env_var} not found.[/yellow]")
            key = Prompt.ask(
                f"Enter your {backend.capitalize()} API Key", password=True
            )
            if key:
                os.environ[env_var] = key
            else:
                console.print("[red]API Key required. Terminating.[/red]")
                sys.exit(1)

        # Model Selection Suggestions
        suggestions = {
            "openai": ["gpt-4o", "gpt-4-turbo", "gpt-3.5-turbo"],
            "gemini": ["gemini-1.5-pro", "gemini-1.5-flash"],
            "claude": ["claude-3-5-sonnet-20240620", "claude-3-opus-20240229"],
        }

        model = Prompt.ask(
            f"Select {backend.capitalize()} Model",
            choices=suggestions[backend],
            default=suggestions[backend][0],
        )

    return backend, model


# ==========================================
# MAIN EXECUTION
# ==========================================


def main():
    # Silence internal loggers to keep the UI clean (Agent-style)
    import logging

    logging.getLogger("repgen").setLevel(logging.ERROR)

    # Styled Banner
    console.print(
        Panel(
            "[bold white]RepGen CLI: Automated Deep Learning Bug Reproduction[/bold white]\n"
            "[italic grey70]Generate reproduction scripts from bug reports automatically.[/italic grey70]",
            border_style="bold blue",
            title="[bold cyan]RepGen[/bold cyan]",
            subtitle="[dim]v0.1.0[/dim]",
            expand=False,
            padding=(1, 2),
        )
    )

    parser = argparse.ArgumentParser(
        description="RepGen CLI: Automated Bug Reproduction",
        formatter_class=RichHelpFormatter,
    )
    parser.add_argument("--bug-report", help="Path or URL to the bug report")
    parser.add_argument("--repo-path", help="Path or URL to the repository")
    parser.add_argument("--commit", help="Specific git commit ID to checkout")
    parser.add_argument(
        "--output-dir", default="./repgen_results", help="Directory for results"
    )
    parser.add_argument(
        "--backend",
        choices=["ollama", "openai", "gemini", "claude"],
        help="LLM backend",
    )
    parser.add_argument("--model", help="Model name")
    parser.add_argument(
        "--max-attempts", type=int, default=3, help="Max attempts per context"
    )

    args = parser.parse_args()

    try:
        # Interactive setup if arguments are missing
        if not args.bug_report:
            args.bug_report = Prompt.ask(
                "[bold cyan]Enter URI to Bug Report file[/bold cyan] (Path or URL)"
            )
        if not args.repo_path:
            args.repo_path = Prompt.ask(
                "[bold cyan]Enter URI to Repository[/bold cyan] (Path or URL)"
            )

        if not args.backend or not args.model:
            args.backend, args.model = get_interactive_config()

        service = RepGenService(output_dir=args.output_dir)

        console.print("\n[bold]Starting Reproduction Pipeline...[/bold]")
        start_time = time.time()

        # Define UI Helper for Agent Panels
        def print_agent_panel(
            content: str, agent_name: str, color: str, title_desc: str = ""
        ):
            from rich.markdown import Markdown
            from rich.panel import Panel

            width = min(120, console.size.width - 4)

            md = Markdown(content)
            title = f"{agent_name} ({title_desc})" if title_desc else agent_name

            panel = Panel(
                md,
                title=f"[bold {color}]{title}[/bold {color}]",
                title_align="left",
                border_style=color,
                width=width,
                padding=(0, 1),
            )
            console.print(panel)

        with Progress(
            SpinnerColumn("dots", style="cyan"),
            TextColumn("[bold blue]{task.description}"),
            TimeElapsedColumn(),
            console=console,
            transient=False,
        ) as progress:
            task = progress.add_task("Initializing...", total=None)

            def progress_cb(stage, msg, data=None):
                # Update description based on stage
                stage_colors = {
                    "setup": "cyan",
                    "retrieval": "magenta",
                    "refinement": "yellow",
                    "generation": "green",
                }
                color = stage_colors.get(stage, "white")
                progress.update(
                    task, description=f"[{color}][{stage.upper()}][/] {msg}"
                )

                # Display intermediate artifacts
                if data:
                    if data.get("type") == "refined_report":
                        print_agent_panel(
                            data["content"],
                            "Refinement Agent",
                            "yellow",
                            "Bug Report processed",
                        )
                    elif data.get("type") == "plan":
                        import json

                        try:
                            plan_obj = json.loads(data["content"])
                            # Format list as a bulleted list for better readability
                            plan_text = ""
                            for step in plan_obj:
                                plan_text += f"- {step}\n"
                            print_agent_panel(
                                plan_text,
                                "Code Generation Agent",
                                "green",
                                f"Plan for Context {data.get('index')}",
                            )
                        except Exception:
                            print_agent_panel(
                                data["content"],
                                "Code Generation Agent",
                                "green",
                                f"Plan for Context {data.get('index')}",
                            )
                    elif data.get("type") == "context":
                        # Summarize context
                        summary = f"**Loaded Context {data.get('index')}**\n\n"
                        summary += f"Contains {len(data['content'].splitlines())} lines of code context."
                        print_agent_panel(
                            summary,
                            "Context Retrieval Agent",
                            "blue",
                            f"Context {data.get('index')}",
                        )
                    elif data.get("type") == "feedback":
                        print_agent_panel(
                            f"**Feedback received:** {data['content']}",
                            "Self-Correction",
                            "red",
                            "Error detected",
                        )

            result = service.run_reproduction(
                bug_report_source=args.bug_report,
                repo_source=args.repo_path,
                backend=args.backend,
                model=args.model,
                commit=args.commit,
                max_attempts=args.max_attempts,
                progress_callback=progress_cb,
            )

        end_time = time.time()
        duration = end_time - start_time

        console.print(f"\n[bold]Total Duration:[/bold] {duration:.2f} seconds\n")

        if result["success"]:
            console.print(
                Panel(
                    "[bold green]Reproduction Success![/bold green]",
                    title="System",
                    title_align="left",
                    border_style="green",
                    width=min(120, console.size.width - 4),
                )
            )

            # Display Results Table
            table = Table(
                title="Generated Files",
                show_header=True,
                header_style="bold magenta",
                width=min(120, console.size.width - 4),
            )
            table.add_column("File Name", style="cyan")
            table.add_column("Path", style="dim")
            table.add_column("Size", justify="right")

            for file_info in result["files"]:
                path = file_info["path"]
                content = file_info["content"]
                name = os.path.basename(path)
                size = f"{len(content.encode('utf-8'))} bytes"
                table.add_row(name, path, size)

            console.print(table)

            # Display File Contents
            from rich.syntax import Syntax

            for file_info in result["files"]:
                syntax = Syntax(
                    file_info["content"], "python", theme="monokai", line_numbers=True
                )

                panel = Panel(
                    syntax,
                    title=f"[bold cyan]Reproducer Code ({os.path.basename(file_info['path'])})[/bold cyan]",
                    title_align="left",
                    border_style="cyan",
                    width=min(120, console.size.width - 4),
                )
                console.print(panel)

            console.print("\n[bold green]Next Steps:[/bold green]")
            for file_info in result["files"]:
                console.print(f"  Run: [code]python {file_info['path']}[/code]")

        else:
            console.print(
                Panel(
                    f"Error: {result['error']}",
                    title="[bold red]Reproduction Failed[/bold red]",
                    title_align="left",
                    border_style="red",
                    width=min(120, console.size.width - 4),
                )
            )

    except KeyboardInterrupt:
        console.print("\n[bold red]⚠ Operation cancelled by user.[/bold red]")
        sys.exit(130)
    except Exception as e:
        console.print(f"\n[bold red]Unexpected Error:[/bold red] {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
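The progress_cb defined above implies a small callback contract between the CLI and RepGenService: a callable (stage, msg, data=None), where stage is one of setup, retrieval, refinement, or generation, and data, when present, carries a "type" key (refined_report, plan, context, feedback, or code). A minimal sketch of an alternative, non-Rich consumer, assuming only that contract; the function name is illustrative:

# Sketch of a plain-text progress consumer; assumes only the (stage, msg, data)
# contract observed in cli.py above.
def plain_progress(stage, msg, data=None):
    print(f"[{stage.upper()}] {msg}")
    if data and data.get("type") == "feedback":
        # Self-correction events carry the error text under "content".
        print(f"    feedback: {data['content']}")

# Passed in place of progress_cb:
# service.run_reproduction(..., progress_callback=plain_progress)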
repgen/core.py
ADDED
@@ -0,0 +1,239 @@
import json
import logging
import time
from pathlib import Path
from typing import Any, Dict, Optional

from .retrieval.pipeline import RetrievalPipeline
from .utils import (
    _build_prompt,
    check_relevance,
    check_structural_correctness,
    checkout_commit,
    clean_context,
    create_prompt_plan,
    create_prompt_refinement,
    extract_json_content,
    fetch_content,
    prepare_repository,
    query_llm,
)

logger = logging.getLogger("repgen.core")


class RepGenService:
    def __init__(self, output_dir: str = "./repgen_results"):
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def run_reproduction(
        self,
        bug_report_source: str,
        repo_source: str,
        backend: str,
        model: str,
        commit: Optional[str] = None,
        max_attempts: int = 3,
        api_key: Optional[str] = None,
        progress_callback=None,
    ) -> Dict[str, Any]:
        """
        Executes the full reproduction pipeline.

        Args:
            bug_report_source: URL or path to bug report
            repo_source: URL or path to repository
            backend: LLM backend (ollama, openai, etc.)
            model: Model name
            commit: Optional commit hash
            max_attempts: Max retry attempts per context
            api_key: Optional API key for backend
            progress_callback: Optional function(stage, message, data=None)

        Returns:
            Dict containing results and paths
        """
        result = {"success": False, "project_id": "", "files": [], "error": None}

        def notify(stage, msg, data=None):
            if progress_callback:
                progress_callback(stage, msg, data)
            logger.info(f"[{stage}] {msg}")

        try:
            # 1. Prepare Inputs
            notify("setup", "Fetching inputs...")
            bug_report_content = fetch_content(bug_report_source)
            local_repo_path = prepare_repository(repo_source)

            if commit:
                notify("setup", f"Checking out commit {commit}...")
                if not checkout_commit(local_repo_path, commit):
                    raise RuntimeError("Failed to checkout commit")

            # 2. Initialize Pipeline
            # Create a unique project ID/folder
            project_id = f"RepGen_Run_{int(time.time())}"
            run_dir = self.output_dir / project_id
            run_dir.mkdir(parents=True, exist_ok=True)

            local_bug_report_path = run_dir / "input_bug_report.txt"
            local_bug_report_path.write_text(bug_report_content)

            notify("retrieval", "Initializing Retrieval Pipeline...")
            pipeline = RetrievalPipeline(
                repo_path=local_repo_path,
                bug_report_path=str(local_bug_report_path),
                output_dir=str(self.output_dir),
            )

            # 3. Retrieval
            notify("retrieval", "Running Code Retrieval...")
            ret_result = pipeline.run_pipeline()
            if not ret_result:
                raise RuntimeError("Retrieval failed to find relevant code")

            # 4. Refinement
            notify("refinement", "Refining Bug Report...")
            refined_report = query_llm(
                create_prompt_refinement(bug_report_content),
                backend,
                model,
                api_key=api_key,
            )
            if not refined_report:
                refined_report = bug_report_content

            refined_path = (
                pipeline.config.REFINED_BUG_REPORT_DIR_OUT / "refined_report.txt"
            )
            refined_path.write_text(refined_report)
            notify(
                "refinement",
                "Bug Report Refined",
                data={"type": "refined_report", "content": refined_report},
            )

            # 5. Generation
            context_files = sorted(list(pipeline.config.CONTEXT_DIR_OUT.glob("*.json")))
            if not context_files:
                raise RuntimeError("No context files generated")

            notify(
                "generation",
                f"Starting generation for {len(context_files)} contexts...",
            )

            successful_gen = False
            generated_script_path = None

            for idx, ctx_file in enumerate(context_files, 1):
                ctx_num = ctx_file.stem.split("_")[-1]
                notify("generation", f"Processing Context {idx}/{len(context_files)}")

                with open(ctx_file, "r") as f:
                    ctx_content = f.read()

                notify(
                    "generation",
                    f"Context {ctx_num} Loaded",
                    data={
                        "type": "context",
                        "content": clean_context(ctx_content),
                        "index": idx,
                    },
                )

                # Plan
                plan_raw = query_llm(
                    create_prompt_plan(refined_report, json.loads(ctx_content)),
                    backend,
                    model,
                    api_key=api_key,
                )
                plan = extract_json_content(plan_raw) or "[]"
                notify(
                    "generation",
                    f"Plan Generated for Context {ctx_num}",
                    data={"type": "plan", "content": plan, "index": idx},
                )

                prompt_code = _build_prompt(refined_report, ctx_content, plan)

                # Attempts
                for attempt in range(1, max_attempts + 1):
                    notify("generation", f"Context {ctx_num} - Attempt {attempt}")

                    raw_code = query_llm(prompt_code, backend, model, api_key=api_key)
                    code = extract_json_content(raw_code) or raw_code

                    is_valid, err = check_structural_correctness(code)
                    if err:
                        notify(
                            "generation",
                            f"Context {ctx_num} - Attempt {attempt} - Syntax Error",
                            data={"type": "feedback", "content": err, "code": code},
                        )
                        prompt_code = _build_prompt(
                            refined_report,
                            ctx_content,
                            plan,
                            feedback=f"Syntax Error: {err}",
                        )
                        continue

                    if not check_relevance(
                        refined_report, code, backend, model, api_key=api_key
                    ):
                        notify(
                            "generation",
                            f"Context {ctx_num} - Attempt {attempt} - Irrelevant",
                            data={
                                "type": "feedback",
                                "content": "Code deemed irrelevant",
                                "code": code,
                            },
                        )
                        continue

                    # Success
                    generated_script_path = (
                        pipeline.config.REPRODUCTION_DIR_OUT / f"reproduce_{ctx_num}.py"
                    )
                    generated_script_path.write_text(code)

                    successful_gen = True
                    result["success"] = True

                    result["files"].append(
                        {"path": str(generated_script_path), "content": code}
                    )
                    notify(
                        "generation",
                        "Reproduction Successful",
                        data={
                            "type": "code",
                            "content": code,
                            "path": str(generated_script_path),
                        },
                    )
                    break

                if successful_gen:
                    break

            if not successful_gen:
                result["error"] = "Failed to generate valid script after all attempts"

            result["project_id"] = pipeline.config.PROJECT_ID
            return result

        except RuntimeError as e:
            logger.error(f"Pipeline flow error: {e}")
            result["error"] = str(e)
            return result
        except Exception as e:
            logger.exception("Unexpected reproduction failure")
            result["error"] = f"Unexpected error: {str(e)}"
            return result
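run_reproduction always returns a dict with the keys success, project_id, files, and error (exceptions are caught internally), so callers can branch on the result instead of wrapping the call in try/except. A minimal consumer sketch; both input paths are placeholders:

# Sketch of consuming the result dict returned above; inputs are placeholders.
from repgen.core import RepGenService

service = RepGenService(output_dir="./repgen_results")
result = service.run_reproduction(
    bug_report_source="bug_report.txt",  # placeholder path
    repo_source="./my_repo",             # placeholder path
    backend="ollama",
    model="qwen2.5-coder:7b",
)
if result["success"]:
    for f in result["files"]:
        print(f["path"])  # where each reproduce_<n>.py was written
else:
    print(result["error"])  # setup, retrieval, or generation failure reason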
Binary files (compiled .pyc caches), contents not shown.