mini-swe-agent 1.1.0 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mini_swe_agent-1.1.0.dist-info/METADATA +288 -0
- mini_swe_agent-1.1.0.dist-info/RECORD +47 -0
- mini_swe_agent-1.1.0.dist-info/WHEEL +5 -0
- mini_swe_agent-1.1.0.dist-info/entry_points.txt +5 -0
- mini_swe_agent-1.1.0.dist-info/licenses/LICENSE.md +21 -0
- mini_swe_agent-1.1.0.dist-info/top_level.txt +1 -0
- minisweagent/__init__.py +67 -0
- minisweagent/__main__.py +7 -0
- minisweagent/agents/__init__.py +1 -0
- minisweagent/agents/default.py +129 -0
- minisweagent/agents/interactive.py +148 -0
- minisweagent/agents/interactive_textual.py +324 -0
- minisweagent/config/README.md +9 -0
- minisweagent/config/__init__.py +24 -0
- minisweagent/config/__pycache__/__init__.cpython-313.pyc +0 -0
- minisweagent/config/default.yaml +143 -0
- minisweagent/config/extra/__init__.py +1 -0
- minisweagent/config/extra/swebench.yaml +229 -0
- minisweagent/config/github_issue.yaml +146 -0
- minisweagent/config/local.yaml +154 -0
- minisweagent/config/local2.tcss +128 -0
- minisweagent/environments/__init__.py +1 -0
- minisweagent/environments/docker.py +98 -0
- minisweagent/environments/extra/__init__.py +0 -0
- minisweagent/environments/extra/swerex_docker.py +39 -0
- minisweagent/environments/local.py +33 -0
- minisweagent/environments/singularity.py +52 -0
- minisweagent/models/__init__.py +81 -0
- minisweagent/models/anthropic.py +19 -0
- minisweagent/models/litellm_model.py +64 -0
- minisweagent/models/test_models.py +38 -0
- minisweagent/models/utils/cache_control.py +42 -0
- minisweagent/models/utils/key_per_thread.py +18 -0
- minisweagent/py.typed +0 -0
- minisweagent/run/__init__.py +1 -0
- minisweagent/run/extra/__init__.py +0 -0
- minisweagent/run/extra/config.py +100 -0
- minisweagent/run/extra/swebench.py +235 -0
- minisweagent/run/extra/swebench_single.py +53 -0
- minisweagent/run/extra/utils/batch_progress.py +164 -0
- minisweagent/run/github_issue.py +80 -0
- minisweagent/run/hello_world.py +36 -0
- minisweagent/run/inspector.py +212 -0
- minisweagent/run/mini.py +118 -0
- minisweagent/run/mini_extra.py +44 -0
- minisweagent/run/utils/__init__.py +0 -0
- minisweagent/run/utils/save.py +35 -0
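
The hunks below reproduce the added source files. Since the wheel ships minisweagent/__main__.py, the installed package should be runnable as a module; a minimal smoke-test sketch (not part of the package; the --help flag is assumed to be supported by the typer-based CLI):

# Editorial illustration, not package content.
# Assumes the wheel is installed into the current interpreter.
import subprocess
import sys

subprocess.run([sys.executable, "-m", "minisweagent", "--help"], check=True)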
minisweagent/run/extra/swebench.py
@@ -0,0 +1,235 @@
#!/usr/bin/env python3

"""Run mini-SWE-agent on SWEBench instances.

[not dim]
More information about the usage: [bold green]https://mini-swe-agent.com/latest/usage/swebench/[/bold green]
[/not dim]
"""

import concurrent.futures
import json
import random
import re
import threading
import time
import traceback
from pathlib import Path

import typer
import yaml
from datasets import load_dataset
from rich.live import Live

from minisweagent.agents.default import DefaultAgent
from minisweagent.config import builtin_config_dir, get_config_path
from minisweagent.environments.docker import DockerEnvironment
from minisweagent.models import get_model
from minisweagent.run.extra.utils.batch_progress import RunBatchProgressManager
from minisweagent.run.utils.save import save_traj

app = typer.Typer(rich_markup_mode="rich", add_completion=False)

DATASET_MAPPING = {
    "full": "princeton-nlp/SWE-Bench",
    "verified": "princeton-nlp/SWE-Bench_Verified",
    "lite": "princeton-nlp/SWE-Bench_Lite",
    "multimodal": "princeton-nlp/SWE-Bench_Multimodal",
    "multilingual": "swe-bench/SWE-Bench_Multilingual",
    "smith": "SWE-bench/SWE-smith",
    "_test": "klieret/swe-bench-dummy-test-dataset",
}


_OUTPUT_FILE_LOCK = threading.Lock()


class ProgressTrackingAgent(DefaultAgent):
    """Simple wrapper around DefaultAgent that provides progress updates."""

    def __init__(self, *args, progress_manager: RunBatchProgressManager, instance_id: str = "", **kwargs):
        super().__init__(*args, **kwargs)
        self.progress_manager: RunBatchProgressManager = progress_manager
        self.instance_id = instance_id

    def step(self) -> dict:
        """Override step to provide progress updates."""
        self.progress_manager.update_instance_status(
            self.instance_id, f"Step {self.model.n_calls + 1:3d} (${self.model.cost:.2f})"
        )
        return super().step()


def get_swebench_docker_image_name(instance: dict) -> str:
    """Get the image name for a SWEBench instance."""
    image_name = instance.get("image_name", None)
    if image_name is None:
        # Docker doesn't allow double underscore, so we replace them with a magic token
        iid = instance["instance_id"]
        id_docker_compatible = iid.replace("__", "_1776_")
        image_name = f"swebench/sweb.eval.x86_64.{id_docker_compatible}:latest".lower()
    return image_name


def update_preds_file(output_path: Path, instance_id: str, model_name: str, result: str):
    """Update the output JSON file with results from a single instance."""
    with _OUTPUT_FILE_LOCK:
        output_data = {}
        if output_path.exists():
            output_data = json.loads(output_path.read_text())
        output_data[instance_id] = {
            "model_name_or_path": model_name,
            "instance_id": instance_id,
            "model_patch": result,
        }
        output_path.write_text(json.dumps(output_data, indent=2))


def remove_from_preds_file(output_path: Path, instance_id: str):
    """Remove an instance from the predictions file."""
    if not output_path.exists():
        return
    with _OUTPUT_FILE_LOCK:
        output_data = json.loads(output_path.read_text())
        if instance_id in output_data:
            del output_data[instance_id]
            output_path.write_text(json.dumps(output_data, indent=2))


def process_instance(
    instance: dict,
    output_dir: Path,
    model_name: str | None,
    config_path: str | Path,
    progress_manager: RunBatchProgressManager,
) -> None:
    """Process a single SWEBench instance."""
    instance_id = instance["instance_id"]
    instance_dir = output_dir / instance_id
    # avoid inconsistent state if something here fails and there's leftover previous files
    remove_from_preds_file(output_dir / "preds.json", instance_id)
    (instance_dir / f"{instance_id}.traj.json").unlink(missing_ok=True)

    image_name = get_swebench_docker_image_name(instance)
    config = yaml.safe_load(get_config_path(config_path).read_text())
    model = get_model(model_name, config=config.get("model", {}))
    task = instance["problem_statement"]

    progress_manager.on_instance_start(instance_id)
    progress_manager.update_instance_status(instance_id, "Pulling/starting docker")

    agent = None
    extra_info = None

    try:
        env = DockerEnvironment(**(config.get("environment", {}) | {"image": image_name}))
        agent = ProgressTrackingAgent(
            model,
            env,
            progress_manager=progress_manager,
            instance_id=instance_id,
            **config.get("agent", {}),
        )
        exit_status, result = agent.run(task)
    except Exception as e:
        print(f"Error processing instance {instance_id}: {e}\n{traceback.format_exc()}")
        exit_status, result = type(e).__name__, str(e)
        extra_info = {"traceback": traceback.format_exc()}
    finally:
        save_traj(
            agent,
            instance_dir / f"{instance_id}.traj.json",
            exit_status=exit_status,
            result=result,
            extra_info=extra_info,
            instance_id=instance_id,
        )
        update_preds_file(output_dir / "preds.json", instance_id, model.config.model_name, result)
        progress_manager.on_instance_end(instance_id, exit_status)


def filter_instances(
    instances: list[dict], *, filter_spec: str, slice_spec: str = "", shuffle: bool = False
) -> list[dict]:
    """Filter and slice a list of SWEBench instances."""
    if shuffle:
        instances = sorted(instances.copy(), key=lambda x: x["instance_id"])
        random.seed(42)
        random.shuffle(instances)
    before_filter = len(instances)
    instances = [instance for instance in instances if re.match(filter_spec, instance["instance_id"])]
    if (after_filter := len(instances)) != before_filter:
        print(f"Instance filter: {before_filter} -> {after_filter} instances")
    if slice_spec:
        values = [int(x) if x else None for x in slice_spec.split(":")]
        instances = instances[slice(*values)]
        if (after_slice := len(instances)) != before_filter:
            print(f"Instance slice: {before_filter} -> {after_slice} instances")
    return instances


@app.command()
def main(
    subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset"),
    split: str = typer.Option("dev", "--split", help="Dataset split"),
    slice_spec: str = typer.Option("", "--slice", help="Slice specification (e.g., '0:5' for first 5 instances)"),
    filter_spec: str = typer.Option("", "--filter", help="Filter instance IDs by regex"),
    shuffle: bool = typer.Option(False, "--shuffle", help="Shuffle instances"),
    output: str = typer.Option("", "-o", "--output", help="Output directory"),
    workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing"),
    model: str | None = typer.Option(None, "-m", "--model", help="Model to use"),
    redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances"),
    config: Path = typer.Option(
        builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file"
    ),
) -> None:
    """Run mini-SWE-agent on SWEBench instances"""
    dataset_path = DATASET_MAPPING.get(subset, subset)
    print(f"Loading dataset {dataset_path}, split {split}...")
    instances = list(load_dataset(dataset_path, split=split))

    instances = filter_instances(instances, filter_spec=filter_spec, slice_spec=slice_spec, shuffle=shuffle)
    output_path = Path(output)
    if not redo_existing and (output_path / "preds.json").exists():
        existing_instances = list(json.loads((output_path / "preds.json").read_text()).keys())
        print(f"Skipping {len(existing_instances)} existing instances")
        instances = [instance for instance in instances if instance["instance_id"] not in existing_instances]

    output_path.mkdir(parents=True, exist_ok=True)
    print(f"Running on {len(instances)} instances...")
    print(f"Results will be saved to {output_path}")

    progress_manager = RunBatchProgressManager(len(instances), output_path / f"exit_statuses_{time.time()}.yaml")

    def process_futures(futures: dict[concurrent.futures.Future, str]):
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except concurrent.futures.CancelledError:
                pass
            except Exception as e:
                instance_id = futures[future]
                print(f"Error in future for instance {instance_id}: {e}")
                traceback.print_exc()
                progress_manager.on_uncaught_exception(instance_id, e)

    with Live(progress_manager.render_group, refresh_per_second=4):
        with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
            futures = {
                executor.submit(process_instance, instance, output_path, model, config, progress_manager): instance[
                    "instance_id"
                ]
                for instance in instances
            }
            try:
                process_futures(futures)
            except KeyboardInterrupt:
                print("Cancelling all pending jobs. Press ^C again to exit immediately.")
                for future in futures:
                    if not future.running() and not future.done():
                        future.cancel()
                process_futures(futures)


if __name__ == "__main__":
    app()
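
As an illustration of the image-name mapping above (editorial sketch, not package content; the instance id is just an example), double underscores in an instance id are swapped for the "_1776_" token and the result is lower-cased:

# Editorial illustration of get_swebench_docker_image_name.
from minisweagent.run.extra.swebench import get_swebench_docker_image_name

instance = {"instance_id": "astropy__astropy-12907"}
print(get_swebench_docker_image_name(instance))
# swebench/sweb.eval.x86_64.astropy_1776_astropy-12907:latest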
minisweagent/run/extra/swebench_single.py
@@ -0,0 +1,53 @@
"""Run on a single SWE-Bench instance."""

from pathlib import Path

import typer
import yaml
from datasets import load_dataset

from minisweagent.agents.interactive import InteractiveAgent
from minisweagent.config import builtin_config_dir, get_config_path
from minisweagent.environments.docker import DockerEnvironment
from minisweagent.models import get_model
from minisweagent.run.extra.swebench import DATASET_MAPPING, get_swebench_docker_image_name

app = typer.Typer(add_completion=False)


@app.command()
def main(
    subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset"),
    split: str = typer.Option("dev", "--split", help="Dataset split"),
    instance_spec: str = typer.Option(None, "-i", "--instance", help="SWE-Bench instance ID"),
    model_name: str | None = typer.Option(None, "-m", "--model", help="Model to use"),
    config_path: Path = typer.Option(
        builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file"
    ),
) -> None:
    """Run on a single SWE-Bench instance."""
    try:
        dataset_path = DATASET_MAPPING[subset]
    except KeyError:
        dataset_path = subset
    print(f"Loading dataset {dataset_path}, split {split}...")
    instances = {
        inst["instance_id"]: inst  # type: ignore
        for inst in load_dataset(dataset_path, split=split)
    }
    if instance_spec.isnumeric():
        instance_spec = sorted(instances.keys())[int(instance_spec)]
    instance: dict = instances[instance_spec]  # type: ignore

    _config = yaml.safe_load(get_config_path(config_path).read_text())
    env = DockerEnvironment(**(_config.get("environment", {}) | {"image": get_swebench_docker_image_name(instance)}))
    agent = InteractiveAgent(
        get_model(model_name, _config.get("model", {})),
        env,
        **(_config.get("agent", {}) | {"mode": "yolo"}),
    )
    agent.run(instance["problem_statement"])


if __name__ == "__main__":
    app()
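
Note that --instance accepts either an instance id or a numeric index into the sorted id list. A small editorial sketch of that resolution (ids are hypothetical):

# Editorial illustration; sorting is lexicographic, so "-10" sorts before "-2".
instances = {"repo__pkg-2": {}, "repo__pkg-10": {}}
instance_spec = "1"
if instance_spec.isnumeric():
    instance_spec = sorted(instances.keys())[int(instance_spec)]
print(instance_spec)  # repo__pkg-2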
minisweagent/run/extra/utils/batch_progress.py
@@ -0,0 +1,164 @@
"""This module contains an auxiliary class for rendering progress of a batch run.
It's identical to the one used in swe-agent.
"""

import collections
from pathlib import Path
from threading import Lock

import yaml
from rich.console import Group
from rich.progress import (
    BarColumn,
    MofNCompleteColumn,
    Progress,
    SpinnerColumn,
    TaskID,
    TaskProgressColumn,
    TextColumn,
    TimeElapsedColumn,
    TimeRemainingColumn,
)
from rich.table import Table

import minisweagent.models


def _shorten_str(s: str, max_len: int, shorten_left=False) -> str:
    if not shorten_left:
        s = s[: max_len - 3] + "..." if len(s) > max_len else s
    else:
        s = "..." + s[-max_len + 3 :] if len(s) > max_len else s
    return f"{s:<{max_len}}"


class RunBatchProgressManager:
    def __init__(
        self,
        num_instances: int,
        yaml_report_path: Path | None = None,
    ):
        """This class manages a progress bar/UI for run-batch

        Args:
            num_instances: Number of task instances
            yaml_report_path: Path to save a yaml report of the instances and their exit statuses
        """

        self._spinner_tasks: dict[str, TaskID] = {}
        """We need to map instance ID to the task ID that is used by the rich progress bar."""

        self._lock = Lock()

        self._instances_by_exit_status = collections.defaultdict(list)
        self._main_progress_bar = Progress(
            SpinnerColumn(spinner_name="dots2"),
            TextColumn("[progress.description]{task.description} (${task.fields[total_cost]})"),
            BarColumn(),
            MofNCompleteColumn(),
            TaskProgressColumn(),
            TimeElapsedColumn(),
            TextColumn("[cyan]eta:[/cyan]"),
            TimeRemainingColumn(),
            # Wait 5 min before estimating speed
            speed_estimate_period=60 * 5,
        )
        self._task_progress_bar = Progress(
            SpinnerColumn(spinner_name="dots2"),
            TextColumn("{task.fields[instance_id]}"),
            TextColumn("{task.fields[status]}"),
            TimeElapsedColumn(),
        )
        """Task progress bar for individual instances. There's only one progress bar
        with one task for each instance.
        """

        self._main_task_id = self._main_progress_bar.add_task(
            "[cyan]Overall Progress", total=num_instances, total_cost="0.00"
        )

        self.render_group = Group(Table(), self._task_progress_bar, self._main_progress_bar)
        self._yaml_report_path = yaml_report_path

    @property
    def n_completed(self) -> int:
        return sum(len(instances) for instances in self._instances_by_exit_status.values())

    def update_exit_status_table(self):
        # We cannot update the existing table, so we need to create a new one and
        # assign it back to the render group.
        t = Table()
        t.add_column("Exit Status")
        t.add_column("Count", justify="right", style="bold cyan")
        t.add_column("Most recent instances")
        t.show_header = False
        with self._lock:
            t.show_header = True
            # Sort by number of instances in descending order
            sorted_items = sorted(self._instances_by_exit_status.items(), key=lambda x: len(x[1]), reverse=True)
            for status, instances in sorted_items:
                instances_str = _shorten_str(", ".join(reversed(instances)), 55)
                t.add_row(status, str(len(instances)), instances_str)
        assert self.render_group is not None
        self.render_group.renderables[0] = t

    def _update_total_costs(self) -> None:
        with self._lock:
            self._main_progress_bar.update(
                self._main_task_id, total_cost=f"{minisweagent.models.GLOBAL_MODEL_STATS.cost:.2f}"
            )

    def update_instance_status(self, instance_id: str, message: str):
        assert self._task_progress_bar is not None
        assert self._main_progress_bar is not None
        with self._lock:
            self._task_progress_bar.update(
                self._spinner_tasks[instance_id],
                status=_shorten_str(message, 30),
                instance_id=_shorten_str(instance_id, 25, shorten_left=True),
            )
        self._update_total_costs()

    def on_instance_start(self, instance_id: str):
        with self._lock:
            self._spinner_tasks[instance_id] = self._task_progress_bar.add_task(
                description=f"Task {instance_id}",
                status="Task initialized",
                total=None,
                instance_id=instance_id,
            )

    def on_instance_end(self, instance_id: str, exit_status: str | None) -> None:
        self._instances_by_exit_status[exit_status].append(instance_id)
        with self._lock:
            try:
                self._task_progress_bar.remove_task(self._spinner_tasks[instance_id])
            except KeyError:
                pass
            self._main_progress_bar.update(TaskID(0), advance=1)
        self.update_exit_status_table()
        self._update_total_costs()
        if self._yaml_report_path is not None:
            self._save_overview_data_yaml(self._yaml_report_path)

    def on_uncaught_exception(self, instance_id: str, exception: Exception) -> None:
        self.on_instance_end(instance_id, f"Uncaught {type(exception).__name__}")

    def print_report(self) -> None:
        """Print complete list of instances and their exit statuses."""
        for status, instances in self._instances_by_exit_status.items():
            print(f"{status}: {len(instances)}")
            for instance in instances:
                print(f"  {instance}")

    def _get_overview_data(self) -> dict:
        """Get data like exit statuses, total costs, etc."""
        return {
            # convert defaultdict to dict because of serialization
            "instances_by_exit_status": dict(self._instances_by_exit_status),
        }

    def _save_overview_data_yaml(self, path: Path) -> None:
        """Save a yaml report of the instances and their exit statuses."""
        with self._lock:
            path.write_text(yaml.dump(self._get_overview_data(), indent=4))
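
A minimal editorial sketch of driving this progress manager outside the SWE-bench runner (instance ids and statuses are made up; assumes the module path shown in this wheel and that minisweagent.models exposes GLOBAL_MODEL_STATS, as the class above expects):

# Editorial illustration, not package content.
import time

from rich.live import Live

from minisweagent.run.extra.utils.batch_progress import RunBatchProgressManager

manager = RunBatchProgressManager(num_instances=2)
with Live(manager.render_group, refresh_per_second=4):
    for instance_id in ["demo__instance-1", "demo__instance-2"]:
        manager.on_instance_start(instance_id)
        manager.update_instance_status(instance_id, "working")
        time.sleep(0.5)
        manager.on_instance_end(instance_id, "Submitted")
manager.print_report()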
minisweagent/run/github_issue.py
@@ -0,0 +1,80 @@
#!/usr/bin/env python3
import os
from pathlib import Path

import requests
import typer
import yaml
from rich.console import Console

from minisweagent.agents.interactive import InteractiveAgent
from minisweagent.config import builtin_config_dir, get_config_path
from minisweagent.environments.docker import DockerEnvironment
from minisweagent.models import get_model
from minisweagent.run.extra.config import configure_if_first_time
from minisweagent.run.utils.save import save_traj

DEFAULT_CONFIG = Path(os.getenv("MSWEA_GITHUB_CONFIG_PATH", builtin_config_dir / "github_issue.yaml"))
console = Console(highlight=False)
app = typer.Typer(rich_markup_mode="rich", add_completion=False)


def fetch_github_issue(issue_url: str) -> str:
    """Fetch GitHub issue text from the URL."""
    # Convert GitHub issue URL to API URL
    api_url = issue_url.replace("github.com", "api.github.com/repos").replace("/issues/", "/issues/")

    headers = {}
    if github_token := os.getenv("GITHUB_TOKEN"):
        headers["Authorization"] = f"token {github_token}"

    response = requests.get(api_url, headers=headers)
    issue_data = response.json()

    title = issue_data["title"]
    body = issue_data["body"] or ""

    return f"GitHub Issue: {title}\n\n{body}"


@app.command()
def main(
    issue_url: str = typer.Option(prompt="Enter GitHub issue URL", help="GitHub issue URL"),
    config: Path = typer.Option(DEFAULT_CONFIG, "-c", "--config", help="Path to config file"),
    model: str | None = typer.Option(None, "-m", "--model", help="Model to use"),
    yolo: bool = typer.Option(False, "-y", "--yolo", help="Run without confirmation"),
) -> InteractiveAgent:
    """Run mini-SWE-agent on a GitHub issue"""
    configure_if_first_time()

    _config = yaml.safe_load(get_config_path(config).read_text())
    _agent_config = _config.get("agent", {})
    if yolo:
        _agent_config["mode"] = "yolo"

    task = fetch_github_issue(issue_url)

    agent = InteractiveAgent(
        get_model(model, _config.get("model", {})),
        DockerEnvironment(**_config.get("environment", {})),
        **_agent_config,
    )

    repo_url = issue_url.split("/issues/")[0]
    if github_token := os.getenv("GITHUB_TOKEN"):
        repo_url = repo_url.replace("https://github.com/", f"https://{github_token}@github.com/") + ".git"

    agent.env.execute(f"git clone {repo_url} /testbed", cwd="/")

    exit_status, result = None, None
    try:
        exit_status, result = agent.run(task)
    except KeyboardInterrupt:
        console.print("\n[bold red]KeyboardInterrupt -- goodbye[/bold red]")
    finally:
        save_traj(agent, Path("traj.json"), exit_status=exit_status, result=result)
    return agent


if __name__ == "__main__":
    app()
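
The URL rewrite in fetch_github_issue maps a web issue URL onto the corresponding GitHub REST endpoint; an editorial sketch with a hypothetical URL (the second replace in the source is a no-op and is omitted here):

issue_url = "https://github.com/someorg/somerepo/issues/123"
api_url = issue_url.replace("github.com", "api.github.com/repos")
print(api_url)  # https://api.github.com/repos/someorg/somerepo/issues/123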
minisweagent/run/hello_world.py
@@ -0,0 +1,36 @@
import os
from pathlib import Path

import typer
import yaml

from minisweagent import package_dir
from minisweagent.agents.default import DefaultAgent
from minisweagent.environments.local import LocalEnvironment
from minisweagent.models.litellm_model import LitellmModel

app = typer.Typer()


@app.command()
def main(
    task: str = typer.Option(..., "-t", "--task", help="Task/problem statement", show_default=False, prompt=True),
    model_name: str = typer.Option(
        os.getenv("MSWEA_MODEL_NAME"),
        "-m",
        "--model",
        help="Model name (defaults to MSWEA_MODEL_NAME env var)",
        prompt="What model do you want to use?",
    ),
) -> DefaultAgent:
    agent = DefaultAgent(
        LitellmModel(model_name=model_name),
        LocalEnvironment(),
        **yaml.safe_load(Path(package_dir / "config" / "default.yaml").read_text())["agent"],
    )
    agent.run(task)
    return agent


if __name__ == "__main__":
    app()