mini-swe-agent 1.16.0 (mini_swe_agent-1.16.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mini_swe_agent-1.16.0.dist-info/METADATA +314 -0
- mini_swe_agent-1.16.0.dist-info/RECORD +62 -0
- mini_swe_agent-1.16.0.dist-info/WHEEL +5 -0
- mini_swe_agent-1.16.0.dist-info/entry_points.txt +5 -0
- mini_swe_agent-1.16.0.dist-info/licenses/LICENSE.md +21 -0
- mini_swe_agent-1.16.0.dist-info/top_level.txt +1 -0
- minisweagent/__init__.py +83 -0
- minisweagent/__main__.py +7 -0
- minisweagent/agents/__init__.py +1 -0
- minisweagent/agents/default.py +131 -0
- minisweagent/agents/interactive.py +153 -0
- minisweagent/agents/interactive_textual.py +450 -0
- minisweagent/config/README.md +10 -0
- minisweagent/config/__init__.py +27 -0
- minisweagent/config/default.yaml +157 -0
- minisweagent/config/extra/__init__.py +1 -0
- minisweagent/config/extra/swebench.yaml +230 -0
- minisweagent/config/extra/swebench_roulette.yaml +233 -0
- minisweagent/config/extra/swebench_xml.yaml +215 -0
- minisweagent/config/github_issue.yaml +146 -0
- minisweagent/config/mini.tcss +86 -0
- minisweagent/config/mini.yaml +158 -0
- minisweagent/config/mini_no_temp.yaml +158 -0
- minisweagent/environments/__init__.py +31 -0
- minisweagent/environments/docker.py +114 -0
- minisweagent/environments/extra/__init__.py +0 -0
- minisweagent/environments/extra/bubblewrap.py +112 -0
- minisweagent/environments/extra/swerex_docker.py +47 -0
- minisweagent/environments/local.py +38 -0
- minisweagent/environments/singularity.py +97 -0
- minisweagent/models/__init__.py +114 -0
- minisweagent/models/anthropic.py +35 -0
- minisweagent/models/extra/__init__.py +0 -0
- minisweagent/models/extra/roulette.py +61 -0
- minisweagent/models/litellm_model.py +100 -0
- minisweagent/models/litellm_response_api_model.py +80 -0
- minisweagent/models/openrouter_model.py +125 -0
- minisweagent/models/portkey_model.py +154 -0
- minisweagent/models/portkey_response_api_model.py +74 -0
- minisweagent/models/requesty_model.py +119 -0
- minisweagent/models/test_models.py +42 -0
- minisweagent/models/utils/__init__.py +0 -0
- minisweagent/models/utils/cache_control.py +54 -0
- minisweagent/models/utils/key_per_thread.py +20 -0
- minisweagent/models/utils/openai_utils.py +41 -0
- minisweagent/py.typed +0 -0
- minisweagent/run/__init__.py +1 -0
- minisweagent/run/extra/__init__.py +0 -0
- minisweagent/run/extra/config.py +114 -0
- minisweagent/run/extra/swebench.py +266 -0
- minisweagent/run/extra/swebench_single.py +79 -0
- minisweagent/run/extra/utils/__init__.py +0 -0
- minisweagent/run/extra/utils/batch_progress.py +178 -0
- minisweagent/run/github_issue.py +87 -0
- minisweagent/run/hello_world.py +36 -0
- minisweagent/run/inspector.py +212 -0
- minisweagent/run/mini.py +108 -0
- minisweagent/run/mini_extra.py +44 -0
- minisweagent/run/utils/__init__.py +0 -0
- minisweagent/run/utils/save.py +78 -0
- minisweagent/utils/__init__.py +0 -0
- minisweagent/utils/log.py +36 -0
minisweagent/run/extra/swebench.py

@@ -0,0 +1,266 @@

#!/usr/bin/env python3

"""Run mini-SWE-agent on SWE-bench instances in batch mode."""
# Read this first: https://mini-swe-agent.com/latest/usage/swebench/ (usage docs)

import concurrent.futures
import json
import random
import re
import threading
import time
import traceback
from pathlib import Path

import typer
import yaml
from datasets import load_dataset
from jinja2 import StrictUndefined, Template
from rich.live import Live

from minisweagent import Environment
from minisweagent.agents.default import DefaultAgent
from minisweagent.config import builtin_config_dir, get_config_path
from minisweagent.environments import get_environment
from minisweagent.models import get_model
from minisweagent.run.extra.utils.batch_progress import RunBatchProgressManager
from minisweagent.run.utils.save import save_traj
from minisweagent.utils.log import add_file_handler, logger

_HELP_TEXT = """Run mini-SWE-agent on SWEBench instances.

[not dim]
More information about the usage: [bold green]https://mini-swe-agent.com/latest/usage/swebench/[/bold green]
[/not dim]
"""

app = typer.Typer(rich_markup_mode="rich", add_completion=False)

DATASET_MAPPING = {
    "full": "princeton-nlp/SWE-Bench",
    "verified": "princeton-nlp/SWE-Bench_Verified",
    "lite": "princeton-nlp/SWE-Bench_Lite",
    "multimodal": "princeton-nlp/SWE-Bench_Multimodal",
    "multilingual": "swe-bench/SWE-Bench_Multilingual",
    "smith": "SWE-bench/SWE-smith",
    "_test": "klieret/swe-bench-dummy-test-dataset",
}


_OUTPUT_FILE_LOCK = threading.Lock()


class ProgressTrackingAgent(DefaultAgent):
    """Simple wrapper around DefaultAgent that provides progress updates."""

    def __init__(self, *args, progress_manager: RunBatchProgressManager, instance_id: str = "", **kwargs):
        super().__init__(*args, **kwargs)
        self.progress_manager: RunBatchProgressManager = progress_manager
        self.instance_id = instance_id

    def step(self) -> dict:
        """Override step to provide progress updates."""
        self.progress_manager.update_instance_status(
            self.instance_id, f"Step {self.model.n_calls + 1:3d} (${self.model.cost:.2f})"
        )
        return super().step()


def get_swebench_docker_image_name(instance: dict) -> str:
    """Get the image name for a SWEBench instance."""
    image_name = instance.get("image_name", None)
    if image_name is None:
        # Docker doesn't allow double underscore, so we replace them with a magic token
        iid = instance["instance_id"]
        id_docker_compatible = iid.replace("__", "_1776_")
        image_name = f"docker.io/swebench/sweb.eval.x86_64.{id_docker_compatible}:latest".lower()
    return image_name


def get_sb_environment(config: dict, instance: dict) -> Environment:
    env_config = config.setdefault("environment", {})
    env_config["environment_class"] = env_config.get("environment_class", "docker")
    image_name = get_swebench_docker_image_name(instance)
    if env_config["environment_class"] == "docker":
        env_config["image"] = image_name
    elif env_config["environment_class"] == "singularity":
        env_config["image"] = "docker://" + image_name
    env = get_environment(env_config)
    if startup_command := config.get("run", {}).get("env_startup_command"):
        startup_command = Template(startup_command, undefined=StrictUndefined).render(**instance)
        out = env.execute(startup_command)
        if out["returncode"] != 0:
            raise RuntimeError(f"Error executing startup command: {out}")
    return env


def update_preds_file(output_path: Path, instance_id: str, model_name: str, result: str):
    """Update the output JSON file with results from a single instance."""
    with _OUTPUT_FILE_LOCK:
        output_data = {}
        if output_path.exists():
            output_data = json.loads(output_path.read_text())
        output_data[instance_id] = {
            "model_name_or_path": model_name,
            "instance_id": instance_id,
            "model_patch": result,
        }
        output_path.write_text(json.dumps(output_data, indent=2))


def remove_from_preds_file(output_path: Path, instance_id: str):
    """Remove an instance from the predictions file."""
    if not output_path.exists():
        return
    with _OUTPUT_FILE_LOCK:
        output_data = json.loads(output_path.read_text())
        if instance_id in output_data:
            del output_data[instance_id]
            output_path.write_text(json.dumps(output_data, indent=2))


def process_instance(
    instance: dict,
    output_dir: Path,
    config: dict,
    progress_manager: RunBatchProgressManager,
) -> None:
    """Process a single SWEBench instance."""
    instance_id = instance["instance_id"]
    instance_dir = output_dir / instance_id
    # avoid inconsistent state if something here fails and there's leftover previous files
    remove_from_preds_file(output_dir / "preds.json", instance_id)
    (instance_dir / f"{instance_id}.traj.json").unlink(missing_ok=True)
    model = get_model(config=config.get("model", {}))
    task = instance["problem_statement"]

    progress_manager.on_instance_start(instance_id)
    progress_manager.update_instance_status(instance_id, "Pulling/starting docker")

    agent = None
    extra_info = None

    try:
        env = get_sb_environment(config, instance)
        agent = ProgressTrackingAgent(
            model,
            env,
            progress_manager=progress_manager,
            instance_id=instance_id,
            **config.get("agent", {}),
        )
        exit_status, result = agent.run(task)
    except Exception as e:
        logger.error(f"Error processing instance {instance_id}: {e}", exc_info=True)
        exit_status, result = type(e).__name__, str(e)
        extra_info = {"traceback": traceback.format_exc()}
    finally:
        save_traj(
            agent,
            instance_dir / f"{instance_id}.traj.json",
            exit_status=exit_status,
            result=result,
            extra_info=extra_info,
            instance_id=instance_id,
            print_fct=logger.info,
        )
        update_preds_file(output_dir / "preds.json", instance_id, model.config.model_name, result)
        progress_manager.on_instance_end(instance_id, exit_status)


def filter_instances(
    instances: list[dict], *, filter_spec: str, slice_spec: str = "", shuffle: bool = False
) -> list[dict]:
    """Filter and slice a list of SWEBench instances."""
    if shuffle:
        instances = sorted(instances.copy(), key=lambda x: x["instance_id"])
        random.seed(42)
        random.shuffle(instances)
    before_filter = len(instances)
    instances = [instance for instance in instances if re.match(filter_spec, instance["instance_id"])]
    if (after_filter := len(instances)) != before_filter:
        logger.info(f"Instance filter: {before_filter} -> {after_filter} instances")
    if slice_spec:
        values = [int(x) if x else None for x in slice_spec.split(":")]
        instances = instances[slice(*values)]
        if (after_slice := len(instances)) != before_filter:
            logger.info(f"Instance slice: {before_filter} -> {after_slice} instances")
    return instances


# fmt: off
@app.command(help=_HELP_TEXT)
def main(
    subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset", rich_help_panel="Data selection"),
    split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
    slice_spec: str = typer.Option("", "--slice", help="Slice specification (e.g., '0:5' for first 5 instances)", rich_help_panel="Data selection"),
    filter_spec: str = typer.Option("", "--filter", help="Filter instance IDs by regex", rich_help_panel="Data selection"),
    shuffle: bool = typer.Option(False, "--shuffle", help="Shuffle instances", rich_help_panel="Data selection"),
    output: str = typer.Option("", "-o", "--output", help="Output directory", rich_help_panel="Basic"),
    workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing", rich_help_panel="Basic"),
    model: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
    model_class: str | None = typer.Option(None, "-c", "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
    redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances", rich_help_panel="Data selection"),
    config_spec: Path = typer.Option(builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
    environment_class: str | None = typer.Option(None, "--environment-class", help="Environment type to use. Recommended are docker or singularity", rich_help_panel="Advanced"),
) -> None:
    # fmt: on
    output_path = Path(output)
    output_path.mkdir(parents=True, exist_ok=True)
    logger.info(f"Results will be saved to {output_path}")
    add_file_handler(output_path / "minisweagent.log")

    dataset_path = DATASET_MAPPING.get(subset, subset)
    logger.info(f"Loading dataset {dataset_path}, split {split}...")
    instances = list(load_dataset(dataset_path, split=split))

    instances = filter_instances(instances, filter_spec=filter_spec, slice_spec=slice_spec, shuffle=shuffle)
    if not redo_existing and (output_path / "preds.json").exists():
        existing_instances = list(json.loads((output_path / "preds.json").read_text()).keys())
        logger.info(f"Skipping {len(existing_instances)} existing instances")
        instances = [instance for instance in instances if instance["instance_id"] not in existing_instances]
    logger.info(f"Running on {len(instances)} instances...")

    config_path = get_config_path(config_spec)
    logger.info(f"Loading agent config from '{config_path}'")
    config = yaml.safe_load(config_path.read_text())
    if environment_class is not None:
        config.setdefault("environment", {})["environment_class"] = environment_class
    if model is not None:
        config.setdefault("model", {})["model_name"] = model
    if model_class is not None:
        config.setdefault("model", {})["model_class"] = model_class

    progress_manager = RunBatchProgressManager(len(instances), output_path / f"exit_statuses_{time.time()}.yaml")

    def process_futures(futures: dict[concurrent.futures.Future, str]):
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except concurrent.futures.CancelledError:
                pass
            except Exception as e:
                instance_id = futures[future]
                logger.error(f"Error in future for instance {instance_id}: {e}", exc_info=True)
                progress_manager.on_uncaught_exception(instance_id, e)

    with Live(progress_manager.render_group, refresh_per_second=4):
        with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
            futures = {
                executor.submit(process_instance, instance, output_path, config, progress_manager): instance[
                    "instance_id"
                ]
                for instance in instances
            }
            try:
                process_futures(futures)
            except KeyboardInterrupt:
                logger.info("Cancelling all pending jobs. Press ^C again to exit immediately.")
                for future in futures:
                    if not future.running() and not future.done():
                        future.cancel()
                process_futures(futures)


if __name__ == "__main__":
    app()
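The two helpers above determine how SWE-bench instances map to container images and how the `--filter`/`--slice` options prune the instance list. A minimal sketch of that behaviour, assuming mini-swe-agent 1.16.0 and its dependencies are installed; the instance IDs are invented for illustration:

```python
from minisweagent.run.extra.swebench import filter_instances, get_swebench_docker_image_name

# Invented instance records for illustration only.
instances = [
    {"instance_id": "astropy__astropy-12907"},
    {"instance_id": "django__django-11001"},
]

# Double underscores become the "_1776_" token and the name is lowercased,
# matching the docker.io/swebench image naming in get_swebench_docker_image_name().
print(get_swebench_docker_image_name(instances[0]))
# docker.io/swebench/sweb.eval.x86_64.astropy_1776_astropy-12907:latest

# --filter is a regex matched against the instance ID; --slice uses Python slice syntax.
subset = filter_instances(instances, filter_spec="django__", slice_spec="0:1")
print([inst["instance_id"] for inst in subset])  # ['django__django-11001']
```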
minisweagent/run/extra/swebench_single.py

@@ -0,0 +1,79 @@

"""Run on a single SWE-Bench instance."""

import traceback
from pathlib import Path

import typer
import yaml
from datasets import load_dataset

from minisweagent import global_config_dir
from minisweagent.agents.interactive import InteractiveAgent
from minisweagent.config import builtin_config_dir, get_config_path
from minisweagent.models import get_model
from minisweagent.run.extra.swebench import (
    DATASET_MAPPING,
    get_sb_environment,
)
from minisweagent.run.utils.save import save_traj
from minisweagent.utils.log import logger

app = typer.Typer(add_completion=False)

DEFAULT_OUTPUT = global_config_dir / "last_swebench_single_run.traj.json"


# fmt: off
@app.command()
def main(
    subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset", rich_help_panel="Data selection"),
    split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
    instance_spec: str = typer.Option(0, "-i", "--instance", help="SWE-Bench instance ID or index", rich_help_panel="Data selection"),
    model_name: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
    model_class: str | None = typer.Option(None, "-c", "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
    config_path: Path = typer.Option(builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
    environment_class: str | None = typer.Option(None, "--environment-class", rich_help_panel="Advanced"),
    exit_immediately: bool = typer.Option(False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting.", rich_help_panel="Basic"),
    output: Path = typer.Option(DEFAULT_OUTPUT, "-o", "--output", help="Output trajectory file", rich_help_panel="Basic"),
) -> None:
    # fmt: on
    """Run on a single SWE-Bench instance."""
    dataset_path = DATASET_MAPPING.get(subset, subset)
    logger.info(f"Loading dataset from {dataset_path}, split {split}...")
    instances = {
        inst["instance_id"]: inst  # type: ignore
        for inst in load_dataset(dataset_path, split=split)
    }
    if instance_spec.isnumeric():
        instance_spec = sorted(instances.keys())[int(instance_spec)]
    instance: dict = instances[instance_spec]  # type: ignore

    config_path = get_config_path(config_path)
    logger.info(f"Loading agent config from '{config_path}'")
    config = yaml.safe_load(config_path.read_text())
    if environment_class is not None:
        config.setdefault("environment", {})["environment_class"] = environment_class
    if model_class is not None:
        config.setdefault("model", {})["model_class"] = model_class
    if exit_immediately:
        config.setdefault("agent", {})["confirm_exit"] = False
    env = get_sb_environment(config, instance)
    agent = InteractiveAgent(
        get_model(model_name, config.get("model", {})),
        env,
        **({"mode": "yolo"} | config.get("agent", {})),
    )

    exit_status, result, extra_info = None, None, None
    try:
        exit_status, result = agent.run(instance["problem_statement"])  # type: ignore[arg-type]
    except Exception as e:
        logger.error(f"Error processing instance {instance_spec}: {e}", exc_info=True)
        exit_status, result = type(e).__name__, str(e)
        extra_info = {"traceback": traceback.format_exc()}
    finally:
        save_traj(agent, output, exit_status=exit_status, result=result, extra_info=extra_info)  # type: ignore[arg-type]


if __name__ == "__main__":
    app()
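A small sketch of how the `-i/--instance` value is resolved above: a numeric spec indexes into the alphabetically sorted instance IDs, anything else is treated as an instance ID directly. The IDs here are invented; in the script they come from `load_dataset(...)`:

```python
# Invented instance records for illustration only.
instances = {
    "astropy__astropy-12907": {"problem_statement": "..."},
    "django__django-11001": {"problem_statement": "..."},
}

instance_spec = "1"
if instance_spec.isnumeric():
    # Numeric spec -> index into the sorted instance IDs.
    instance_spec = sorted(instances.keys())[int(instance_spec)]
print(instance_spec)  # django__django-11001
instance = instances[instance_spec]
```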
minisweagent/run/extra/utils/__init__.py

File without changes
minisweagent/run/extra/utils/batch_progress.py

@@ -0,0 +1,178 @@

"""This module contains an auxiliary class for rendering progress of a batch run.
It's identical to the one used in swe-agent.
"""

import collections
import time
from datetime import timedelta
from pathlib import Path
from threading import Lock

import yaml
from rich.console import Group
from rich.progress import (
    BarColumn,
    MofNCompleteColumn,
    Progress,
    SpinnerColumn,
    TaskID,
    TaskProgressColumn,
    TextColumn,
    TimeElapsedColumn,
)
from rich.table import Table

import minisweagent.models


def _shorten_str(s: str, max_len: int, shorten_left=False) -> str:
    if not shorten_left:
        s = s[: max_len - 3] + "..." if len(s) > max_len else s
    else:
        s = "..." + s[-max_len + 3 :] if len(s) > max_len else s
    return f"{s:<{max_len}}"


class RunBatchProgressManager:
    def __init__(
        self,
        num_instances: int,
        yaml_report_path: Path | None = None,
    ):
        """This class manages a progress bar/UI for run-batch

        Args:
            num_instances: Number of task instances
            yaml_report_path: Path to save a yaml report of the instances and their exit statuses
        """

        self._spinner_tasks: dict[str, TaskID] = {}
        """We need to map instance ID to the task ID that is used by the rich progress bar."""

        self._lock = Lock()
        self._start_time = time.time()
        self._total_instances = num_instances

        self._instances_by_exit_status = collections.defaultdict(list)
        self._main_progress_bar = Progress(
            SpinnerColumn(spinner_name="dots2"),
            TextColumn("[progress.description]{task.description} (${task.fields[total_cost]})"),
            BarColumn(),
            MofNCompleteColumn(),
            TaskProgressColumn(),
            TimeElapsedColumn(),
            TextColumn("[cyan]{task.fields[eta]}[/cyan]"),
            # Wait 5 min before estimating speed
            speed_estimate_period=60 * 5,
        )
        self._task_progress_bar = Progress(
            SpinnerColumn(spinner_name="dots2"),
            TextColumn("{task.fields[instance_id]}"),
            TextColumn("{task.fields[status]}"),
            TimeElapsedColumn(),
        )
        """Task progress bar for individual instances. There's only one progress bar
        with one task for each instance.
        """

        self._main_task_id = self._main_progress_bar.add_task(
            "[cyan]Overall Progress", total=num_instances, total_cost="0.00", eta=""
        )

        self.render_group = Group(Table(), self._task_progress_bar, self._main_progress_bar)
        self._yaml_report_path = yaml_report_path

    @property
    def n_completed(self) -> int:
        return sum(len(instances) for instances in self._instances_by_exit_status.values())

    def _get_eta_text(self) -> str:
        """Calculate estimated time remaining based on current progress."""
        try:
            estimated_remaining = (
                (time.time() - self._start_time) / self.n_completed * (self._total_instances - self.n_completed)
            )
            return f"eta: {timedelta(seconds=int(estimated_remaining))}"
        except ZeroDivisionError:
            return ""

    def update_exit_status_table(self):
        # We cannot update the existing table, so we need to create a new one and
        # assign it back to the render group.
        t = Table()
        t.add_column("Exit Status")
        t.add_column("Count", justify="right", style="bold cyan")
        t.add_column("Most recent instances")
        t.show_header = False
        with self._lock:
            t.show_header = True
            # Sort by number of instances in descending order
            sorted_items = sorted(self._instances_by_exit_status.items(), key=lambda x: len(x[1]), reverse=True)
            for status, instances in sorted_items:
                instances_str = _shorten_str(", ".join(reversed(instances)), 55)
                t.add_row(status, str(len(instances)), instances_str)
        assert self.render_group is not None
        self.render_group.renderables[0] = t

    def _update_total_costs(self) -> None:
        with self._lock:
            self._main_progress_bar.update(
                self._main_task_id,
                total_cost=f"{minisweagent.models.GLOBAL_MODEL_STATS.cost:.2f}",
                eta=self._get_eta_text(),
            )

    def update_instance_status(self, instance_id: str, message: str):
        assert self._task_progress_bar is not None
        assert self._main_progress_bar is not None
        with self._lock:
            self._task_progress_bar.update(
                self._spinner_tasks[instance_id],
                status=_shorten_str(message, 30),
                instance_id=_shorten_str(instance_id, 25, shorten_left=True),
            )
        self._update_total_costs()

    def on_instance_start(self, instance_id: str):
        with self._lock:
            self._spinner_tasks[instance_id] = self._task_progress_bar.add_task(
                description=f"Task {instance_id}",
                status="Task initialized",
                total=None,
                instance_id=instance_id,
            )

    def on_instance_end(self, instance_id: str, exit_status: str | None) -> None:
        self._instances_by_exit_status[exit_status].append(instance_id)
        with self._lock:
            try:
                self._task_progress_bar.remove_task(self._spinner_tasks[instance_id])
            except KeyError:
                pass
            self._main_progress_bar.update(TaskID(0), advance=1, eta=self._get_eta_text())
        self.update_exit_status_table()
        self._update_total_costs()
        if self._yaml_report_path is not None:
            self._save_overview_data_yaml(self._yaml_report_path)

    def on_uncaught_exception(self, instance_id: str, exception: Exception) -> None:
        self.on_instance_end(instance_id, f"Uncaught {type(exception).__name__}")

    def print_report(self) -> None:
        """Print complete list of instances and their exit statuses."""
        for status, instances in self._instances_by_exit_status.items():
            print(f"{status}: {len(instances)}")
            for instance in instances:
                print(f"  {instance}")

    def _get_overview_data(self) -> dict:
        """Get data like exit statuses, total costs, etc."""
        return {
            # convert defaultdict to dict because of serialization
            "instances_by_exit_status": dict(self._instances_by_exit_status),
        }

    def _save_overview_data_yaml(self, path: Path) -> None:
        """Save a yaml report of the instances and their exit statuses."""
        with self._lock:
            path.write_text(yaml.dump(self._get_overview_data(), indent=4))
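A minimal sketch of driving `RunBatchProgressManager` outside of `swebench.py`, assuming the package is installed; the instance IDs and status strings are placeholders:

```python
import time

from rich.live import Live

from minisweagent.run.extra.utils.batch_progress import RunBatchProgressManager

manager = RunBatchProgressManager(num_instances=2)
with Live(manager.render_group, refresh_per_second=4):
    for iid in ["demo__project-1", "demo__project-2"]:  # placeholder IDs
        manager.on_instance_start(iid)
        manager.update_instance_status(iid, "Step   1 ($0.00)")
        time.sleep(0.5)  # stand-in for actual agent work
        manager.on_instance_end(iid, "Submitted")
manager.print_report()  # prints instances grouped by exit status
```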
minisweagent/run/github_issue.py

@@ -0,0 +1,87 @@

#!/usr/bin/env python3
import os
from pathlib import Path

import requests
import typer
import yaml
from rich.console import Console

from minisweagent.agents.interactive import InteractiveAgent
from minisweagent.config import builtin_config_dir, get_config_path
from minisweagent.environments.docker import DockerEnvironment
from minisweagent.models import get_model
from minisweagent.run.extra.config import configure_if_first_time
from minisweagent.run.utils.save import save_traj

DEFAULT_CONFIG = Path(os.getenv("MSWEA_GITHUB_CONFIG_PATH", builtin_config_dir / "github_issue.yaml"))
console = Console(highlight=False)
app = typer.Typer(rich_markup_mode="rich", add_completion=False)


def fetch_github_issue(issue_url: str) -> str:
    """Fetch GitHub issue text from the URL."""
    # Convert GitHub issue URL to API URL
    api_url = issue_url.replace("github.com", "api.github.com/repos").replace("/issues/", "/issues/")

    headers = {}
    if github_token := os.getenv("GITHUB_TOKEN"):
        headers["Authorization"] = f"token {github_token}"

    response = requests.get(api_url, headers=headers)
    issue_data = response.json()

    title = issue_data["title"]
    body = issue_data["body"] or ""

    return f"GitHub Issue: {title}\n\n{body}"


# fmt: off
@app.command()
def main(
    issue_url: str = typer.Option(prompt="Enter GitHub issue URL", help="GitHub issue URL"),
    config: Path = typer.Option(DEFAULT_CONFIG, "-c", "--config", help="Path to config file"),
    model: str | None = typer.Option(None, "-m", "--model", help="Model to use"),
    model_class: str | None = typer.Option(None, "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
    yolo: bool = typer.Option(False, "-y", "--yolo", help="Run without confirmation"),
) -> InteractiveAgent:
    # fmt: on
    """Run mini-SWE-agent on a GitHub issue"""
    configure_if_first_time()

    config_path = get_config_path(config)
    console.print(f"Loading agent config from [bold green]'{config_path}'[/bold green]")
    _config = yaml.safe_load(config_path.read_text())
    _agent_config = _config.setdefault("agent", {})
    if yolo:
        _agent_config["mode"] = "yolo"
    if model_class is not None:
        _config.setdefault("model", {})["model_class"] = model_class

    task = fetch_github_issue(issue_url)

    agent = InteractiveAgent(
        get_model(model, _config.get("model", {})),
        DockerEnvironment(**_config.get("environment", {})),
        **_agent_config,
    )

    repo_url = issue_url.split("/issues/")[0]
    if github_token := os.getenv("GITHUB_TOKEN"):
        repo_url = repo_url.replace("https://github.com/", f"https://{github_token}@github.com/") + ".git"

    agent.env.execute(f"git clone {repo_url} /testbed", cwd="/")

    exit_status, result = None, None
    try:
        exit_status, result = agent.run(task)
    except KeyboardInterrupt:
        console.print("\n[bold red]KeyboardInterrupt -- goodbye[/bold red]")
    finally:
        save_traj(agent, Path("traj.json"), exit_status=exit_status, result=result)
    return agent


if __name__ == "__main__":
    app()
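The issue text is fetched through the GitHub REST API; the URL rewrite in `fetch_github_issue` is a plain string replacement. A sketch with an example issue URL (any public issue URL works; GITHUB_TOKEN is only needed for private repositories or higher rate limits):

```python
issue_url = "https://github.com/SWE-agent/mini-swe-agent/issues/1"  # example URL

# Same rewrite as in fetch_github_issue() above.
api_url = issue_url.replace("github.com", "api.github.com/repos")
print(api_url)  # https://api.github.com/repos/SWE-agent/mini-swe-agent/issues/1
```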
minisweagent/run/hello_world.py

@@ -0,0 +1,36 @@

import os
from pathlib import Path

import typer
import yaml

from minisweagent import package_dir
from minisweagent.agents.default import DefaultAgent
from minisweagent.environments.local import LocalEnvironment
from minisweagent.models.litellm_model import LitellmModel

app = typer.Typer()


@app.command()
def main(
    task: str = typer.Option(..., "-t", "--task", help="Task/problem statement", show_default=False, prompt=True),
    model_name: str = typer.Option(
        os.getenv("MSWEA_MODEL_NAME"),
        "-m",
        "--model",
        help="Model name (defaults to MSWEA_MODEL_NAME env var)",
        prompt="What model do you want to use?",
    ),
) -> DefaultAgent:
    agent = DefaultAgent(
        LitellmModel(model_name=model_name),
        LocalEnvironment(),
        **yaml.safe_load(Path(package_dir / "config" / "default.yaml").read_text())["agent"],
    )
    agent.run(task)
    return agent


if __name__ == "__main__":
    app()
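Because `main` returns the `DefaultAgent`, this hello-world runner can also be invoked from Python rather than the CLI. A sketch, assuming a configured model/API key; note that it makes real model calls and executes shell commands in the local environment, and the model name below is only a placeholder:

```python
from minisweagent.run.hello_world import main

# Passing both arguments skips the interactive prompts defined via typer.Option(...).
agent = main(task="Print the current working directory", model_name="gpt-4o-mini")  # placeholder model
```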