mini-swe-agent 1.17.4__py3-none-any.whl → 2.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/METADATA +36 -52
- mini_swe_agent-2.0.0a1.dist-info/RECORD +70 -0
- {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/WHEEL +1 -1
- mini_swe_agent-2.0.0a1.dist-info/entry_points.txt +5 -0
- minisweagent/__init__.py +19 -26
- minisweagent/agents/default.py +128 -113
- minisweagent/agents/interactive.py +119 -58
- minisweagent/config/README.md +3 -4
- minisweagent/config/__init__.py +36 -1
- minisweagent/config/benchmarks/swebench.yaml +156 -0
- minisweagent/config/{extra/swebench.yaml → benchmarks/swebench_backticks.yaml} +69 -64
- minisweagent/config/benchmarks/swebench_modal.yaml +47 -0
- minisweagent/config/{extra → benchmarks}/swebench_xml.yaml +73 -70
- minisweagent/config/default.yaml +24 -21
- minisweagent/config/inspector.tcss +42 -0
- minisweagent/config/mini.yaml +53 -71
- minisweagent/config/{github_issue.yaml → mini_textbased.yaml} +43 -29
- minisweagent/environments/__init__.py +1 -0
- minisweagent/environments/docker.py +67 -20
- minisweagent/environments/extra/bubblewrap.py +86 -47
- minisweagent/environments/extra/swerex_docker.py +53 -20
- minisweagent/environments/extra/swerex_modal.py +90 -0
- minisweagent/environments/local.py +62 -21
- minisweagent/environments/singularity.py +59 -18
- minisweagent/exceptions.py +22 -0
- minisweagent/models/__init__.py +6 -7
- minisweagent/models/extra/roulette.py +20 -17
- minisweagent/models/litellm_model.py +90 -44
- minisweagent/models/litellm_response_model.py +80 -0
- minisweagent/models/litellm_textbased_model.py +45 -0
- minisweagent/models/openrouter_model.py +87 -45
- minisweagent/models/openrouter_response_model.py +123 -0
- minisweagent/models/openrouter_textbased_model.py +76 -0
- minisweagent/models/portkey_model.py +84 -42
- minisweagent/models/portkey_response_model.py +163 -0
- minisweagent/models/requesty_model.py +91 -41
- minisweagent/models/test_models.py +246 -19
- minisweagent/models/utils/actions_text.py +60 -0
- minisweagent/models/utils/actions_toolcall.py +102 -0
- minisweagent/models/utils/actions_toolcall_response.py +110 -0
- minisweagent/models/utils/anthropic_utils.py +28 -0
- minisweagent/models/utils/cache_control.py +15 -2
- minisweagent/models/utils/content_string.py +74 -0
- minisweagent/models/utils/openai_multimodal.py +50 -0
- minisweagent/models/utils/retry.py +25 -0
- minisweagent/run/benchmarks/__init__.py +1 -0
- minisweagent/run/{extra → benchmarks}/swebench.py +57 -36
- minisweagent/run/benchmarks/swebench_single.py +89 -0
- minisweagent/run/{extra → benchmarks}/utils/batch_progress.py +1 -1
- minisweagent/run/hello_world.py +6 -0
- minisweagent/run/mini.py +54 -63
- minisweagent/run/utilities/__init__.py +1 -0
- minisweagent/run/{extra → utilities}/config.py +2 -0
- minisweagent/run/{inspector.py → utilities/inspector.py} +90 -11
- minisweagent/run/{mini_extra.py → utilities/mini_extra.py} +9 -5
- minisweagent/utils/serialize.py +26 -0
- mini_swe_agent-1.17.4.dist-info/RECORD +0 -61
- mini_swe_agent-1.17.4.dist-info/entry_points.txt +0 -5
- minisweagent/agents/interactive_textual.py +0 -450
- minisweagent/config/extra/swebench_roulette.yaml +0 -233
- minisweagent/config/mini.tcss +0 -86
- minisweagent/models/anthropic.py +0 -35
- minisweagent/models/litellm_response_api_model.py +0 -82
- minisweagent/models/portkey_response_api_model.py +0 -75
- minisweagent/models/utils/key_per_thread.py +0 -20
- minisweagent/models/utils/openai_utils.py +0 -41
- minisweagent/run/extra/swebench_single.py +0 -79
- minisweagent/run/github_issue.py +0 -87
- minisweagent/run/utils/__init__.py +0 -0
- minisweagent/run/utils/save.py +0 -78
- {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/licenses/LICENSE.md +0 -0
- {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/top_level.txt +0 -0
- /minisweagent/config/{extra → benchmarks}/__init__.py +0 -0
- /minisweagent/run/{extra → benchmarks}/utils/__init__.py +0 -0
|
@@ -13,19 +13,17 @@ import traceback
|
|
|
13
13
|
from pathlib import Path
|
|
14
14
|
|
|
15
15
|
import typer
|
|
16
|
-
import yaml
|
|
17
|
-
from datasets import load_dataset
|
|
18
16
|
from jinja2 import StrictUndefined, Template
|
|
19
17
|
from rich.live import Live
|
|
20
18
|
|
|
21
19
|
from minisweagent import Environment
|
|
22
20
|
from minisweagent.agents.default import DefaultAgent
|
|
23
|
-
from minisweagent.config import builtin_config_dir,
|
|
21
|
+
from minisweagent.config import builtin_config_dir, get_config_from_spec
|
|
24
22
|
from minisweagent.environments import get_environment
|
|
25
23
|
from minisweagent.models import get_model
|
|
26
|
-
from minisweagent.run.
|
|
27
|
-
from minisweagent.run.utils.save import save_traj
|
|
24
|
+
from minisweagent.run.benchmarks.utils.batch_progress import RunBatchProgressManager
|
|
28
25
|
from minisweagent.utils.log import add_file_handler, logger
|
|
26
|
+
from minisweagent.utils.serialize import UNSET, recursive_merge
|
|
29
27
|
|
|
30
28
|
_HELP_TEXT = """Run mini-SWE-agent on SWEBench instances.
|
|
31
29
|
|
|
@@ -34,7 +32,23 @@ More information about the usage: [bold green]https://mini-swe-agent.com/latest/
|
|
|
34
32
|
[/not dim]
|
|
35
33
|
"""
|
|
36
34
|
|
|
37
|
-
|
|
35
|
+
_CONFIG_SPEC_HELP_TEXT = """Path to config files, filenames, or key-value pairs.
|
|
36
|
+
|
|
37
|
+
[bold red]IMPORTANT:[/bold red] [red]If you set this option, the default config file will not be used.[/red]
|
|
38
|
+
So you need to explicitly set it e.g., with [bold green]-c swebench.yaml <other options>[/bold green]
|
|
39
|
+
|
|
40
|
+
Multiple configs will be recursively merged.
|
|
41
|
+
|
|
42
|
+
Examples:
|
|
43
|
+
|
|
44
|
+
[bold red]-c model.model_kwargs.temperature=0[/bold red] [red]You forgot to add the default config file! See above.[/red]
|
|
45
|
+
|
|
46
|
+
[bold green]-c swebench.yaml -c model.model_kwargs.temperature=0.5[/bold green]
|
|
47
|
+
|
|
48
|
+
[bold green]-c swebench.yaml -c agent.max_iterations=50[/bold green]
|
|
49
|
+
"""
|
|
50
|
+
|
|
51
|
+
DEFAULT_CONFIG_FILE = builtin_config_dir / "benchmarks" / "swebench.yaml"
|
|
38
52
|
|
|
39
53
|
DATASET_MAPPING = {
|
|
40
54
|
"full": "princeton-nlp/SWE-Bench",
|
|
@@ -46,7 +60,7 @@ DATASET_MAPPING = {
|
|
|
46
60
|
"_test": "klieret/swe-bench-dummy-test-dataset",
|
|
47
61
|
}
|
|
48
62
|
|
|
49
|
-
|
|
63
|
+
app = typer.Typer(rich_markup_mode="rich", add_completion=False)
|
|
50
64
|
_OUTPUT_FILE_LOCK = threading.Lock()
|
|
51
65
|
|
|
52
66
|
|
|
@@ -60,9 +74,7 @@ class ProgressTrackingAgent(DefaultAgent):
|
|
|
60
74
|
|
|
61
75
|
def step(self) -> dict:
    """Publish a ``Step N ($cost)`` status for this instance, then run the parent step.

    Returns:
        Whatever the parent class' ``step`` returns.
    """
    status_line = f"Step {self.n_calls + 1:3d} (${self.cost:.2f})"
    self.progress_manager.update_instance_status(self.instance_id, status_line)
    return super().step()
|
|
67
79
|
|
|
68
80
|
|
|
@@ -81,7 +93,7 @@ def get_sb_environment(config: dict, instance: dict) -> Environment:
|
|
|
81
93
|
env_config = config.setdefault("environment", {})
|
|
82
94
|
env_config["environment_class"] = env_config.get("environment_class", "docker")
|
|
83
95
|
image_name = get_swebench_docker_image_name(instance)
|
|
84
|
-
if env_config["environment_class"]
|
|
96
|
+
if env_config["environment_class"] in ["docker", "swerex_modal"]:
|
|
85
97
|
env_config["image"] = image_name
|
|
86
98
|
elif env_config["environment_class"] == "singularity":
|
|
87
99
|
env_config["image"] = "docker://" + image_name
|
|
@@ -138,7 +150,9 @@ def process_instance(
|
|
|
138
150
|
progress_manager.update_instance_status(instance_id, "Pulling/starting docker")
|
|
139
151
|
|
|
140
152
|
agent = None
|
|
141
|
-
|
|
153
|
+
exit_status = None
|
|
154
|
+
result = None
|
|
155
|
+
extra_info = {}
|
|
142
156
|
|
|
143
157
|
try:
|
|
144
158
|
env = get_sb_environment(config, instance)
|
|
@@ -149,21 +163,28 @@ def process_instance(
|
|
|
149
163
|
instance_id=instance_id,
|
|
150
164
|
**config.get("agent", {}),
|
|
151
165
|
)
|
|
152
|
-
|
|
166
|
+
info = agent.run(task)
|
|
167
|
+
exit_status = info.get("exit_status")
|
|
168
|
+
result = info.get("submission")
|
|
153
169
|
except Exception as e:
|
|
154
170
|
logger.error(f"Error processing instance {instance_id}: {e}", exc_info=True)
|
|
155
|
-
exit_status, result = type(e).__name__,
|
|
156
|
-
extra_info = {"traceback": traceback.format_exc()}
|
|
171
|
+
exit_status, result = type(e).__name__, ""
|
|
172
|
+
extra_info = {"traceback": traceback.format_exc(), "exception_str": str(e)}
|
|
157
173
|
finally:
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
174
|
+
if agent is not None:
|
|
175
|
+
traj_path = instance_dir / f"{instance_id}.traj.json"
|
|
176
|
+
agent.save(
|
|
177
|
+
traj_path,
|
|
178
|
+
{
|
|
179
|
+
"info": {
|
|
180
|
+
"exit_status": exit_status,
|
|
181
|
+
"submission": result,
|
|
182
|
+
**extra_info,
|
|
183
|
+
},
|
|
184
|
+
"instance_id": instance_id,
|
|
185
|
+
},
|
|
186
|
+
)
|
|
187
|
+
logger.info(f"Saved trajectory to '{traj_path}'")
|
|
167
188
|
update_preds_file(output_dir / "preds.json", instance_id, model.config.model_name, result)
|
|
168
189
|
progress_manager.on_instance_end(instance_id, exit_status)
|
|
169
190
|
|
|
@@ -199,10 +220,10 @@ def main(
|
|
|
199
220
|
output: str = typer.Option("", "-o", "--output", help="Output directory", rich_help_panel="Basic"),
|
|
200
221
|
workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing", rich_help_panel="Basic"),
|
|
201
222
|
model: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
|
|
202
|
-
model_class: str | None = typer.Option(None, "
|
|
223
|
+
model_class: str | None = typer.Option(None, "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
|
|
203
224
|
redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances", rich_help_panel="Data selection"),
|
|
204
|
-
config_spec:
|
|
205
|
-
environment_class: str | None = typer.Option(
|
|
225
|
+
config_spec: list[str] = typer.Option([str(DEFAULT_CONFIG_FILE)], "-c", "--config", help=_CONFIG_SPEC_HELP_TEXT, rich_help_panel="Basic"),
|
|
226
|
+
environment_class: str | None = typer.Option(None, "--environment-class", help="Environment type to use. Recommended are docker or singularity", rich_help_panel="Advanced"),
|
|
206
227
|
) -> None:
|
|
207
228
|
# fmt: on
|
|
208
229
|
output_path = Path(output)
|
|
@@ -210,6 +231,8 @@ def main(
|
|
|
210
231
|
logger.info(f"Results will be saved to {output_path}")
|
|
211
232
|
add_file_handler(output_path / "minisweagent.log")
|
|
212
233
|
|
|
234
|
+
from datasets import load_dataset
|
|
235
|
+
|
|
213
236
|
dataset_path = DATASET_MAPPING.get(subset, subset)
|
|
214
237
|
logger.info(f"Loading dataset {dataset_path}, split {split}...")
|
|
215
238
|
instances = list(load_dataset(dataset_path, split=split))
|
|
@@ -221,15 +244,13 @@ def main(
|
|
|
221
244
|
instances = [instance for instance in instances if instance["instance_id"] not in existing_instances]
|
|
222
245
|
logger.info(f"Running on {len(instances)} instances...")
|
|
223
246
|
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
if model_class is not None:
|
|
232
|
-
config.setdefault("model", {})["model_class"] = model_class
|
|
247
|
+
logger.info(f"Building agent config from specs: {config_spec}")
|
|
248
|
+
configs = [get_config_from_spec(spec) for spec in config_spec]
|
|
249
|
+
configs.append({
|
|
250
|
+
"environment": {"environment_class": environment_class or UNSET},
|
|
251
|
+
"model": {"model_name": model or UNSET, "model_class": model_class or UNSET},
|
|
252
|
+
})
|
|
253
|
+
config = recursive_merge(*configs)
|
|
233
254
|
|
|
234
255
|
progress_manager = RunBatchProgressManager(len(instances), output_path / f"exit_statuses_{time.time()}.yaml")
|
|
235
256
|
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Run on a single SWE-Bench instance."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from datasets import load_dataset
|
|
7
|
+
|
|
8
|
+
from minisweagent import global_config_dir
|
|
9
|
+
from minisweagent.agents.interactive import InteractiveAgent
|
|
10
|
+
from minisweagent.config import builtin_config_dir, get_config_from_spec
|
|
11
|
+
from minisweagent.models import get_model
|
|
12
|
+
from minisweagent.run.benchmarks.swebench import (
|
|
13
|
+
DATASET_MAPPING,
|
|
14
|
+
get_sb_environment,
|
|
15
|
+
)
|
|
16
|
+
from minisweagent.utils.log import logger
|
|
17
|
+
from minisweagent.utils.serialize import recursive_merge
|
|
18
|
+
|
|
19
|
+
DEFAULT_OUTPUT_FILE = global_config_dir / "last_swebench_single_run.traj.json"
|
|
20
|
+
DEFAULT_CONFIG_FILE = builtin_config_dir / "benchmarks" / "swebench.yaml"
|
|
21
|
+
|
|
22
|
+
app = typer.Typer(add_completion=False)
|
|
23
|
+
|
|
24
|
+
_CONFIG_SPEC_HELP_TEXT = """Path to config files, filenames, or key-value pairs.
|
|
25
|
+
|
|
26
|
+
[bold red]IMPORTANT:[/bold red] [red]If you set this option, the default config file will not be used.[/red]
|
|
27
|
+
So you need to explicitly set it e.g., with [bold green]-c swebench.yaml <other options>[/bold green]
|
|
28
|
+
|
|
29
|
+
Multiple configs will be recursively merged.
|
|
30
|
+
|
|
31
|
+
Examples:
|
|
32
|
+
|
|
33
|
+
[bold red]-c model.model_kwargs.temperature=0[/bold red] [red]You forgot to add the default config file! See above.[/red]
|
|
34
|
+
|
|
35
|
+
[bold green]-c swebench.yaml -c model.model_kwargs.temperature=0.5[/bold green]
|
|
36
|
+
|
|
37
|
+
[bold green]-c swebench.yaml -c agent.mode=yolo[/bold green]
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# fmt: off
|
|
42
|
+
@app.command()
def main(
    subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset", rich_help_panel="Data selection"),
    split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
    instance_spec: str = typer.Option("0", "-i", "--instance", help="SWE-Bench instance ID or index", rich_help_panel="Data selection"),
    model_name: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
    model_class: str | None = typer.Option(None, "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
    config_spec: list[str] = typer.Option([str(DEFAULT_CONFIG_FILE)], "-c", "--config", help=_CONFIG_SPEC_HELP_TEXT, rich_help_panel="Basic"),
    environment_class: str | None = typer.Option(None, "--environment-class", rich_help_panel="Advanced"),
    exit_immediately: bool = typer.Option(False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting.", rich_help_panel="Basic"),
    output: Path = typer.Option(DEFAULT_OUTPUT_FILE, "-o", "--output", help="Output trajectory file", rich_help_panel="Basic"),
) -> None:
    # fmt: on
    """Run on a single SWE-Bench instance."""
    # NOTE: the docstring above doubles as the command's --help text, so it is kept short.
    dataset_path = DATASET_MAPPING.get(subset, subset)
    logger.info(f"Loading dataset from {dataset_path}, split {split}...")
    instances = {
        inst["instance_id"]: inst  # type: ignore
        for inst in load_dataset(dataset_path, split=split)
    }
    # A purely numeric spec is an index into the sorted instance IDs; anything else is an ID.
    if instance_spec.isnumeric():
        instance_spec = sorted(instances.keys())[int(instance_spec)]
    instance: dict = instances[instance_spec]  # type: ignore

    # Config layers are merged in order: config specs first, command line overrides after.
    logger.info(f"Building agent config from specs: {config_spec}")
    configs = [get_config_from_spec(spec) for spec in config_spec]
    configs.append({"agent": {"mode": "yolo"}})
    # Forward -o/--output to the agent; without this the option is parsed but has no effect.
    configs.append({"agent": {"output_path": output}})
    if environment_class is not None:
        configs.append({"environment": {"environment_class": environment_class}})
    if model_class is not None:
        configs.append({"model": {"model_class": model_class}})
    if model_name is not None:
        configs.append({"model": {"model_name": model_name}})
    if exit_immediately:
        configs.append({"agent": {"confirm_exit": False}})
    config = recursive_merge(*configs)

    env = get_sb_environment(config, instance)
    agent = InteractiveAgent(
        get_model(config=config.get("model", {})),
        env,
        **config.get("agent", {}),
    )
    agent.run(instance["problem_statement"])
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
if __name__ == "__main__":
|
|
89
|
+
app()
|
|
@@ -143,8 +143,8 @@ class RunBatchProgressManager:
|
|
|
143
143
|
)
|
|
144
144
|
|
|
145
145
|
def on_instance_end(self, instance_id: str, exit_status: str | None) -> None:
|
|
146
|
-
self._instances_by_exit_status[exit_status].append(instance_id)
|
|
147
146
|
with self._lock:
|
|
147
|
+
self._instances_by_exit_status[exit_status].append(instance_id)
|
|
148
148
|
try:
|
|
149
149
|
self._task_progress_bar.remove_task(self._spinner_tasks[instance_id])
|
|
150
150
|
except KeyError:
|
minisweagent/run/hello_world.py
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
"""This is the simplest possible example of how to use mini-SWE-agent with python bindings.
|
|
2
|
+
For a more complete example, see mini.py
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
1
6
|
import os
|
|
2
7
|
from pathlib import Path
|
|
3
8
|
|
|
@@ -23,6 +28,7 @@ def main(
|
|
|
23
28
|
prompt="What model do you want to use?",
|
|
24
29
|
),
|
|
25
30
|
) -> DefaultAgent:
|
|
31
|
+
logging.basicConfig(level=logging.DEBUG)
|
|
26
32
|
agent = DefaultAgent(
|
|
27
33
|
LitellmModel(model_name=model_name),
|
|
28
34
|
LocalEnvironment(),
|
minisweagent/run/mini.py
CHANGED
|
@@ -4,103 +4,94 @@
|
|
|
4
4
|
# Read this first: https://mini-swe-agent.com/latest/usage/mini/ (usage)
|
|
5
5
|
|
|
6
6
|
import os
|
|
7
|
-
import traceback
|
|
8
7
|
from pathlib import Path
|
|
9
8
|
from typing import Any
|
|
10
9
|
|
|
11
10
|
import typer
|
|
12
|
-
import yaml
|
|
13
|
-
from prompt_toolkit.formatted_text import HTML
|
|
14
|
-
from prompt_toolkit.history import FileHistory
|
|
15
|
-
from prompt_toolkit.shortcuts import PromptSession
|
|
16
11
|
from rich.console import Console
|
|
17
12
|
|
|
18
13
|
from minisweagent import global_config_dir
|
|
19
|
-
from minisweagent.agents.interactive import InteractiveAgent
|
|
20
|
-
from minisweagent.
|
|
21
|
-
from minisweagent.config import builtin_config_dir, get_config_path
|
|
14
|
+
from minisweagent.agents.interactive import InteractiveAgent, _multiline_prompt
|
|
15
|
+
from minisweagent.config import builtin_config_dir, get_config_from_spec
|
|
22
16
|
from minisweagent.environments.local import LocalEnvironment
|
|
23
17
|
from minisweagent.models import get_model
|
|
24
|
-
from minisweagent.run.
|
|
25
|
-
from minisweagent.
|
|
26
|
-
from minisweagent.utils.log import logger
|
|
18
|
+
from minisweagent.run.utilities.config import configure_if_first_time
|
|
19
|
+
from minisweagent.utils.serialize import UNSET, recursive_merge
|
|
27
20
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
console = Console(highlight=False)
|
|
31
|
-
app = typer.Typer(rich_markup_mode="rich")
|
|
32
|
-
prompt_session = PromptSession(history=FileHistory(global_config_dir / "mini_task_history.txt"))
|
|
33
|
-
_HELP_TEXT = """Run mini-SWE-agent in your local environment.
|
|
21
|
+
DEFAULT_CONFIG_FILE = Path(os.getenv("MSWEA_MINI_CONFIG_PATH", builtin_config_dir / "mini.yaml"))
|
|
22
|
+
DEFAULT_OUTPUT_FILE = global_config_dir / "last_mini_run.traj.json"
|
|
34
23
|
|
|
35
|
-
[not dim]
|
|
36
|
-
There are two different user interfaces:
|
|
37
24
|
|
|
38
|
-
|
|
39
|
-
[bold green]mini -v[/bold green] Pager-style interface (Textual)
|
|
25
|
+
_HELP_TEXT = """Run mini-SWE-agent in your local environment.
|
|
40
26
|
|
|
27
|
+
[not dim]
|
|
41
28
|
More information about the usage: [bold green]https://mini-swe-agent.com/latest/usage/mini/[/bold green]
|
|
42
29
|
[/not dim]
|
|
43
30
|
"""
|
|
44
31
|
|
|
32
|
+
# Help text for -c/--config (rich markup). Every config spec, including key=value
# overrides, needs its own -c flag, so each example repeats the flag per spec.
_CONFIG_SPEC_HELP_TEXT = """Path to config files, filenames, or key-value pairs.

[bold red]IMPORTANT:[/bold red] [red]If you set this option, the default config file will not be used.[/red]
So you need to explicitly set it e.g., with [bold green]-c mini.yaml <other options>[/bold green]

Multiple configs will be recursively merged.

Examples:

[bold red]-c model.model_kwargs.temperature=0[/bold red] [red]You forgot to add the default config file! See above.[/red]

[bold green]-c mini.yaml -c model.model_kwargs.temperature=0.5[/bold green]

[bold green]-c swebench.yaml -c agent.mode=yolo[/bold green]
"""
|
|
47
|
+
|
|
48
|
+
console = Console(highlight=False)
|
|
49
|
+
app = typer.Typer(rich_markup_mode="rich")
|
|
50
|
+
|
|
45
51
|
|
|
46
52
|
# fmt: off
|
|
47
53
|
@app.command(help=_HELP_TEXT)
def main(
    model_name: str | None = typer.Option(None, "-m", "--model", help="Model to use",),
    model_class: str | None = typer.Option(None, "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
    task: str | None = typer.Option(None, "-t", "--task", help="Task/problem statement", show_default=False),
    yolo: bool = typer.Option(False, "-y", "--yolo", help="Run without confirmation"),
    cost_limit: float | None = typer.Option(None, "-l", "--cost-limit", help="Cost limit. Set to 0 to disable."),
    config_spec: list[str] = typer.Option([str(DEFAULT_CONFIG_FILE)], "-c", "--config", help=_CONFIG_SPEC_HELP_TEXT),
    output: Path | None = typer.Option(DEFAULT_OUTPUT_FILE, "-o", "--output", help="Output trajectory file"),
    exit_immediately: bool = typer.Option(False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting.", rich_help_panel="Advanced"),
) -> Any:
    # fmt: on
    """Build the run config, ask for a task if none was given, and run the interactive agent.

    The config is the recursive merge of every ``-c`` spec followed by the
    overrides derived from the other command line options. Options left at
    their "not given" value are marked ``UNSET`` instead of being written.

    Returns:
        The agent instance after the run.
    """
    configure_if_first_time()

    # Build the config from the command line arguments
    console.print(f"Building agent config from specs: [bold green]{config_spec}[/bold green]")
    configs = [get_config_from_spec(spec) for spec in config_spec]
    configs.append({
        "agent": {
            "mode": "yolo" if yolo else UNSET,
            # Compare against None rather than truthiness: an explicit 0 is the
            # documented way to disable the limit and must not be dropped.
            "cost_limit": UNSET if cost_limit is None else cost_limit,
            "confirm_exit": False if exit_immediately else UNSET,
            "output_path": output or UNSET,
        },
        "model": {
            "model_class": model_class or UNSET,
            "model_name": model_name or UNSET,
        },
    })
    config = recursive_merge(*configs)

    if not task:
        console.print("[bold yellow]What do you want to do?")
        task = _multiline_prompt()
        console.print("[bold green]Got that, thanks![/bold green]")

    model = get_model(config=config.get("model", {}))
    env = LocalEnvironment(**config.get("environment", {}))
    agent = InteractiveAgent(model, env, **config.get("agent", {}))
    agent.run(task)  # type: ignore[arg-type]
    if output:
        console.print(f"Saved trajectory to [bold green]'{output}'[/bold green]")
    return agent
|
|
105
96
|
|
|
106
97
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Utility modules for mini-SWE-agent (config management, inspector, etc.)."""
|
|
@@ -7,36 +7,92 @@ More information about the usage: [bold green] https://mini-swe-agent.com/latest
|
|
|
7
7
|
|
|
8
8
|
import json
|
|
9
9
|
import os
|
|
10
|
+
import subprocess
|
|
11
|
+
import tempfile
|
|
10
12
|
from pathlib import Path
|
|
11
13
|
|
|
12
14
|
import typer
|
|
13
15
|
from rich.text import Text
|
|
14
16
|
from textual.app import App, ComposeResult
|
|
15
17
|
from textual.binding import Binding
|
|
18
|
+
from textual.command import DiscoveryHit, Hit, Hits, Provider
|
|
16
19
|
from textual.containers import Container, Vertical, VerticalScroll
|
|
17
20
|
from textual.widgets import Footer, Header, Static
|
|
18
21
|
|
|
19
|
-
from minisweagent.
|
|
22
|
+
from minisweagent.models.utils.content_string import get_content_string
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _messages_to_steps(messages: list[dict]) -> list[list[dict]]:
    """Group messages into "pages" as shown by the UI.

    A new page begins at every message that carries a non-empty
    ``extra["actions"]`` entry or whose ``role`` is ``"assistant"``. Every
    other message is appended to the page currently being built, so messages
    that precede the first such message form a page of their own.

    Args:
        messages: Message dicts in trajectory order.

    Returns:
        The pages in order. Empty pages are never emitted, and an empty
        input yields an empty list.
    """
    steps: list[list[dict]] = []
    current_step: list[dict] = []
    for message in messages:
        # `or {}` tolerates messages that store an explicit None under "extra".
        has_actions = bool((message.get("extra") or {}).get("actions"))
        if has_actions or message.get("role") == "assistant":
            # Flush only if something was collected; otherwise a trajectory that
            # opens with an assistant message would get a blank first page.
            if current_step:
                steps.append(current_step)
            current_step = [message]
        else:
            current_step.append(message)
    if current_step:
        steps.append(current_step)
    return steps
|
|
39
|
+
|
|
20
40
|
|
|
21
41
|
app = typer.Typer(rich_markup_mode="rich", add_completion=False)
|
|
22
42
|
|
|
23
43
|
|
|
44
|
+
class BindingCommandProvider(Provider):
    """Expose the app's key bindings as entries of the command palette."""

    # Palette label per binding action; actions not listed here fall back to
    # the binding's own description.
    COMMAND_DESCRIPTIONS = {
        "next_step": "Next step in the current trajectory",
        "previous_step": "Previous step in the current trajectory",
        "first_step": "First step in the current trajectory",
        "last_step": "Last step in the current trajectory",
        "scroll_down": "Scroll down",
        "scroll_up": "Scroll up",
        "next_trajectory": "Next trajectory",
        "previous_trajectory": "Previous trajectory",
        "open_in_jless": "Open the current step in jless",
        "open_in_jless_all": "Open the entire trajectory in jless",
        "quit": "Quit the inspector",
    }

    def _binding_commands(self):
        """Yield a ``(label, callback)`` pair for every binding of the app."""
        app = self.app
        for binding in app.BINDINGS:
            label = self.COMMAND_DESCRIPTIONS.get(binding.action, binding.description)
            # Bind the loop variable as a default so each callback keeps its own binding.
            yield label, (lambda b=binding: app.run_action(b.action))

    async def discover(self) -> Hits:
        """Yield one discovery hit per binding."""
        for label, callback in self._binding_commands():
            yield DiscoveryHit(label, callback)

    async def search(self, query: str) -> Hits:
        """Yield a hit for every binding whose label matches *query* with a positive score."""
        matcher = self.matcher(query)
        for label, callback in self._binding_commands():
            score = matcher.match(label)
            if score > 0:
                yield Hit(score, matcher.highlight(label), callback)
|
|
75
|
+
|
|
76
|
+
|
|
24
77
|
class TrajectoryInspector(App):
|
|
78
|
+
COMMANDS = {BindingCommandProvider}
|
|
25
79
|
BINDINGS = [
|
|
26
80
|
Binding("right,l", "next_step", "Step++"),
|
|
27
81
|
Binding("left,h", "previous_step", "Step--"),
|
|
28
82
|
Binding("0", "first_step", "Step=0"),
|
|
29
83
|
Binding("$", "last_step", "Step=-1"),
|
|
30
|
-
Binding("j,down", "scroll_down", "
|
|
31
|
-
Binding("k,up", "scroll_up", "
|
|
32
|
-
Binding("L", "next_trajectory", "
|
|
33
|
-
Binding("H", "previous_trajectory", "
|
|
84
|
+
Binding("j,down", "scroll_down", "↓"),
|
|
85
|
+
Binding("k,up", "scroll_up", "↑"),
|
|
86
|
+
Binding("L", "next_trajectory", "Traj++"),
|
|
87
|
+
Binding("H", "previous_trajectory", "Traj--"),
|
|
88
|
+
Binding("e", "open_in_jless", "Jless"),
|
|
89
|
+
Binding("E", "open_in_jless_all", "Jless (all)"),
|
|
34
90
|
Binding("q", "quit", "Quit"),
|
|
35
91
|
]
|
|
36
92
|
|
|
37
93
|
def __init__(self, trajectory_files: list[Path]):
|
|
38
94
|
css_path = os.environ.get(
|
|
39
|
-
"MSWEA_INSPECTOR_STYLE_PATH", str(Path(__file__).parent.parent / "config" / "
|
|
95
|
+
"MSWEA_INSPECTOR_STYLE_PATH", str(Path(__file__).parent.parent.parent / "config" / "inspector.tcss")
|
|
40
96
|
)
|
|
41
97
|
self.__class__.CSS = Path(css_path).read_text()
|
|
42
98
|
|
|
@@ -142,13 +198,10 @@ class TrajectoryInspector(App):
|
|
|
142
198
|
return
|
|
143
199
|
|
|
144
200
|
for message in self.steps[self.i_step]:
|
|
145
|
-
|
|
146
|
-
content_str = "\n".join([item["text"] for item in message["content"]])
|
|
147
|
-
else:
|
|
148
|
-
content_str = str(message["content"])
|
|
201
|
+
content_str = get_content_string(message)
|
|
149
202
|
message_container = Vertical(classes="message-container")
|
|
150
203
|
container.mount(message_container)
|
|
151
|
-
role = message
|
|
204
|
+
role = message.get("role") or message.get("type") or "unknown"
|
|
152
205
|
message_container.mount(Static(role.upper(), classes="message-header"))
|
|
153
206
|
message_container.mount(Static(Text(content_str, no_wrap=False), classes="message-content"))
|
|
154
207
|
|
|
@@ -186,6 +239,32 @@ class TrajectoryInspector(App):
|
|
|
186
239
|
vs = self.query_one(VerticalScroll)
|
|
187
240
|
vs.scroll_to(y=vs.scroll_target_y - 15)
|
|
188
241
|
|
|
242
|
+
def _open_in_jless(self, path: Path) -> None:
    """Run ``jless`` on *path* while the app is suspended.

    If the ``jless`` executable cannot be started because it is not found,
    an error notification is raised instead.
    """
    command = ["jless", path]
    with self.suspend():
        try:
            subprocess.run(command)
        except FileNotFoundError:
            self.notify("jless not found. Install with: `brew install jless`", severity="error")
|
|
249
|
+
|
|
250
|
+
def action_open_in_jless(self) -> None:
    """Open the current step's messages in jless.

    The messages of the step at ``self.i_step`` are written as indented JSON
    to a temporary ``.json`` file that is handed to ``_open_in_jless``. The
    file is removed afterwards, including when serialization or the viewer
    raises. When there are no steps, a warning is shown and nothing is opened.
    """
    if not self.steps:
        self.notify("No messages to display", severity="warning")
        return
    # delete=False: the file must outlive the handle so the viewer can open it by name.
    handle = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
    temp_path = Path(handle.name)
    try:
        with handle:
            json.dump(self.steps[self.i_step], handle, indent=2)
        self._open_in_jless(temp_path)
    finally:
        # Always clean up, even if the dump or the viewer failed.
        temp_path.unlink(missing_ok=True)
|
|
260
|
+
|
|
261
|
+
def action_open_in_jless_all(self) -> None:
    """Open the file of the currently selected trajectory in jless.

    Shows a warning instead when no trajectory files are loaded.
    """
    if self.trajectory_files:
        self._open_in_jless(self.trajectory_files[self.i_trajectory])
        return
    self.notify("No trajectory to display", severity="warning")
|
|
267
|
+
|
|
189
268
|
|
|
190
269
|
@app.command(help=__doc__)
|
|
191
270
|
def main(
|
|
@@ -1,16 +1,20 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
|
|
3
|
+
"""This is the central entry point to the mini-extra script. Use subcommands
|
|
4
|
+
to invoke other command line utilities like running on benchmarks, editing config,
|
|
5
|
+
inspecting trajectories, etc.
|
|
6
|
+
"""
|
|
7
|
+
|
|
3
8
|
import sys
|
|
4
9
|
from importlib import import_module
|
|
5
10
|
|
|
6
11
|
from rich.console import Console
|
|
7
12
|
|
|
8
13
|
subcommands = [
|
|
9
|
-
("minisweagent.run.
|
|
10
|
-
("minisweagent.run.inspector", ["inspect", "i", "inspector"], "Run inspector (browse trajectories)"),
|
|
11
|
-
("minisweagent.run.
|
|
12
|
-
("minisweagent.run.
|
|
13
|
-
("minisweagent.run.extra.swebench_single", ["swebench-single"], "Evaluate on SWE-bench (single instance)"),
|
|
14
|
+
("minisweagent.run.utilities.config", ["config"], "Manage the global config file"),
|
|
15
|
+
("minisweagent.run.utilities.inspector", ["inspect", "i", "inspector"], "Run inspector (browse trajectories)"),
|
|
16
|
+
("minisweagent.run.benchmarks.swebench", ["swebench"], "Evaluate on SWE-bench (batch mode)"),
|
|
17
|
+
("minisweagent.run.benchmarks.swebench_single", ["swebench-single"], "Evaluate on SWE-bench (single instance)"),
|
|
14
18
|
]
|
|
15
19
|
|
|
16
20
|
|