mini-swe-agent 1.7.0__tar.gz → 1.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. {mini_swe_agent-1.7.0/src/mini_swe_agent.egg-info → mini_swe_agent-1.8.0}/PKG-INFO +3 -3
  2. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/README.md +1 -1
  3. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/pyproject.toml +1 -1
  4. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0/src/mini_swe_agent.egg-info}/PKG-INFO +3 -3
  5. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/mini_swe_agent.egg-info/requires.txt +1 -1
  6. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/__init__.py +2 -2
  7. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/agents/default.py +4 -4
  8. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/agents/interactive_textual.py +27 -12
  9. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/config/extra/swebench.yaml +1 -0
  10. mini_swe_agent-1.8.0/src/minisweagent/environments/__init__.py +30 -0
  11. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/environments/docker.py +1 -1
  12. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/environments/singularity.py +24 -2
  13. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/run/extra/config.py +2 -1
  14. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/run/extra/swebench.py +32 -21
  15. mini_swe_agent-1.8.0/src/minisweagent/run/extra/swebench_single.py +57 -0
  16. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/run/mini.py +12 -35
  17. mini_swe_agent-1.7.0/src/minisweagent/environments/__init__.py +0 -1
  18. mini_swe_agent-1.7.0/src/minisweagent/run/extra/swebench_single.py +0 -53
  19. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/LICENSE.md +0 -0
  20. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/setup.cfg +0 -0
  21. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/mini_swe_agent.egg-info/SOURCES.txt +0 -0
  22. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/mini_swe_agent.egg-info/dependency_links.txt +0 -0
  23. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/mini_swe_agent.egg-info/entry_points.txt +0 -0
  24. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/mini_swe_agent.egg-info/top_level.txt +0 -0
  25. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/__main__.py +0 -0
  26. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/agents/__init__.py +0 -0
  27. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/agents/interactive.py +0 -0
  28. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/config/README.md +0 -0
  29. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/config/__init__.py +0 -0
  30. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/config/default.yaml +0 -0
  31. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/config/extra/__init__.py +0 -0
  32. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/config/github_issue.yaml +0 -0
  33. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/config/mini.tcss +0 -0
  34. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/config/mini.yaml +0 -0
  35. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/config/mini_no_temp.yaml +0 -0
  36. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/environments/extra/__init__.py +0 -0
  37. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/environments/extra/swerex_docker.py +0 -0
  38. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/environments/local.py +0 -0
  39. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/models/__init__.py +0 -0
  40. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/models/anthropic.py +0 -0
  41. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/models/litellm_model.py +0 -0
  42. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/models/test_models.py +0 -0
  43. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/models/utils/__init__.py +0 -0
  44. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/models/utils/cache_control.py +0 -0
  45. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/models/utils/key_per_thread.py +0 -0
  46. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/py.typed +0 -0
  47. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/run/__init__.py +0 -0
  48. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/run/extra/__init__.py +0 -0
  49. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/run/extra/utils/__init__.py +0 -0
  50. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/run/extra/utils/batch_progress.py +0 -0
  51. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/run/github_issue.py +0 -0
  52. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/run/hello_world.py +0 -0
  53. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/run/inspector.py +0 -0
  54. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/run/mini_extra.py +0 -0
  55. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/run/utils/__init__.py +0 -0
  56. {mini_swe_agent-1.7.0 → mini_swe_agent-1.8.0}/src/minisweagent/run/utils/save.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mini-swe-agent
3
- Version: 1.7.0
3
+ Version: 1.8.0
4
4
  Summary: Nano SWE Agent - A simple AI software engineering agent
5
5
  Author-email: Kilian Lieret <kilian.lieret@posteo.de>, "Carlos E. Jimenez" <carlosej@princeton.edu>
6
6
  License: MIT License
@@ -40,7 +40,7 @@ License-File: LICENSE.md
40
40
  Requires-Dist: pyyaml
41
41
  Requires-Dist: requests
42
42
  Requires-Dist: jinja2
43
- Requires-Dist: litellm
43
+ Requires-Dist: litellm>=1.75.5
44
44
  Requires-Dist: tenacity
45
45
  Requires-Dist: rich
46
46
  Requires-Dist: python-dotenv
@@ -92,7 +92,7 @@ Here's some details:
92
92
 
93
93
  - **Minimal**: Just [100 lines of python](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/agents/default.py) (+100 total for [env](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/environments/local.py),
94
94
  [model](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/models/litellm_model.py), [script](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/run/hello_world.py)) — no fancy dependencies!
95
- - **Powerful:** Resolves 65% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
95
+ - **Powerful:** Resolves 68% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
96
96
  - **Convenient:** Comes with UIs that turn this into your daily dev swiss army knife!
97
97
  - **Deployable:** In addition to local envs, you can use **docker**, **podman**, **singularity**, **apptainer**, and more
98
98
  - **Tested:** [![Codecov](https://img.shields.io/codecov/c/github/swe-agent/mini-swe-agent?style=flat-square)](https://codecov.io/gh/SWE-agent/mini-swe-agent)
@@ -24,7 +24,7 @@ Here's some details:
24
24
 
25
25
  - **Minimal**: Just [100 lines of python](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/agents/default.py) (+100 total for [env](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/environments/local.py),
26
26
  [model](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/models/litellm_model.py), [script](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/run/hello_world.py)) — no fancy dependencies!
27
- - **Powerful:** Resolves 65% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
27
+ - **Powerful:** Resolves 68% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
28
28
  - **Convenient:** Comes with UIs that turn this into your daily dev swiss army knife!
29
29
  - **Deployable:** In addition to local envs, you can use **docker**, **podman**, **singularity**, **apptainer**, and more
30
30
  - **Tested:** [![Codecov](https://img.shields.io/codecov/c/github/swe-agent/mini-swe-agent?style=flat-square)](https://codecov.io/gh/SWE-agent/mini-swe-agent)
@@ -34,7 +34,7 @@ dependencies = [
34
34
  "pyyaml",
35
35
  "requests",
36
36
  "jinja2",
37
- "litellm",
37
+ "litellm >= 1.75.5", # want to have gpt-5 support
38
38
  "tenacity",
39
39
  "rich",
40
40
  "python-dotenv",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mini-swe-agent
3
- Version: 1.7.0
3
+ Version: 1.8.0
4
4
  Summary: Nano SWE Agent - A simple AI software engineering agent
5
5
  Author-email: Kilian Lieret <kilian.lieret@posteo.de>, "Carlos E. Jimenez" <carlosej@princeton.edu>
6
6
  License: MIT License
@@ -40,7 +40,7 @@ License-File: LICENSE.md
40
40
  Requires-Dist: pyyaml
41
41
  Requires-Dist: requests
42
42
  Requires-Dist: jinja2
43
- Requires-Dist: litellm
43
+ Requires-Dist: litellm>=1.75.5
44
44
  Requires-Dist: tenacity
45
45
  Requires-Dist: rich
46
46
  Requires-Dist: python-dotenv
@@ -92,7 +92,7 @@ Here's some details:
92
92
 
93
93
  - **Minimal**: Just [100 lines of python](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/agents/default.py) (+100 total for [env](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/environments/local.py),
94
94
  [model](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/models/litellm_model.py), [script](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/run/hello_world.py)) — no fancy dependencies!
95
- - **Powerful:** Resolves 65% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
95
+ - **Powerful:** Resolves 68% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
96
96
  - **Convenient:** Comes with UIs that turn this into your daily dev swiss army knife!
97
97
  - **Deployable:** In addition to local envs, you can use **docker**, **podman**, **singularity**, **apptainer**, and more
98
98
  - **Tested:** [![Codecov](https://img.shields.io/codecov/c/github/swe-agent/mini-swe-agent?style=flat-square)](https://codecov.io/gh/SWE-agent/mini-swe-agent)
@@ -1,7 +1,7 @@
1
1
  pyyaml
2
2
  requests
3
3
  jinja2
4
- litellm
4
+ litellm>=1.75.5
5
5
  tenacity
6
6
  rich
7
7
  python-dotenv
@@ -8,7 +8,7 @@ This file provides:
8
8
  unless you want the static type checking.
9
9
  """
10
10
 
11
- __version__ = "1.7.0"
11
+ __version__ = "1.8.0"
12
12
 
13
13
  import os
14
14
  from pathlib import Path
@@ -61,7 +61,7 @@ class Agent(Protocol):
61
61
  env: Environment
62
62
  messages: list[dict[str, str]]
63
63
 
64
- def run(self, task: str) -> tuple[str, str]: ...
64
+ def run(self, task: str, **kwargs) -> tuple[str, str]: ...
65
65
 
66
66
 
67
67
  __all__ = ["Agent", "Model", "Environment", "package_dir", "__version__", "global_config_file", "global_config_dir"]
@@ -69,11 +69,11 @@ class DefaultAgent:
69
69
  def add_message(self, role: str, content: str, **kwargs):
70
70
  self.messages.append({"role": role, "content": content, **kwargs})
71
71
 
72
- def run(self, task: str) -> tuple[str, str]:
72
+ def run(self, task: str, **kwargs) -> tuple[str, str]:
73
73
  """Run step() until agent is finished. Return exit status & message"""
74
74
  self.messages = []
75
75
  self.add_message("system", self.render_template(self.config.system_template))
76
- self.add_message("user", self.render_template(self.config.instance_template, task=task))
76
+ self.add_message("user", self.render_template(self.config.instance_template, task=task, **kwargs))
77
77
  while True:
78
78
  try:
79
79
  self.step()
@@ -124,6 +124,6 @@ class DefaultAgent:
124
124
 
125
125
  def has_finished(self, output: dict[str, str]):
126
126
  """Raises Submitted exception with final output if the agent has finished its task."""
127
- lines = output.get("output", "").lstrip().splitlines()
127
+ lines = output.get("output", "").lstrip().splitlines(keepends=True)
128
128
  if lines and lines[0].strip() in ["MINI_SWE_AGENT_FINAL_OUTPUT", "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT"]:
129
- raise Submitted("\n".join(lines[1:]))
129
+ raise Submitted("".join(lines[1:]))
@@ -37,8 +37,8 @@ class TextualAgentConfig(AgentConfig):
37
37
  """If the agent wants to finish, do we ask for confirmation from user?"""
38
38
 
39
39
 
40
- class TextualAgent(DefaultAgent):
41
- def __init__(self, app: "AgentApp", *args, **kwargs):
40
+ class _TextualAgent(DefaultAgent):
41
+ def __init__(self, app: "TextualAgent", *args, **kwargs):
42
42
  """Connects the DefaultAgent to the TextualApp."""
43
43
  self.app = app
44
44
  super().__init__(*args, config_class=TextualAgentConfig, **kwargs)
@@ -59,9 +59,9 @@ class TextualAgent(DefaultAgent):
59
59
  self._current_action_from_human = False
60
60
  return super().query()
61
61
 
62
- def run(self, task: str) -> tuple[str, str]:
62
+ def run(self, task: str, **kwargs) -> tuple[str, str]:
63
63
  try:
64
- exit_status, result = super().run(task)
64
+ exit_status, result = super().run(task, **kwargs)
65
65
  except Exception as e:
66
66
  result = str(e)
67
67
  self.app.call_from_thread(self.app.action_quit)
@@ -123,7 +123,7 @@ def _messages_to_steps(messages: list[dict]) -> list[list[dict]]:
123
123
 
124
124
 
125
125
  class SmartInputContainer(Container):
126
- def __init__(self, app: "AgentApp"):
126
+ def __init__(self, app: "TextualAgent"):
127
127
  """Smart input container supporting single-line and multi-line input modes."""
128
128
  super().__init__(classes="smart-input-container")
129
129
  self._app = app
@@ -239,7 +239,7 @@ class SmartInputContainer(Container):
239
239
  return
240
240
 
241
241
 
242
- class AgentApp(App):
242
+ class TextualAgent(App):
243
243
  BINDINGS = [
244
244
  Binding("right,l", "next_step", "Step++", tooltip="Show next step of the agent"),
245
245
  Binding("left,h", "previous_step", "Step--", tooltip="Show previous step of the agent"),
@@ -259,24 +259,28 @@ class AgentApp(App):
259
259
  Binding("f1,question_mark", "toggle_help_panel", "Help", tooltip="Show help"),
260
260
  ]
261
261
 
262
- def __init__(self, model, env, task: str, **kwargs):
262
+ def __init__(self, model, env, **kwargs):
263
263
  css_path = os.environ.get("MSWEA_MINI_STYLE_PATH", str(Path(__file__).parent.parent / "config" / "mini.tcss"))
264
264
  self.__class__.CSS = Path(css_path).read_text()
265
265
  super().__init__()
266
266
  self.agent_state = "UNINITIALIZED"
267
- self.agent_task = task
268
- self.agent = TextualAgent(self, model=model, env=env, **kwargs)
267
+ self.agent = _TextualAgent(self, model=model, env=env, **kwargs)
269
268
  self._i_step = 0
270
269
  self.n_steps = 1
271
270
  self.input_container = SmartInputContainer(self)
272
271
  self.log_handler = AddLogEmitCallback(lambda record: self.call_from_thread(self.on_log_message_emitted, record))
273
272
  logging.getLogger().addHandler(self.log_handler)
274
273
  self._spinner = Spinner("dots")
275
- self.exit_status: str | None = None
276
- self.result: str | None = None
274
+ self.exit_status: str = "ExitStatusUnset"
275
+ self.result: str = ""
277
276
 
278
277
  self._vscroll = VerticalScroll()
279
278
 
279
+ def run(self, task: str) -> tuple[str, str]:
280
+ threading.Thread(target=lambda: self.agent.run(task), daemon=True).start()
281
+ super().run()
282
+ return self.exit_status, self.result
283
+
280
284
  # --- Basics ---
281
285
 
282
286
  @property
@@ -305,7 +309,18 @@ class AgentApp(App):
305
309
  self.agent_state = "RUNNING"
306
310
  self.update_content()
307
311
  self.set_interval(1 / 8, self._update_headers)
308
- threading.Thread(target=lambda: self.agent.run(self.agent_task), daemon=True).start()
312
+
313
+ @property
314
+ def messages(self) -> list[dict]:
315
+ return self.agent.messages
316
+
317
+ @property
318
+ def model(self):
319
+ return self.agent.model
320
+
321
+ @property
322
+ def env(self):
323
+ return self.agent.env
309
324
 
310
325
  # --- Reacting to events ---
311
326
 
@@ -221,6 +221,7 @@ environment:
221
221
  LESS: -R
222
222
  PIP_PROGRESS_BAR: 'off'
223
223
  TQDM_DISABLE: '1'
224
+ environment_class: docker
224
225
 
225
226
  model:
226
227
  model_name: "claude-sonnet-4-20250514"
@@ -0,0 +1,30 @@
1
+ """Environment implementations for mini-SWE-agent."""
2
+
3
+ import copy
4
+ import importlib
5
+
6
+ from minisweagent import Environment
7
+
8
+ _ENVIRONMENT_MAPPING = {
9
+ "docker": "minisweagent.environments.docker.DockerEnvironment",
10
+ "singularity": "minisweagent.environments.singularity.SingularityEnvironment",
11
+ "local": "minisweagent.environments.local.LocalEnvironment",
12
+ "swerex_docker": "minisweagent.environments.extra.swerex_docker.SwerexDockerEnvironment",
13
+ }
14
+
15
+
16
+ def get_environment_class(spec: str) -> type[Environment]:
17
+ full_path = _ENVIRONMENT_MAPPING.get(spec, spec)
18
+ try:
19
+ module_name, class_name = full_path.rsplit(".", 1)
20
+ module = importlib.import_module(module_name)
21
+ return getattr(module, class_name)
22
+ except (ValueError, ImportError, AttributeError):
23
+ msg = f"Unknown environment type: {spec} (resolved to {full_path}, available: {_ENVIRONMENT_MAPPING})"
24
+ raise ValueError(msg)
25
+
26
+
27
+ def get_environment(config: dict, *, default_type: str = "") -> Environment:
28
+ config = copy.deepcopy(config)
29
+ environment_class = config.pop("environment_class", default_type)
30
+ return get_environment_class(environment_class)(**config)
@@ -20,7 +20,7 @@ class DockerEnvironmentConfig:
20
20
  """
21
21
  timeout: int = 30
22
22
  """Timeout for executing commands in the container."""
23
- executable: str = "docker"
23
+ executable: str = os.getenv("MSWEA_DOCKER_EXECUTABLE", "docker")
24
24
  """Path to the docker/container executable."""
25
25
  run_args: list[str] = field(default_factory=list)
26
26
  """Additional arguments to pass to the docker/container executable."""
@@ -1,8 +1,12 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
3
  import os
4
+ import shutil
4
5
  import subprocess
6
+ import tempfile
7
+ import uuid
5
8
  from dataclasses import dataclass, field
9
+ from pathlib import Path
6
10
  from typing import Any
7
11
 
8
12
 
@@ -16,7 +20,7 @@ class SingularityEnvironmentConfig:
16
20
  """Environment variables to forward to the container."""
17
21
  timeout: int = 30
18
22
  """Timeout for executing commands in the container."""
19
- executable: str = "singularity"
23
+ executable: str = os.getenv("MSWEA_SINGULARITY_EXECUTABLE", "singularity")
20
24
  """Path to the singularity executable."""
21
25
 
22
26
 
@@ -24,11 +28,20 @@ class SingularityEnvironment:
24
28
  def __init__(self, **kwargs):
25
29
  """Singularity environment. See `SingularityEnvironmentConfig` for kwargs."""
26
30
  self.config = SingularityEnvironmentConfig(**kwargs)
31
+ self.sandbox_dir = Path(tempfile.gettempdir()) / f"minisweagent-{uuid.uuid4().hex[:8]}"
32
+
33
+ subprocess.run(
34
+ [self.config.executable, "build", "--sandbox", self.sandbox_dir, self.config.image],
35
+ check=True,
36
+ )
27
37
 
28
38
  def execute(self, command: str, cwd: str = "") -> dict[str, Any]:
29
39
  """Execute a command in a Singularity container and return the result as a dict."""
30
40
  cmd = [self.config.executable, "exec"]
31
41
 
42
+ # Do not inherit directories and env vars from host
43
+ cmd.extend(["--contain", "--cleanenv"])
44
+
32
45
  work_dir = cwd or self.config.cwd
33
46
  if work_dir and work_dir != "/":
34
47
  cmd.extend(["--pwd", work_dir])
@@ -39,7 +52,7 @@ class SingularityEnvironment:
39
52
  for key, value in self.config.env.items():
40
53
  cmd.extend(["--env", f"{key}={value}"])
41
54
 
42
- cmd.extend([self.config.image, "bash", "-c", command])
55
+ cmd.extend(["--writable", str(self.sandbox_dir), "bash", "-c", command])
43
56
  result = subprocess.run(
44
57
  cmd,
45
58
  text=True,
@@ -50,3 +63,12 @@ class SingularityEnvironment:
50
63
  stderr=subprocess.STDOUT,
51
64
  )
52
65
  return {"output": result.stdout, "returncode": result.returncode}
66
+
67
+ def cleanup(self):
68
+ if self.sandbox_dir.exists():
69
+ print(f"Removing sandbox {self.sandbox_dir}")
70
+ shutil.rmtree(self.sandbox_dir)
71
+
72
+ def __del__(self):
73
+ """Cleanup sandbox when object is destroyed."""
74
+ self.cleanup()
@@ -34,11 +34,12 @@ This setup will ask you for your model and an API key.
34
34
  Here's a few popular models and the required API keys:
35
35
 
36
36
  [bold green]claude-sonnet-4-20250514[/bold green] ([bold green]ANTHROPIC_API_KEY[/bold green])
37
- [bold green]o3[/bold green] ([bold green]OPENAI_API_KEY[/bold green])
37
+ [bold green]openai/gpt-5[/bold green] or [bold green]openai/gpt-5-mini[/bold green] ([bold green]OPENAI_API_KEY[/bold green])
38
38
 
39
39
  [bold yellow]You can leave any setting blank to skip it.[/bold yellow]
40
40
 
41
41
  More information at https://mini-swe-agent.com/latest/quickstart/
42
+ To find the best model, check the leaderboard at https://swebench.com/
42
43
  """
43
44
 
44
45
 
@@ -17,9 +17,10 @@ import yaml
17
17
  from datasets import load_dataset
18
18
  from rich.live import Live
19
19
 
20
+ from minisweagent import Environment
20
21
  from minisweagent.agents.default import DefaultAgent
21
22
  from minisweagent.config import builtin_config_dir, get_config_path
22
- from minisweagent.environments.docker import DockerEnvironment
23
+ from minisweagent.environments import get_environment
23
24
  from minisweagent.models import get_model
24
25
  from minisweagent.run.extra.utils.batch_progress import RunBatchProgressManager
25
26
  from minisweagent.run.utils.save import save_traj
@@ -74,6 +75,15 @@ def get_swebench_docker_image_name(instance: dict) -> str:
74
75
  return image_name
75
76
 
76
77
 
78
+ def get_sb_environment(config: dict, instance: dict) -> Environment:
79
+ image_name = get_swebench_docker_image_name(instance)
80
+ env_config = config.get("environment", {})
81
+ if env_config.get("environment_class") == "singularity":
82
+ image_name = "docker://" + image_name
83
+ env_config["image"] = image_name
84
+ return get_environment(env_config, default_type="docker")
85
+
86
+
77
87
  def update_preds_file(output_path: Path, instance_id: str, model_name: str, result: str):
78
88
  """Update the output JSON file with results from a single instance."""
79
89
  with _OUTPUT_FILE_LOCK:
@@ -102,8 +112,7 @@ def remove_from_preds_file(output_path: Path, instance_id: str):
102
112
  def process_instance(
103
113
  instance: dict,
104
114
  output_dir: Path,
105
- model_name: str | None,
106
- config_path: str | Path,
115
+ config: dict,
107
116
  progress_manager: RunBatchProgressManager,
108
117
  ) -> None:
109
118
  """Process a single SWEBench instance."""
@@ -112,10 +121,7 @@ def process_instance(
112
121
  # avoid inconsistent state if something here fails and there's leftover previous files
113
122
  remove_from_preds_file(output_dir / "preds.json", instance_id)
114
123
  (instance_dir / f"{instance_id}.traj.json").unlink(missing_ok=True)
115
-
116
- image_name = get_swebench_docker_image_name(instance)
117
- config = yaml.safe_load(get_config_path(config_path).read_text())
118
- model = get_model(model_name, config=config.get("model", {}))
124
+ model = get_model(config=config.get("model", {}))
119
125
  task = instance["problem_statement"]
120
126
 
121
127
  progress_manager.on_instance_start(instance_id)
@@ -125,7 +131,7 @@ def process_instance(
125
131
  extra_info = None
126
132
 
127
133
  try:
128
- env = DockerEnvironment(**(config.get("environment", {}) | {"image": image_name}))
134
+ env = get_sb_environment(config, instance)
129
135
  agent = ProgressTrackingAgent(
130
136
  model,
131
137
  env,
@@ -171,21 +177,22 @@ def filter_instances(
171
177
  return instances
172
178
 
173
179
 
180
+ # fmt: off
174
181
  @app.command(help=_HELP_TEXT)
175
182
  def main(
176
- subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset"),
177
- split: str = typer.Option("dev", "--split", help="Dataset split"),
178
- slice_spec: str = typer.Option("", "--slice", help="Slice specification (e.g., '0:5' for first 5 instances)"),
179
- filter_spec: str = typer.Option("", "--filter", help="Filter instance IDs by regex"),
180
- shuffle: bool = typer.Option(False, "--shuffle", help="Shuffle instances"),
181
- output: str = typer.Option("", "-o", "--output", help="Output directory"),
182
- workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing"),
183
- model: str | None = typer.Option(None, "-m", "--model", help="Model to use"),
184
- redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances"),
185
- config: Path = typer.Option(
186
- builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file"
187
- ),
183
+ subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset", rich_help_panel="Data selection"),
184
+ split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
185
+ slice_spec: str = typer.Option("", "--slice", help="Slice specification (e.g., '0:5' for first 5 instances)", rich_help_panel="Data selection"),
186
+ filter_spec: str = typer.Option("", "--filter", help="Filter instance IDs by regex", rich_help_panel="Data selection"),
187
+ shuffle: bool = typer.Option(False, "--shuffle", help="Shuffle instances", rich_help_panel="Data selection"),
188
+ output: str = typer.Option("", "-o", "--output", help="Output directory", rich_help_panel="Basic"),
189
+ workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing", rich_help_panel="Basic"),
190
+ model: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
191
+ redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances", rich_help_panel="Data selection"),
192
+ config_spec: Path = typer.Option( builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
193
+ environment_class: str | None = typer.Option( None, "--environment-class", help="Environment type to use. Recommended are docker or singularity", rich_help_panel="Advanced"),
188
194
  ) -> None:
195
+ # fmt: on
189
196
  dataset_path = DATASET_MAPPING.get(subset, subset)
190
197
  print(f"Loading dataset {dataset_path}, split {split}...")
191
198
  instances = list(load_dataset(dataset_path, split=split))
@@ -201,6 +208,10 @@ def main(
201
208
  print(f"Running on {len(instances)} instances...")
202
209
  print(f"Results will be saved to {output_path}")
203
210
 
211
+ config = yaml.safe_load(get_config_path(config_spec).read_text())
212
+ config.setdefault("environment", {}).setdefault("environment_class", environment_class)
213
+ config.setdefault("model", {}).setdefault("model_name", model)
214
+
204
215
  progress_manager = RunBatchProgressManager(len(instances), output_path / f"exit_statuses_{time.time()}.yaml")
205
216
 
206
217
  def process_futures(futures: dict[concurrent.futures.Future, str]):
@@ -218,7 +229,7 @@ def main(
218
229
  with Live(progress_manager.render_group, refresh_per_second=4):
219
230
  with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
220
231
  futures = {
221
- executor.submit(process_instance, instance, output_path, model, config, progress_manager): instance[
232
+ executor.submit(process_instance, instance, output_path, config, progress_manager): instance[
222
233
  "instance_id"
223
234
  ]
224
235
  for instance in instances
@@ -0,0 +1,57 @@
1
+ """Run on a single SWE-Bench instance."""
2
+
3
+ from pathlib import Path
4
+
5
+ import typer
6
+ import yaml
7
+ from datasets import load_dataset
8
+
9
+ from minisweagent.agents.interactive import InteractiveAgent
10
+ from minisweagent.config import builtin_config_dir, get_config_path
11
+ from minisweagent.models import get_model
12
+ from minisweagent.run.extra.swebench import (
13
+ DATASET_MAPPING,
14
+ get_sb_environment,
15
+ )
16
+
17
+ app = typer.Typer(add_completion=False)
18
+
19
+
20
+ # fmt: off
21
+ @app.command()
22
+ def main(
23
+ subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset", rich_help_panel="Data selection"),
24
+ split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
25
+ instance_spec: str = typer.Option(0, "-i", "--instance", help="SWE-Bench instance ID or index", rich_help_panel="Data selection"),
26
+ model_name: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
27
+ config_path: Path = typer.Option( builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
28
+ environment_class: str | None = typer.Option(None, "--environment-class", rich_help_panel="Advanced"),
29
+ exit_immediately: bool = typer.Option( False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting.", rich_help_panel="Basic"),
30
+ ) -> None:
31
+ # fmt: on
32
+ """Run on a single SWE-Bench instance."""
33
+ dataset_path = DATASET_MAPPING.get(subset, subset)
34
+ print(f"Loading dataset from {dataset_path}, split {split}...")
35
+ instances = {
36
+ inst["instance_id"]: inst # type: ignore
37
+ for inst in load_dataset(dataset_path, split=split)
38
+ }
39
+ if instance_spec.isnumeric():
40
+ instance_spec = sorted(instances.keys())[int(instance_spec)]
41
+ instance: dict = instances[instance_spec] # type: ignore
42
+
43
+ config = yaml.safe_load(get_config_path(config_path).read_text())
44
+ config.setdefault("environment", {}).setdefault("environment_class", environment_class)
45
+ if exit_immediately:
46
+ config.setdefault("agent", {})["confirm_exit"] = False
47
+ env = get_sb_environment(config, instance)
48
+ agent = InteractiveAgent(
49
+ get_model(model_name, config.get("model", {})),
50
+ env,
51
+ **(config.get("agent", {}) | {"mode": "yolo"}),
52
+ )
53
+ agent.run(instance["problem_statement"])
54
+
55
+
56
+ if __name__ == "__main__":
57
+ app()
@@ -14,9 +14,9 @@ from prompt_toolkit.history import FileHistory
14
14
  from prompt_toolkit.shortcuts import PromptSession
15
15
  from rich.console import Console
16
16
 
17
- from minisweagent import Environment, Model, global_config_dir
17
+ from minisweagent import global_config_dir
18
18
  from minisweagent.agents.interactive import InteractiveAgent
19
- from minisweagent.agents.interactive_textual import AgentApp
19
+ from minisweagent.agents.interactive_textual import TextualAgent
20
20
  from minisweagent.config import builtin_config_dir, get_config_path
21
21
  from minisweagent.environments.local import LocalEnvironment
22
22
  from minisweagent.models import get_model
@@ -41,36 +41,6 @@ More information about the usage: [bold green]https://mini-swe-agent.com/latest/
41
41
  """
42
42
 
43
43
 
44
- def run_interactive(model: Model, env: Environment, agent_config: dict, task: str, output: Path | None = None) -> Any:
45
- agent = InteractiveAgent(
46
- model,
47
- env,
48
- **agent_config,
49
- )
50
-
51
- exit_status, result = None, None
52
- try:
53
- exit_status, result = agent.run(task)
54
- finally:
55
- if output:
56
- save_traj(agent, output, exit_status=exit_status, result=result)
57
- return agent
58
-
59
-
60
- def run_textual(model: Model, env: Environment, agent_config: dict, task: str, output: Path | None = None) -> Any:
61
- agent_app = AgentApp(
62
- model,
63
- env,
64
- task,
65
- **agent_config,
66
- )
67
- try:
68
- agent_app.run()
69
- finally:
70
- if output:
71
- save_traj(agent_app.agent, output, exit_status=agent_app.exit_status, result=agent_app.result)
72
-
73
-
74
44
  @app.command(help=_HELP_TEXT)
75
45
  def main(
76
46
  visual: bool = typer.Option(
@@ -119,10 +89,17 @@ def main(
119
89
  env = LocalEnvironment(**config.get("env", {}))
120
90
 
121
91
  # Both visual flag and the MSWEA_VISUAL_MODE_DEFAULT flip the mode, so it's essentially a XOR
92
+ agent_class = InteractiveAgent
122
93
  if visual == (os.getenv("MSWEA_VISUAL_MODE_DEFAULT", "false") == "false"):
123
- return run_textual(model, env, config["agent"], task, output) # type: ignore[arg-type]
124
- else:
125
- return run_interactive(model, env, config["agent"], task, output) # type: ignore[arg-type]
94
+ agent_class = TextualAgent
95
+ exit_status, result = None, None
96
+ agent = agent_class(model, env, **config.get("agent", {}))
97
+ try:
98
+ exit_status, result = agent.run(task) # type: ignore[arg-type]
99
+ finally:
100
+ if output:
101
+ save_traj(agent, output, exit_status=exit_status, result=result) # type: ignore[arg-type]
102
+ return agent
126
103
 
127
104
 
128
105
  if __name__ == "__main__":
@@ -1 +0,0 @@
1
- """Environment implementations for mini-SWE-agent."""
@@ -1,53 +0,0 @@
1
- """Run on a single SWE-Bench instance."""
2
-
3
- from pathlib import Path
4
-
5
- import typer
6
- import yaml
7
- from datasets import load_dataset
8
-
9
- from minisweagent.agents.interactive import InteractiveAgent
10
- from minisweagent.config import builtin_config_dir, get_config_path
11
- from minisweagent.environments.docker import DockerEnvironment
12
- from minisweagent.models import get_model
13
- from minisweagent.run.extra.swebench import DATASET_MAPPING, get_swebench_docker_image_name
14
-
15
- app = typer.Typer(add_completion=False)
16
-
17
-
18
- @app.command()
19
- def main(
20
- subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset"),
21
- split: str = typer.Option("dev", "--split", help="Dataset split"),
22
- instance_spec: str = typer.Option(None, "-i", "--instance", help="SWE-Bench instance ID"),
23
- model_name: str | None = typer.Option(None, "-m", "--model", help="Model to use"),
24
- config_path: Path = typer.Option(
25
- builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file"
26
- ),
27
- ) -> None:
28
- """Run on a single SWE-Bench instance."""
29
- try:
30
- dataset_path = DATASET_MAPPING[subset]
31
- except KeyError:
32
- dataset_path = subset
33
- print(f"Loading dataset {dataset_path}, split {split}...")
34
- instances = {
35
- inst["instance_id"]: inst # type: ignore
36
- for inst in load_dataset(dataset_path, split=split)
37
- }
38
- if instance_spec.isnumeric():
39
- instance_spec = sorted(instances.keys())[int(instance_spec)]
40
- instance: dict = instances[instance_spec] # type: ignore
41
-
42
- _config = yaml.safe_load(get_config_path(config_path).read_text())
43
- env = DockerEnvironment(**(_config.get("environment", {}) | {"image": get_swebench_docker_image_name(instance)}))
44
- agent = InteractiveAgent(
45
- get_model(model_name, _config.get("model", {})),
46
- env,
47
- **(_config.get("agent", {}) | {"mode": "yolo"}),
48
- )
49
- agent.run(instance["problem_statement"])
50
-
51
-
52
- if __name__ == "__main__":
53
- app()
File without changes