mini-swe-agent 1.6.0__py3-none-any.whl → 1.8.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mini-swe-agent
3
- Version: 1.6.0
3
+ Version: 1.8.0
4
4
  Summary: Nano SWE Agent - A simple AI software engineering agent
5
5
  Author-email: Kilian Lieret <kilian.lieret@posteo.de>, "Carlos E. Jimenez" <carlosej@princeton.edu>
6
6
  License: MIT License
@@ -40,7 +40,7 @@ License-File: LICENSE.md
40
40
  Requires-Dist: pyyaml
41
41
  Requires-Dist: requests
42
42
  Requires-Dist: jinja2
43
- Requires-Dist: litellm
43
+ Requires-Dist: litellm>=1.75.5
44
44
  Requires-Dist: tenacity
45
45
  Requires-Dist: rich
46
46
  Requires-Dist: python-dotenv
@@ -92,7 +92,7 @@ Here's some details:
92
92
 
93
93
  - **Minimal**: Just [100 lines of python](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/agents/default.py) (+100 total for [env](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/environments/local.py),
94
94
  [model](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/models/litellm_model.py), [script](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/run/hello_world.py)) — no fancy dependencies!
95
- - **Powerful:** Resolves 65% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
95
+ - **Powerful:** Resolves 68% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
96
96
  - **Convenient:** Comes with UIs that turn this into your daily dev swiss army knife!
97
97
  - **Deployable:** In addition to local envs, you can use **docker**, **podman**, **singularity**, **apptainer**, and more
98
98
  - **Tested:** [![Codecov](https://img.shields.io/codecov/c/github/swe-agent/mini-swe-agent?style=flat-square)](https://codecov.io/gh/SWE-agent/mini-swe-agent)
@@ -1,24 +1,24 @@
1
- mini_swe_agent-1.6.0.dist-info/licenses/LICENSE.md,sha256=D3luWPkdHAe7LBsdD4vzqDAXw6Xewb3G-uczss0uh1s,1094
2
- minisweagent/__init__.py,sha256=TzDDE2Pena2PXb1qZldoecg2ELBnSm6KU_eXavcOWVQ,1787
1
+ mini_swe_agent-1.8.0.dist-info/licenses/LICENSE.md,sha256=D3luWPkdHAe7LBsdD4vzqDAXw6Xewb3G-uczss0uh1s,1094
2
+ minisweagent/__init__.py,sha256=uHro6Ebka9Iwr8HmO6GaEvNEuwJh_JKSd_54XNw445c,1797
3
3
  minisweagent/__main__.py,sha256=FIyAOiw--c3FQ2g240FOM1FdL0lk_PxSpixu0pQ7WFo,194
4
4
  minisweagent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  minisweagent/agents/__init__.py,sha256=cpjJLzg1IGxLM-tZpoMJV9S33ye13XtdBO0x7DU_Lrk,48
6
- minisweagent/agents/default.py,sha256=bqLMtEptn61zc_ptAIQkz_2fMI8hBoVpydVA84mPH8I,5471
6
+ minisweagent/agents/default.py,sha256=2KiXOGNXLlhMqFtcsYGICTnfk8pUHn292g_kOlMyECY,5555
7
7
  minisweagent/agents/interactive.py,sha256=7HW2cffaV5f66DIjxvtIbL8mo_S5aZSwgNLSmHp6VC0,7450
8
- minisweagent/agents/interactive_textual.py,sha256=Ef2GTH2_9ujD95ukVf-hb7X6FCRVgvIdlJZvPvCAd-E,17629
8
+ minisweagent/agents/interactive_textual.py,sha256=yYVtgHXdrKkirMyyHehYs5S3e7ddzqtoMSx8rU8_zBo,17944
9
9
  minisweagent/config/README.md,sha256=ABd9anA4aRWtx7Oh37z36Wv6ARvcxD2w9lPUE24R2mY,435
10
10
  minisweagent/config/__init__.py,sha256=0KzHaaIqWgRy2zbwIzhrg6BJPDzOvYi3jb4eBNY4sAU,823
11
- minisweagent/config/default.yaml,sha256=_OJNNTGOr-cyqzkrDIzB2F5H1N49Dlp--N0SDJvkVxE,5233
12
- minisweagent/config/github_issue.yaml,sha256=evvu3AJ52tXYSdami9_B8zfazOAE2r2XXkzVmScBoKc,4539
11
+ minisweagent/config/default.yaml,sha256=OHK9-7PkCa9ZzgYykF1zGYC_AWkiAdOrEpDuurF-1Rk,5143
12
+ minisweagent/config/github_issue.yaml,sha256=qbjj3vmdukxz36_EY7e64vhNn1g2-_NrdNx5xgMOUAI,4569
13
13
  minisweagent/config/mini.tcss,sha256=fmAP9cYAp2n7Ps2Dw3e-ZOGEF2E8JcwTgK1LDcis-x4,1141
14
- minisweagent/config/mini.yaml,sha256=kQWVlAPbbVUb-wKa3lw0ouNpEADTr1a7bXKjU3WegLE,5249
15
- minisweagent/config/mini_no_temp.yaml,sha256=KQpqexf0oOdrMLGNPIy5Ll30bAEw7mrZpgnnMxEygLc,5294
14
+ minisweagent/config/mini.yaml,sha256=mDfN7KputHf7kOGidJFX5-5CDKg97vxxu2cdYYlPoM8,5159
15
+ minisweagent/config/mini_no_temp.yaml,sha256=n0W-017tBmMx57U9SLt7Fy9WJxI9x2vdTWBWeSngGMc,5204
16
16
  minisweagent/config/extra/__init__.py,sha256=e1MoAlDn_wc9HnXNoncf1P-B4DQ-iRf6n7Q_txjZGRI,52
17
- minisweagent/config/extra/swebench.yaml,sha256=LNpTahpul6HL0HozgAAz-C6kpX3wZA7Tg8uE-ZmgrF4,7577
18
- minisweagent/environments/__init__.py,sha256=g5mKac1YgVOZVKvmiAiuyPSevRYpI69V4vYrbCH3gsI,54
19
- minisweagent/environments/docker.py,sha256=VYk7i0T0IgUF_s-N-DqYkHsBWbfgaIMpJZIIdEtetTw,3871
17
+ minisweagent/config/extra/swebench.yaml,sha256=5FKblpcNTHmVUNE1JLHo5_AsupvlwsrLj8I3R2mRItk,7680
18
+ minisweagent/environments/__init__.py,sha256=tTnNjNAhMvIuB1mlesreBV5TLdQBp79qj_Mxr7HGzNk,1180
19
+ minisweagent/environments/docker.py,sha256=dSkD8FtHb9yN_ejau3ekN-FaHQMH2AWdhfpvZOoH7NQ,3909
20
20
  minisweagent/environments/local.py,sha256=-2EV3RqZSB8WEjJE7BHLhRjocPMLpoJ3HbM8QB1WXUU,1060
21
- minisweagent/environments/singularity.py,sha256=j7ptRVF8GwDLd-5IjhT5j7fNxEJz9amuLTmVxotaMlI,1796
21
+ minisweagent/environments/singularity.py,sha256=fqDH4nTg3njHMe7BzQ4HUp_jAgXeKyLWm-TmH8WuNlA,2552
22
22
  minisweagent/environments/extra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  minisweagent/environments/extra/swerex_docker.py,sha256=MOhhFdX1sAk_U0g-GOxohfjrldzO4YfrUnHq8qJff7c,1502
24
24
  minisweagent/models/__init__.py,sha256=J4bnvfMByTVG0cL_6p51sm8gdargXhARfbG5c0UZ8Z4,2890
@@ -32,18 +32,18 @@ minisweagent/run/__init__.py,sha256=WIoYgHVl7iZF2YncrfV3IttupG6P5KogroKHKECka3A,
32
32
  minisweagent/run/github_issue.py,sha256=GWOkGM09jOYV93p6xIM_kKWmC1yP_d5lprafWlqoBN0,2748
33
33
  minisweagent/run/hello_world.py,sha256=erLnEwNmPFLxq3-8zyv66Vy1kIqMqQf97vISX7LrQXg,959
34
34
  minisweagent/run/inspector.py,sha256=QnY3oYzm-yq3w9Jzs112Lco2Rg84vSocAWrQRVz_1lc,7127
35
- minisweagent/run/mini.py,sha256=yeVYaaQrYfAW5gzPsunxqB73CTnwugkQ1qPn2-Os-GM,4849
35
+ minisweagent/run/mini.py,sha256=d-dtnddRDvs0Ub3mFuXJYsNh3sSfRnSPjp6877Y9O2I,4215
36
36
  minisweagent/run/mini_extra.py,sha256=ecA1PnTWElpO60G9RktvVLtUOf3bZ_ESmnSttS6izhQ,1465
37
37
  minisweagent/run/extra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- minisweagent/run/extra/config.py,sha256=ezUu8n2-h79cfphWXv-j9LQXfxzkxrF2aPlh7mObF7k,3545
39
- minisweagent/run/extra/swebench.py,sha256=m5_PZI4ojkUyCxzkkMtel_vlnYmjziWrXu73yHoZGFs,9688
40
- minisweagent/run/extra/swebench_single.py,sha256=L3Kk4G65o3MCPLMEwGNIs77-AFf6Lfc8o1oxrbN-ZWM,1991
38
+ minisweagent/run/extra/config.py,sha256=CEsEr8AdEm64Jods2ZRURChHKMILSatkBLkLmRywkrw,3672
39
+ minisweagent/run/extra/swebench.py,sha256=1v5qGLaAOKdESEaa6qMgJUlUFh025gASpbEyxfzb4uM,10601
40
+ minisweagent/run/extra/swebench_single.py,sha256=YWYAMr6rfsUCGtB_4_e_w_CQ5RWfLbXIXOOGV8HPDYc,2441
41
41
  minisweagent/run/extra/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
42
  minisweagent/run/extra/utils/batch_progress.py,sha256=xhJ7FmsaTBGz-yh8pzYl4yMoUGjn7GA24eYrP-nHj60,6804
43
43
  minisweagent/run/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  minisweagent/run/utils/save.py,sha256=yI_hSU-GOaB7j8YeHBCc7Fhl4js9AyO9N5SC6p-nnu8,1606
45
- mini_swe_agent-1.6.0.dist-info/METADATA,sha256=QDXPWyxQpaflZdPzuFZmzdgIDnEAMp_XGnAfRnV41vU,13783
46
- mini_swe_agent-1.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
- mini_swe_agent-1.6.0.dist-info/entry_points.txt,sha256=d1_yRbTaGjs1UXHa6JQK0sKDGBIVGm8oeW0k2kfbJgQ,182
48
- mini_swe_agent-1.6.0.dist-info/top_level.txt,sha256=zKF4t8bFpV87fdVABZt2Da-vnb4Vkh_CxkwQx5YT4Ew,13
49
- mini_swe_agent-1.6.0.dist-info/RECORD,,
45
+ mini_swe_agent-1.8.0.dist-info/METADATA,sha256=jwXlBjL8-xSWt3QxyvLsw7E8erT0xtWYELQkdk3Ean8,13791
46
+ mini_swe_agent-1.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
+ mini_swe_agent-1.8.0.dist-info/entry_points.txt,sha256=d1_yRbTaGjs1UXHa6JQK0sKDGBIVGm8oeW0k2kfbJgQ,182
48
+ mini_swe_agent-1.8.0.dist-info/top_level.txt,sha256=zKF4t8bFpV87fdVABZt2Da-vnb4Vkh_CxkwQx5YT4Ew,13
49
+ mini_swe_agent-1.8.0.dist-info/RECORD,,
minisweagent/__init__.py CHANGED
@@ -8,7 +8,7 @@ This file provides:
8
8
  unless you want the static type checking.
9
9
  """
10
10
 
11
- __version__ = "1.6.0"
11
+ __version__ = "1.8.0"
12
12
 
13
13
  import os
14
14
  from pathlib import Path
@@ -61,7 +61,7 @@ class Agent(Protocol):
61
61
  env: Environment
62
62
  messages: list[dict[str, str]]
63
63
 
64
- def run(self, task: str) -> tuple[str, str]: ...
64
+ def run(self, task: str, **kwargs) -> tuple[str, str]: ...
65
65
 
66
66
 
67
67
  __all__ = ["Agent", "Model", "Environment", "package_dir", "__version__", "global_config_file", "global_config_dir"]
@@ -18,7 +18,7 @@ class AgentConfig:
18
18
  system_template: str = "You are a helpful assistant that can do anything."
19
19
  instance_template: str = (
20
20
  "Your task: {{task}}. Please reply with a single shell command in triple backticks. "
21
- "To finish, the first line of the output of the shell command must be 'MINI_SWE_AGENT_FINAL_OUTPUT'."
21
+ "To finish, the first line of the output of the shell command must be 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'."
22
22
  )
23
23
  timeout_template: str = (
24
24
  "The last command <command>{{action['action']}}</command> timed out and has been killed.\n"
@@ -69,11 +69,11 @@ class DefaultAgent:
69
69
  def add_message(self, role: str, content: str, **kwargs):
70
70
  self.messages.append({"role": role, "content": content, **kwargs})
71
71
 
72
- def run(self, task: str) -> tuple[str, str]:
72
+ def run(self, task: str, **kwargs) -> tuple[str, str]:
73
73
  """Run step() until agent is finished. Return exit status & message"""
74
74
  self.messages = []
75
75
  self.add_message("system", self.render_template(self.config.system_template))
76
- self.add_message("user", self.render_template(self.config.instance_template, task=task))
76
+ self.add_message("user", self.render_template(self.config.instance_template, task=task, **kwargs))
77
77
  while True:
78
78
  try:
79
79
  self.step()
@@ -124,6 +124,6 @@ class DefaultAgent:
124
124
 
125
125
  def has_finished(self, output: dict[str, str]):
126
126
  """Raises Submitted exception with final output if the agent has finished its task."""
127
- lines = output.get("output", "").lstrip().splitlines()
128
- if lines and lines[0].strip() == "MINI_SWE_AGENT_FINAL_OUTPUT":
129
- raise Submitted("\n".join(lines[1:]))
127
+ lines = output.get("output", "").lstrip().splitlines(keepends=True)
128
+ if lines and lines[0].strip() in ["MINI_SWE_AGENT_FINAL_OUTPUT", "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT"]:
129
+ raise Submitted("".join(lines[1:]))
@@ -37,8 +37,8 @@ class TextualAgentConfig(AgentConfig):
37
37
  """If the agent wants to finish, do we ask for confirmation from user?"""
38
38
 
39
39
 
40
- class TextualAgent(DefaultAgent):
41
- def __init__(self, app: "AgentApp", *args, **kwargs):
40
+ class _TextualAgent(DefaultAgent):
41
+ def __init__(self, app: "TextualAgent", *args, **kwargs):
42
42
  """Connects the DefaultAgent to the TextualApp."""
43
43
  self.app = app
44
44
  super().__init__(*args, config_class=TextualAgentConfig, **kwargs)
@@ -59,9 +59,9 @@ class TextualAgent(DefaultAgent):
59
59
  self._current_action_from_human = False
60
60
  return super().query()
61
61
 
62
- def run(self, task: str) -> tuple[str, str]:
62
+ def run(self, task: str, **kwargs) -> tuple[str, str]:
63
63
  try:
64
- exit_status, result = super().run(task)
64
+ exit_status, result = super().run(task, **kwargs)
65
65
  except Exception as e:
66
66
  result = str(e)
67
67
  self.app.call_from_thread(self.app.action_quit)
@@ -123,7 +123,7 @@ def _messages_to_steps(messages: list[dict]) -> list[list[dict]]:
123
123
 
124
124
 
125
125
  class SmartInputContainer(Container):
126
- def __init__(self, app: "AgentApp"):
126
+ def __init__(self, app: "TextualAgent"):
127
127
  """Smart input container supporting single-line and multi-line input modes."""
128
128
  super().__init__(classes="smart-input-container")
129
129
  self._app = app
@@ -239,7 +239,7 @@ class SmartInputContainer(Container):
239
239
  return
240
240
 
241
241
 
242
- class AgentApp(App):
242
+ class TextualAgent(App):
243
243
  BINDINGS = [
244
244
  Binding("right,l", "next_step", "Step++", tooltip="Show next step of the agent"),
245
245
  Binding("left,h", "previous_step", "Step--", tooltip="Show previous step of the agent"),
@@ -259,24 +259,28 @@ class AgentApp(App):
259
259
  Binding("f1,question_mark", "toggle_help_panel", "Help", tooltip="Show help"),
260
260
  ]
261
261
 
262
- def __init__(self, model, env, task: str, **kwargs):
262
+ def __init__(self, model, env, **kwargs):
263
263
  css_path = os.environ.get("MSWEA_MINI_STYLE_PATH", str(Path(__file__).parent.parent / "config" / "mini.tcss"))
264
264
  self.__class__.CSS = Path(css_path).read_text()
265
265
  super().__init__()
266
266
  self.agent_state = "UNINITIALIZED"
267
- self.agent_task = task
268
- self.agent = TextualAgent(self, model=model, env=env, **kwargs)
267
+ self.agent = _TextualAgent(self, model=model, env=env, **kwargs)
269
268
  self._i_step = 0
270
269
  self.n_steps = 1
271
270
  self.input_container = SmartInputContainer(self)
272
271
  self.log_handler = AddLogEmitCallback(lambda record: self.call_from_thread(self.on_log_message_emitted, record))
273
272
  logging.getLogger().addHandler(self.log_handler)
274
273
  self._spinner = Spinner("dots")
275
- self.exit_status: str | None = None
276
- self.result: str | None = None
274
+ self.exit_status: str = "ExitStatusUnset"
275
+ self.result: str = ""
277
276
 
278
277
  self._vscroll = VerticalScroll()
279
278
 
279
+ def run(self, task: str) -> tuple[str, str]:
280
+ threading.Thread(target=lambda: self.agent.run(task), daemon=True).start()
281
+ super().run()
282
+ return self.exit_status, self.result
283
+
280
284
  # --- Basics ---
281
285
 
282
286
  @property
@@ -305,7 +309,18 @@ class AgentApp(App):
305
309
  self.agent_state = "RUNNING"
306
310
  self.update_content()
307
311
  self.set_interval(1 / 8, self._update_headers)
308
- threading.Thread(target=lambda: self.agent.run(self.agent_task), daemon=True).start()
312
+
313
+ @property
314
+ def messages(self) -> list[dict]:
315
+ return self.agent.messages
316
+
317
+ @property
318
+ def model(self):
319
+ return self.agent.model
320
+
321
+ @property
322
+ def env(self):
323
+ return self.agent.env
309
324
 
310
325
  # --- Reacting to events ---
311
326
 
@@ -15,8 +15,6 @@ agent:
15
15
  </format_example>
16
16
 
17
17
  Failure to follow these rules will cause your response to be rejected.
18
- To finish, issue the following command: `echo MINI_SWE_AGENT_FINAL_OUTPUT`
19
- without any other command.
20
18
  instance_template: |
21
19
  Please solve this issue: {{task}}
22
20
 
@@ -31,7 +29,7 @@ agent:
31
29
  3. Edit the source code to resolve the issue
32
30
  4. Verify your fix works by running your script again
33
31
  5. Test edge cases to ensure your fix is robust
34
- 6. Submit your changes and finish your work by issuing the following command: `echo MINI_SWE_AGENT_FINAL_OUTPUT`.
32
+ 6. Submit your changes and finish your work by issuing the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`.
35
33
  Do not combine it with any other command. <important>After this command, you cannot continue working on this task.</important>
36
34
 
37
35
  ## Important Rules
@@ -130,7 +128,7 @@ agent:
130
128
  {%- endif -%}
131
129
  format_error_template: |
132
130
  Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
133
- If you want to end the task, please issue the following command: `echo MINI_SWE_AGENT_FINAL_OUTPUT`
131
+ If you want to end the task, please issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
134
132
  without any other command.
135
133
  Else, please format your response exactly as follows:
136
134
 
@@ -159,15 +159,15 @@ agent:
159
159
  ```
160
160
 
161
161
  ## Submission
162
- When you've completed your changes or can't make further progress
162
+ When you've completed your work (reading, editing, testing), and cannot make further progress
163
163
  issue exactly the following command:
164
164
 
165
165
  ```bash
166
- echo MINI_SWE_AGENT_FINAL_OUTPUT && git add -A && git diff --cached
166
+ echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached
167
167
  ```
168
168
 
169
- This command will submit your changes.
170
- You cannot continue working on this task after submitting.
169
+ This command will submit your work.
170
+ You cannot continue working (reading, editing, testing) in any way on this task after submitting.
171
171
  </instructions>
172
172
  action_observation_template: |
173
173
  <returncode>{{output.returncode}}</returncode>
@@ -221,9 +221,10 @@ environment:
221
221
  LESS: -R
222
222
  PIP_PROGRESS_BAR: 'off'
223
223
  TQDM_DISABLE: '1'
224
+ environment_class: docker
224
225
 
225
226
  model:
226
227
  model_name: "claude-sonnet-4-20250514"
227
228
  model_kwargs:
229
+ drop_params: true
228
230
  temperature: 0.0
229
- drop_params: true
@@ -15,7 +15,7 @@ agent:
15
15
  </format_example>
16
16
 
17
17
  Failure to follow these rules will cause your response to be rejected.
18
- To finish, issue the following command: `echo MINI_SWE_AGENT_FINAL_OUTPUT`
18
+ To finish, issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
19
19
  without any other command.
20
20
  instance_template: |
21
21
  Please solve this issue: {{task}}
@@ -35,7 +35,7 @@ agent:
35
35
  2. The action must be enclosed in triple backticks
36
36
  3. Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
37
37
  However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
38
- 4. To finish, issue the following command: `echo MINI_SWE_AGENT_FINAL_OUTPUT`.
38
+ 4. To finish, issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`.
39
39
  Do not combine it with any other command.
40
40
 
41
41
  ## Formatting your response
@@ -117,7 +117,7 @@ agent:
117
117
  {%- endif -%}
118
118
  format_error_template: |
119
119
  Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
120
- If you want to end the task, please issue the following command: `echo MINI_SWE_AGENT_FINAL_OUTPUT`
120
+ If you want to end the task, please issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
121
121
  without any other command.
122
122
  Else, please format your response exactly as follows:
123
123
 
@@ -15,8 +15,6 @@ agent:
15
15
  </format_example>
16
16
 
17
17
  Failure to follow these rules will cause your response to be rejected.
18
- To finish, issue the following command: `echo MINI_SWE_AGENT_FINAL_OUTPUT`
19
- without any other command.
20
18
  instance_template: |
21
19
  Please solve this issue: {{task}}
22
20
 
@@ -31,7 +29,7 @@ agent:
31
29
  3. Edit the source code to resolve the issue
32
30
  4. Verify your fix works by running your script again
33
31
  5. Test edge cases to ensure your fix is robust
34
- 6. Submit your changes and finish your work by issuing the following command: `echo MINI_SWE_AGENT_FINAL_OUTPUT`.
32
+ 6. Submit your changes and finish your work by issuing the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`.
35
33
  Do not combine it with any other command. <important>After this command, you cannot continue working on this task.</important>
36
34
 
37
35
  ## Important Rules
@@ -130,7 +128,7 @@ agent:
130
128
  {%- endif -%}
131
129
  format_error_template: |
132
130
  Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
133
- If you want to end the task, please issue the following command: `echo MINI_SWE_AGENT_FINAL_OUTPUT`
131
+ If you want to end the task, please issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
134
132
  without any other command.
135
133
  Else, please format your response exactly as follows:
136
134
 
@@ -16,8 +16,6 @@ agent:
16
16
  </format_example>
17
17
 
18
18
  Failure to follow these rules will cause your response to be rejected.
19
- To finish, issue the following command: `echo MINI_SWE_AGENT_FINAL_OUTPUT`
20
- without any other command.
21
19
  instance_template: |
22
20
  Please solve this issue: {{task}}
23
21
 
@@ -32,7 +30,7 @@ agent:
32
30
  3. Edit the source code to resolve the issue
33
31
  4. Verify your fix works by running your script again
34
32
  5. Test edge cases to ensure your fix is robust
35
- 6. Submit your changes and finish your work by issuing the following command: `echo MINI_SWE_AGENT_FINAL_OUTPUT`.
33
+ 6. Submit your changes and finish your work by issuing the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`.
36
34
  Do not combine it with any other command. <important>After this command, you cannot continue working on this task.</important>
37
35
 
38
36
  ## Important Rules
@@ -131,7 +129,7 @@ agent:
131
129
  {%- endif -%}
132
130
  format_error_template: |
133
131
  Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
134
- If you want to end the task, please issue the following command: `echo MINI_SWE_AGENT_FINAL_OUTPUT`
132
+ If you want to end the task, please issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
135
133
  without any other command.
136
134
  Else, please format your response exactly as follows:
137
135
 
@@ -1 +1,30 @@
1
1
  """Environment implementations for mini-SWE-agent."""
2
+
3
+ import copy
4
+ import importlib
5
+
6
+ from minisweagent import Environment
7
+
8
+ _ENVIRONMENT_MAPPING = {
9
+ "docker": "minisweagent.environments.docker.DockerEnvironment",
10
+ "singularity": "minisweagent.environments.singularity.SingularityEnvironment",
11
+ "local": "minisweagent.environments.local.LocalEnvironment",
12
+ "swerex_docker": "minisweagent.environments.extra.swerex_docker.SwerexDockerEnvironment",
13
+ }
14
+
15
+
16
+ def get_environment_class(spec: str) -> type[Environment]:
17
+ full_path = _ENVIRONMENT_MAPPING.get(spec, spec)
18
+ try:
19
+ module_name, class_name = full_path.rsplit(".", 1)
20
+ module = importlib.import_module(module_name)
21
+ return getattr(module, class_name)
22
+ except (ValueError, ImportError, AttributeError):
23
+ msg = f"Unknown environment type: {spec} (resolved to {full_path}, available: {_ENVIRONMENT_MAPPING})"
24
+ raise ValueError(msg)
25
+
26
+
27
+ def get_environment(config: dict, *, default_type: str = "") -> Environment:
28
+ config = copy.deepcopy(config)
29
+ environment_class = config.pop("environment_class", default_type)
30
+ return get_environment_class(environment_class)(**config)
@@ -20,7 +20,7 @@ class DockerEnvironmentConfig:
20
20
  """
21
21
  timeout: int = 30
22
22
  """Timeout for executing commands in the container."""
23
- executable: str = "docker"
23
+ executable: str = os.getenv("MSWEA_DOCKER_EXECUTABLE", "docker")
24
24
  """Path to the docker/container executable."""
25
25
  run_args: list[str] = field(default_factory=list)
26
26
  """Additional arguments to pass to the docker/container executable."""
@@ -1,8 +1,12 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
3
  import os
4
+ import shutil
4
5
  import subprocess
6
+ import tempfile
7
+ import uuid
5
8
  from dataclasses import dataclass, field
9
+ from pathlib import Path
6
10
  from typing import Any
7
11
 
8
12
 
@@ -16,7 +20,7 @@ class SingularityEnvironmentConfig:
16
20
  """Environment variables to forward to the container."""
17
21
  timeout: int = 30
18
22
  """Timeout for executing commands in the container."""
19
- executable: str = "singularity"
23
+ executable: str = os.getenv("MSWEA_SINGULARITY_EXECUTABLE", "singularity")
20
24
  """Path to the singularity executable."""
21
25
 
22
26
 
@@ -24,11 +28,20 @@ class SingularityEnvironment:
24
28
  def __init__(self, **kwargs):
25
29
  """Singularity environment. See `SingularityEnvironmentConfig` for kwargs."""
26
30
  self.config = SingularityEnvironmentConfig(**kwargs)
31
+ self.sandbox_dir = Path(tempfile.gettempdir()) / f"minisweagent-{uuid.uuid4().hex[:8]}"
32
+
33
+ subprocess.run(
34
+ [self.config.executable, "build", "--sandbox", self.sandbox_dir, self.config.image],
35
+ check=True,
36
+ )
27
37
 
28
38
  def execute(self, command: str, cwd: str = "") -> dict[str, Any]:
29
39
  """Execute a command in a Singularity container and return the result as a dict."""
30
40
  cmd = [self.config.executable, "exec"]
31
41
 
42
+ # Do not inherit directories and env vars from host
43
+ cmd.extend(["--contain", "--cleanenv"])
44
+
32
45
  work_dir = cwd or self.config.cwd
33
46
  if work_dir and work_dir != "/":
34
47
  cmd.extend(["--pwd", work_dir])
@@ -39,7 +52,7 @@ class SingularityEnvironment:
39
52
  for key, value in self.config.env.items():
40
53
  cmd.extend(["--env", f"{key}={value}"])
41
54
 
42
- cmd.extend([self.config.image, "bash", "-c", command])
55
+ cmd.extend(["--writable", str(self.sandbox_dir), "bash", "-c", command])
43
56
  result = subprocess.run(
44
57
  cmd,
45
58
  text=True,
@@ -50,3 +63,12 @@ class SingularityEnvironment:
50
63
  stderr=subprocess.STDOUT,
51
64
  )
52
65
  return {"output": result.stdout, "returncode": result.returncode}
66
+
67
+ def cleanup(self):
68
+ if self.sandbox_dir.exists():
69
+ print(f"Removing sandbox {self.sandbox_dir}")
70
+ shutil.rmtree(self.sandbox_dir)
71
+
72
+ def __del__(self):
73
+ """Cleanup sandbox when object is destroyed."""
74
+ self.cleanup()
@@ -34,11 +34,12 @@ This setup will ask you for your model and an API key.
34
34
  Here's a few popular models and the required API keys:
35
35
 
36
36
  [bold green]claude-sonnet-4-20250514[/bold green] ([bold green]ANTHROPIC_API_KEY[/bold green])
37
- [bold green]o3[/bold green] ([bold green]OPENAI_API_KEY[/bold green])
37
+ [bold green]openai/gpt-5[/bold green] or [bold green]openai/gpt-5-mini[/bold green] ([bold green]OPENAI_API_KEY[/bold green])
38
38
 
39
39
  [bold yellow]You can leave any setting blank to skip it.[/bold yellow]
40
40
 
41
41
  More information at https://mini-swe-agent.com/latest/quickstart/
42
+ To find the best model, check the leaderboard at https://swebench.com/
42
43
  """
43
44
 
44
45
 
@@ -17,9 +17,10 @@ import yaml
17
17
  from datasets import load_dataset
18
18
  from rich.live import Live
19
19
 
20
+ from minisweagent import Environment
20
21
  from minisweagent.agents.default import DefaultAgent
21
22
  from minisweagent.config import builtin_config_dir, get_config_path
22
- from minisweagent.environments.docker import DockerEnvironment
23
+ from minisweagent.environments import get_environment
23
24
  from minisweagent.models import get_model
24
25
  from minisweagent.run.extra.utils.batch_progress import RunBatchProgressManager
25
26
  from minisweagent.run.utils.save import save_traj
@@ -74,6 +75,15 @@ def get_swebench_docker_image_name(instance: dict) -> str:
74
75
  return image_name
75
76
 
76
77
 
78
+ def get_sb_environment(config: dict, instance: dict) -> Environment:
79
+ image_name = get_swebench_docker_image_name(instance)
80
+ env_config = config.get("environment", {})
81
+ if env_config.get("environment_class") == "singularity":
82
+ image_name = "docker://" + image_name
83
+ env_config["image"] = image_name
84
+ return get_environment(env_config, default_type="docker")
85
+
86
+
77
87
  def update_preds_file(output_path: Path, instance_id: str, model_name: str, result: str):
78
88
  """Update the output JSON file with results from a single instance."""
79
89
  with _OUTPUT_FILE_LOCK:
@@ -102,8 +112,7 @@ def remove_from_preds_file(output_path: Path, instance_id: str):
102
112
  def process_instance(
103
113
  instance: dict,
104
114
  output_dir: Path,
105
- model_name: str | None,
106
- config_path: str | Path,
115
+ config: dict,
107
116
  progress_manager: RunBatchProgressManager,
108
117
  ) -> None:
109
118
  """Process a single SWEBench instance."""
@@ -112,10 +121,7 @@ def process_instance(
112
121
  # avoid inconsistent state if something here fails and there's leftover previous files
113
122
  remove_from_preds_file(output_dir / "preds.json", instance_id)
114
123
  (instance_dir / f"{instance_id}.traj.json").unlink(missing_ok=True)
115
-
116
- image_name = get_swebench_docker_image_name(instance)
117
- config = yaml.safe_load(get_config_path(config_path).read_text())
118
- model = get_model(model_name, config=config.get("model", {}))
124
+ model = get_model(config=config.get("model", {}))
119
125
  task = instance["problem_statement"]
120
126
 
121
127
  progress_manager.on_instance_start(instance_id)
@@ -125,7 +131,7 @@ def process_instance(
125
131
  extra_info = None
126
132
 
127
133
  try:
128
- env = DockerEnvironment(**(config.get("environment", {}) | {"image": image_name}))
134
+ env = get_sb_environment(config, instance)
129
135
  agent = ProgressTrackingAgent(
130
136
  model,
131
137
  env,
@@ -171,21 +177,22 @@ def filter_instances(
171
177
  return instances
172
178
 
173
179
 
180
+ # fmt: off
174
181
  @app.command(help=_HELP_TEXT)
175
182
  def main(
176
- subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset"),
177
- split: str = typer.Option("dev", "--split", help="Dataset split"),
178
- slice_spec: str = typer.Option("", "--slice", help="Slice specification (e.g., '0:5' for first 5 instances)"),
179
- filter_spec: str = typer.Option("", "--filter", help="Filter instance IDs by regex"),
180
- shuffle: bool = typer.Option(False, "--shuffle", help="Shuffle instances"),
181
- output: str = typer.Option("", "-o", "--output", help="Output directory"),
182
- workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing"),
183
- model: str | None = typer.Option(None, "-m", "--model", help="Model to use"),
184
- redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances"),
185
- config: Path = typer.Option(
186
- builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file"
187
- ),
183
+ subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset", rich_help_panel="Data selection"),
184
+ split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
185
+ slice_spec: str = typer.Option("", "--slice", help="Slice specification (e.g., '0:5' for first 5 instances)", rich_help_panel="Data selection"),
186
+ filter_spec: str = typer.Option("", "--filter", help="Filter instance IDs by regex", rich_help_panel="Data selection"),
187
+ shuffle: bool = typer.Option(False, "--shuffle", help="Shuffle instances", rich_help_panel="Data selection"),
188
+ output: str = typer.Option("", "-o", "--output", help="Output directory", rich_help_panel="Basic"),
189
+ workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing", rich_help_panel="Basic"),
190
+ model: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
191
+ redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances", rich_help_panel="Data selection"),
192
+ config_spec: Path = typer.Option( builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
193
+ environment_class: str | None = typer.Option( None, "--environment-class", help="Environment type to use. Recommended are docker or singularity", rich_help_panel="Advanced"),
188
194
  ) -> None:
195
+ # fmt: on
189
196
  dataset_path = DATASET_MAPPING.get(subset, subset)
190
197
  print(f"Loading dataset {dataset_path}, split {split}...")
191
198
  instances = list(load_dataset(dataset_path, split=split))
@@ -201,6 +208,10 @@ def main(
201
208
  print(f"Running on {len(instances)} instances...")
202
209
  print(f"Results will be saved to {output_path}")
203
210
 
211
+ config = yaml.safe_load(get_config_path(config_spec).read_text())
212
+ config.setdefault("environment", {}).setdefault("environment_class", environment_class)
213
+ config.setdefault("model", {}).setdefault("model_name", model)
214
+
204
215
  progress_manager = RunBatchProgressManager(len(instances), output_path / f"exit_statuses_{time.time()}.yaml")
205
216
 
206
217
  def process_futures(futures: dict[concurrent.futures.Future, str]):
@@ -218,7 +229,7 @@ def main(
218
229
  with Live(progress_manager.render_group, refresh_per_second=4):
219
230
  with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
220
231
  futures = {
221
- executor.submit(process_instance, instance, output_path, model, config, progress_manager): instance[
232
+ executor.submit(process_instance, instance, output_path, config, progress_manager): instance[
222
233
  "instance_id"
223
234
  ]
224
235
  for instance in instances
@@ -8,29 +8,30 @@ from datasets import load_dataset
8
8
 
9
9
  from minisweagent.agents.interactive import InteractiveAgent
10
10
  from minisweagent.config import builtin_config_dir, get_config_path
11
- from minisweagent.environments.docker import DockerEnvironment
12
11
  from minisweagent.models import get_model
13
- from minisweagent.run.extra.swebench import DATASET_MAPPING, get_swebench_docker_image_name
12
+ from minisweagent.run.extra.swebench import (
13
+ DATASET_MAPPING,
14
+ get_sb_environment,
15
+ )
14
16
 
15
17
  app = typer.Typer(add_completion=False)
16
18
 
17
19
 
20
+ # fmt: off
18
21
  @app.command()
19
22
  def main(
20
- subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset"),
21
- split: str = typer.Option("dev", "--split", help="Dataset split"),
22
- instance_spec: str = typer.Option(None, "-i", "--instance", help="SWE-Bench instance ID"),
23
- model_name: str | None = typer.Option(None, "-m", "--model", help="Model to use"),
24
- config_path: Path = typer.Option(
25
- builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file"
26
- ),
23
+ subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset", rich_help_panel="Data selection"),
24
+ split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
25
+ instance_spec: str = typer.Option(0, "-i", "--instance", help="SWE-Bench instance ID or index", rich_help_panel="Data selection"),
26
+ model_name: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
27
+ config_path: Path = typer.Option( builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
28
+ environment_class: str | None = typer.Option(None, "--environment-class", rich_help_panel="Advanced"),
29
+ exit_immediately: bool = typer.Option( False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting.", rich_help_panel="Basic"),
27
30
  ) -> None:
31
+ # fmt: on
28
32
  """Run on a single SWE-Bench instance."""
29
- try:
30
- dataset_path = DATASET_MAPPING[subset]
31
- except KeyError:
32
- dataset_path = subset
33
- print(f"Loading dataset {dataset_path}, split {split}...")
33
+ dataset_path = DATASET_MAPPING.get(subset, subset)
34
+ print(f"Loading dataset from {dataset_path}, split {split}...")
34
35
  instances = {
35
36
  inst["instance_id"]: inst # type: ignore
36
37
  for inst in load_dataset(dataset_path, split=split)
@@ -39,12 +40,15 @@ def main(
39
40
  instance_spec = sorted(instances.keys())[int(instance_spec)]
40
41
  instance: dict = instances[instance_spec] # type: ignore
41
42
 
42
- _config = yaml.safe_load(get_config_path(config_path).read_text())
43
- env = DockerEnvironment(**(_config.get("environment", {}) | {"image": get_swebench_docker_image_name(instance)}))
43
+ config = yaml.safe_load(get_config_path(config_path).read_text())
44
+ config.setdefault("environment", {}).setdefault("environment_class", environment_class)
45
+ if exit_immediately:
46
+ config.setdefault("agent", {})["confirm_exit"] = False
47
+ env = get_sb_environment(config, instance)
44
48
  agent = InteractiveAgent(
45
- get_model(model_name, _config.get("model", {})),
49
+ get_model(model_name, config.get("model", {})),
46
50
  env,
47
- **(_config.get("agent", {}) | {"mode": "yolo"}),
51
+ **(config.get("agent", {}) | {"mode": "yolo"}),
48
52
  )
49
53
  agent.run(instance["problem_statement"])
50
54
 
minisweagent/run/mini.py CHANGED
@@ -14,9 +14,9 @@ from prompt_toolkit.history import FileHistory
14
14
  from prompt_toolkit.shortcuts import PromptSession
15
15
  from rich.console import Console
16
16
 
17
- from minisweagent import Environment, Model, global_config_dir
17
+ from minisweagent import global_config_dir
18
18
  from minisweagent.agents.interactive import InteractiveAgent
19
- from minisweagent.agents.interactive_textual import AgentApp
19
+ from minisweagent.agents.interactive_textual import TextualAgent
20
20
  from minisweagent.config import builtin_config_dir, get_config_path
21
21
  from minisweagent.environments.local import LocalEnvironment
22
22
  from minisweagent.models import get_model
@@ -41,36 +41,6 @@ More information about the usage: [bold green]https://mini-swe-agent.com/latest/
41
41
  """
42
42
 
43
43
 
44
- def run_interactive(model: Model, env: Environment, agent_config: dict, task: str, output: Path | None = None) -> Any:
45
- agent = InteractiveAgent(
46
- model,
47
- env,
48
- **agent_config,
49
- )
50
-
51
- exit_status, result = None, None
52
- try:
53
- exit_status, result = agent.run(task)
54
- finally:
55
- if output:
56
- save_traj(agent, output, exit_status=exit_status, result=result)
57
- return agent
58
-
59
-
60
- def run_textual(model: Model, env: Environment, agent_config: dict, task: str, output: Path | None = None) -> Any:
61
- agent_app = AgentApp(
62
- model,
63
- env,
64
- task,
65
- **agent_config,
66
- )
67
- try:
68
- agent_app.run()
69
- finally:
70
- if output:
71
- save_traj(agent_app.agent, output, exit_status=agent_app.exit_status, result=agent_app.result)
72
-
73
-
74
44
  @app.command(help=_HELP_TEXT)
75
45
  def main(
76
46
  visual: bool = typer.Option(
@@ -119,10 +89,17 @@ def main(
119
89
  env = LocalEnvironment(**config.get("env", {}))
120
90
 
121
91
  # Both visual flag and the MSWEA_VISUAL_MODE_DEFAULT flip the mode, so it's essentially a XOR
92
+ agent_class = InteractiveAgent
122
93
  if visual == (os.getenv("MSWEA_VISUAL_MODE_DEFAULT", "false") == "false"):
123
- return run_textual(model, env, config["agent"], task, output) # type: ignore[arg-type]
124
- else:
125
- return run_interactive(model, env, config["agent"], task, output) # type: ignore[arg-type]
94
+ agent_class = TextualAgent
95
+ exit_status, result = None, None
96
+ agent = agent_class(model, env, **config.get("agent", {}))
97
+ try:
98
+ exit_status, result = agent.run(task) # type: ignore[arg-type]
99
+ finally:
100
+ if output:
101
+ save_traj(agent, output, exit_status=exit_status, result=result) # type: ignore[arg-type]
102
+ return agent
126
103
 
127
104
 
128
105
  if __name__ == "__main__":