mini-swe-agent 1.9.1__py3-none-any.whl → 1.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mini-swe-agent
3
- Version: 1.9.1
3
+ Version: 1.11.0
4
4
  Summary: Nano SWE Agent - A simple AI software engineering agent
5
5
  Author-email: Kilian Lieret <kilian.lieret@posteo.de>, "Carlos E. Jimenez" <carlosej@princeton.edu>
6
6
  License: MIT License
@@ -48,7 +48,7 @@ Requires-Dist: typer
48
48
  Requires-Dist: platformdirs
49
49
  Requires-Dist: textual
50
50
  Requires-Dist: prompt_toolkit
51
- Requires-Dist: openai<=1.99.5
51
+ Requires-Dist: openai!=1.100.0,!=1.100.1
52
52
  Provides-Extra: full
53
53
  Requires-Dist: mini-swe-agent[dev]; extra == "full"
54
54
  Requires-Dist: swe-rex>=1.4.0; extra == "full"
@@ -73,7 +73,7 @@ Dynamic: license-file
73
73
 
74
74
  # The 100 line AI agent that solves GitHub issues & more
75
75
 
76
- 📣 [Run `mini` with GPT-5 and friends: Read here](https://mini-swe-agent.com/latest/quickstart/#gpt-5)
76
+ 📣 [New blogpost: Randomly switching between GPT-5 and Sonnet 4 boosts performance](https://www.swebench.com/SWE-bench/blog/2025/08/19/mini-roulette/)
77
77
 
78
78
  [![Docs](https://img.shields.io/badge/Docs-green?style=for-the-badge&logo=materialformkdocs&logoColor=white)](https://mini-swe-agent.com/latest/)
79
79
  [![Slack](https://img.shields.io/badge/Slack-4A154B?style=for-the-badge&logo=slack&logoColor=white)](https://join.slack.com/t/swe-bench/shared_invite/zt-36pj9bu5s-o3_yXPZbaH2wVnxnss1EkQ)
@@ -1,11 +1,11 @@
1
- mini_swe_agent-1.9.1.dist-info/licenses/LICENSE.md,sha256=D3luWPkdHAe7LBsdD4vzqDAXw6Xewb3G-uczss0uh1s,1094
2
- minisweagent/__init__.py,sha256=28OQ0MKY6EvqfdiNiDiiP-a1aGELpGBdUjRR5rsc3-Y,1997
1
+ mini_swe_agent-1.11.0.dist-info/licenses/LICENSE.md,sha256=D3luWPkdHAe7LBsdD4vzqDAXw6Xewb3G-uczss0uh1s,1094
2
+ minisweagent/__init__.py,sha256=km0yBZPAHXfBk30MPY1oRyjdgOZAG_G9Kv6xwpE6Tt4,2014
3
3
  minisweagent/__main__.py,sha256=FIyAOiw--c3FQ2g240FOM1FdL0lk_PxSpixu0pQ7WFo,194
4
4
  minisweagent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  minisweagent/agents/__init__.py,sha256=cpjJLzg1IGxLM-tZpoMJV9S33ye13XtdBO0x7DU_Lrk,48
6
- minisweagent/agents/default.py,sha256=znoeQPSnSLNvrvVC07kwgEb-rqi0yxf3Kv7JPeMDNsI,5624
6
+ minisweagent/agents/default.py,sha256=sxvACv-Yn3A-KQD9-Q-apHiU1gwkyX_-GPOikFVyuOI,5690
7
7
  minisweagent/agents/interactive.py,sha256=NBeNamRuqww9ZRhOg1q8xPO9ziUw2gpAVV6hCPbpBxU,7470
8
- minisweagent/agents/interactive_textual.py,sha256=rvD1zoEQSjWd1MklCRGT01cCFoMh46iFrDPBklAWW_A,17984
8
+ minisweagent/agents/interactive_textual.py,sha256=9RAJ-gUVVNL2qEq_HT2UcwJJorrSlp6WYoRwOgoCngI,18054
9
9
  minisweagent/config/README.md,sha256=ABd9anA4aRWtx7Oh37z36Wv6ARvcxD2w9lPUE24R2mY,435
10
10
  minisweagent/config/__init__.py,sha256=0KzHaaIqWgRy2zbwIzhrg6BJPDzOvYi3jb4eBNY4sAU,823
11
11
  minisweagent/config/default.yaml,sha256=OHK9-7PkCa9ZzgYykF1zGYC_AWkiAdOrEpDuurF-1Rk,5143
@@ -14,38 +14,43 @@ minisweagent/config/mini.tcss,sha256=fmAP9cYAp2n7Ps2Dw3e-ZOGEF2E8JcwTgK1LDcis-x4
14
14
  minisweagent/config/mini.yaml,sha256=mDfN7KputHf7kOGidJFX5-5CDKg97vxxu2cdYYlPoM8,5159
15
15
  minisweagent/config/mini_no_temp.yaml,sha256=n0W-017tBmMx57U9SLt7Fy9WJxI9x2vdTWBWeSngGMc,5204
16
16
  minisweagent/config/extra/__init__.py,sha256=e1MoAlDn_wc9HnXNoncf1P-B4DQ-iRf6n7Q_txjZGRI,52
17
- minisweagent/config/extra/swebench.yaml,sha256=5FKblpcNTHmVUNE1JLHo5_AsupvlwsrLj8I3R2mRItk,7680
18
- minisweagent/environments/__init__.py,sha256=tTnNjNAhMvIuB1mlesreBV5TLdQBp79qj_Mxr7HGzNk,1180
19
- minisweagent/environments/docker.py,sha256=tL0XL2kIxNGLswcmPTDklpoa3RGkQJr7HkEEE2rxzNQ,4049
17
+ minisweagent/config/extra/swebench.yaml,sha256=2LzqIM60eYqZZhB0U1q9e6fBiL1MvHvSUF63xI5U6kE,7738
18
+ minisweagent/config/extra/swebench_roulette.yaml,sha256=in235myH4BCJ8fyGtXuTlv16Ky3JTytV4H9-wvd-iTM,7801
19
+ minisweagent/environments/__init__.py,sha256=x80Ulx0UK21GAwg5jSTkOFeiZ7CQsGBP8cI_5BhazAo,1266
20
+ minisweagent/environments/docker.py,sha256=NJhdpYINEZC20URto2ZCkTCj44PEGqUn_sIWYvrA8XA,4281
20
21
  minisweagent/environments/local.py,sha256=dV05j4jPrmY0k5O_UtF_qrRqqdilyPUUkbn-YUhRC3M,1236
21
- minisweagent/environments/singularity.py,sha256=dHH_SS4MOm3cP7k1kSErvIjVijTL_-VFTOHtCyjUKVw,2671
22
+ minisweagent/environments/singularity.py,sha256=o7R_3aXbODsA_DPx0aH13eZhLU7yhMUtxGyRKybfVr4,3633
22
23
  minisweagent/environments/extra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
+ minisweagent/environments/extra/bubblewrap.py,sha256=So8MY3qprylkVhM78XUt05U3sAXVpxK47042eKWYxhA,3620
23
25
  minisweagent/environments/extra/swerex_docker.py,sha256=wu2F_9moa8yj5IgCEquFOoxEXJ8Vi6BF19rJvRdoJIs,1665
24
- minisweagent/models/__init__.py,sha256=wZguR-mGijqlxovIYfs1WcCiaHyYPywml0AacBJtg5c,2890
26
+ minisweagent/models/__init__.py,sha256=Boq3-r9eo4ptqnADMxAwtfevq9TmmE75mxSK5rS4ij0,4016
25
27
  minisweagent/models/anthropic.py,sha256=D8nHvvbgzPjla0He8p0O9kaXASPWg1Sai0pHsAj_Yn8,855
26
- minisweagent/models/litellm_model.py,sha256=tEwAV2dzslJ4HFDXApXSPo5OWsiz8soy52P8-r9p4Xg,2538
28
+ minisweagent/models/litellm_model.py,sha256=h6VHpiomiLo5Xi9fOxk818KjgAefMCumZdno-tIWtXo,2993
29
+ minisweagent/models/openrouter_model.py,sha256=85ccv7IFZnlU8F5nm9CfwEFi8M2baMgIOzb90i352zU,3729
27
30
  minisweagent/models/test_models.py,sha256=ItCA6ddntzkYA7dzSuUEaLMV-AE8TBuXBFP8CzpiO3U,1351
31
+ minisweagent/models/extra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
+ minisweagent/models/extra/roulette.py,sha256=SqLj_wz9Vkbxou7i9Ef4Uzmg_eheDouNySkkV7pm2Ys,2093
28
33
  minisweagent/models/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
34
  minisweagent/models/utils/cache_control.py,sha256=mG9cE56HQaUwXfoqvXoH6LcbMV_G1vlEE1aBBpikXYg,1608
30
35
  minisweagent/models/utils/key_per_thread.py,sha256=Vlxt--rapNNYCgIHrMCu1WVAkuiVIhC_awbarkbnkZQ,644
31
36
  minisweagent/run/__init__.py,sha256=WIoYgHVl7iZF2YncrfV3IttupG6P5KogroKHKECka3A,38
32
- minisweagent/run/github_issue.py,sha256=GWOkGM09jOYV93p6xIM_kKWmC1yP_d5lprafWlqoBN0,2748
37
+ minisweagent/run/github_issue.py,sha256=LS4_7LktE0pitcFZJyBJADF74xZMBDgovUa4KwU9RN0,3073
33
38
  minisweagent/run/hello_world.py,sha256=erLnEwNmPFLxq3-8zyv66Vy1kIqMqQf97vISX7LrQXg,959
34
39
  minisweagent/run/inspector.py,sha256=QnY3oYzm-yq3w9Jzs112Lco2Rg84vSocAWrQRVz_1lc,7127
35
- minisweagent/run/mini.py,sha256=09WKk4ERzhQ_hoSg0kNUYGtCRjLqMrLOSUqQtxjVwlo,4459
40
+ minisweagent/run/mini.py,sha256=EMU-hXAaTsgbdtTmZRxPYZ2bjYRTD6NQDr5Eta5Ybe8,4814
36
41
  minisweagent/run/mini_extra.py,sha256=ecA1PnTWElpO60G9RktvVLtUOf3bZ_ESmnSttS6izhQ,1465
37
42
  minisweagent/run/extra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- minisweagent/run/extra/config.py,sha256=CEsEr8AdEm64Jods2ZRURChHKMILSatkBLkLmRywkrw,3672
39
- minisweagent/run/extra/swebench.py,sha256=3zGn7PUZCCf5vcVYy6vsIgjdJHUVh8cr-ztJwTsUWUo,10826
40
- minisweagent/run/extra/swebench_single.py,sha256=RGfs0FPD_3cznR5Y-J90CopYCC0_Vg9NYusRB1wKllc,3309
43
+ minisweagent/run/extra/config.py,sha256=L5Xe7VGX1HMBlPac9aBufPnHA9duJpZYrbqXVtr8O6w,3845
44
+ minisweagent/run/extra/swebench.py,sha256=sheaYoTxFmw5NdRGiRHPd_epVHRdhYtfIgI-RMvTSQc,11650
45
+ minisweagent/run/extra/swebench_single.py,sha256=BwalUCgQTDlg4WOFmhPmW8K0wzbNoktTdGWzPoAiryE,3607
41
46
  minisweagent/run/extra/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
47
  minisweagent/run/extra/utils/batch_progress.py,sha256=xhJ7FmsaTBGz-yh8pzYl4yMoUGjn7GA24eYrP-nHj60,6804
43
48
  minisweagent/run/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- minisweagent/run/utils/save.py,sha256=MzIvUyCiqbq3EaQc8cKtaD0MEkcyXmUyl4ZcxYzZW_Y,1680
49
+ minisweagent/run/utils/save.py,sha256=2xd-UnUzI7Fr_AUZ5KEJ53Aa4kpuuGYxkLwyUcvqyMM,2503
45
50
  minisweagent/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
51
  minisweagent/utils/log.py,sha256=ruDMNKMrVC9NPvCeHwO3QYz5jsVNUGQB2dRAEAPAWp8,996
47
- mini_swe_agent-1.9.1.dist-info/METADATA,sha256=iJsC920Z-2YoGsdpRgwk1wwKvq9SWi9qcTHSQjqPetc,13828
48
- mini_swe_agent-1.9.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
49
- mini_swe_agent-1.9.1.dist-info/entry_points.txt,sha256=d1_yRbTaGjs1UXHa6JQK0sKDGBIVGm8oeW0k2kfbJgQ,182
50
- mini_swe_agent-1.9.1.dist-info/top_level.txt,sha256=zKF4t8bFpV87fdVABZt2Da-vnb4Vkh_CxkwQx5YT4Ew,13
51
- mini_swe_agent-1.9.1.dist-info/RECORD,,
52
+ mini_swe_agent-1.11.0.dist-info/METADATA,sha256=FAhb6b9hKIxgQTFMAd1Vjy5uKbYi9Zn1oyKUsWfymuw,13888
53
+ mini_swe_agent-1.11.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
54
+ mini_swe_agent-1.11.0.dist-info/entry_points.txt,sha256=d1_yRbTaGjs1UXHa6JQK0sKDGBIVGm8oeW0k2kfbJgQ,182
55
+ mini_swe_agent-1.11.0.dist-info/top_level.txt,sha256=zKF4t8bFpV87fdVABZt2Da-vnb4Vkh_CxkwQx5YT4Ew,13
56
+ mini_swe_agent-1.11.0.dist-info/RECORD,,
minisweagent/__init__.py CHANGED
@@ -8,7 +8,7 @@ This file provides:
8
8
  unless you want the static type checking.
9
9
  """
10
10
 
11
- __version__ = "1.9.1"
11
+ __version__ = "1.11.0"
12
12
 
13
13
  import os
14
14
  from pathlib import Path
@@ -66,6 +66,7 @@ class Agent(Protocol):
66
66
  model: Model
67
67
  env: Environment
68
68
  messages: list[dict[str, str]]
69
+ config: Any
69
70
 
70
71
  def run(self, task: str, **kwargs) -> tuple[str, str]: ...
71
72
 
@@ -5,7 +5,7 @@ import subprocess
5
5
  from collections.abc import Callable
6
6
  from dataclasses import asdict, dataclass
7
7
 
8
- from jinja2 import Template
8
+ from jinja2 import StrictUndefined, Template
9
9
 
10
10
  from minisweagent import Environment, Model
11
11
 
@@ -63,7 +63,9 @@ class DefaultAgent:
63
63
 
64
64
  def render_template(self, template: str, **kwargs) -> str:
65
65
  template_vars = asdict(self.config) | self.env.get_template_vars() | self.model.get_template_vars()
66
- return Template(template).render(**kwargs, **template_vars, **self.extra_template_vars)
66
+ return Template(template, undefined=StrictUndefined).render(
67
+ **kwargs, **template_vars, **self.extra_template_vars
68
+ )
67
69
 
68
70
  def add_message(self, role: str, content: str, **kwargs):
69
71
  self.messages.append({"role": role, "content": content, **kwargs})
@@ -283,6 +283,10 @@ class TextualAgent(App):
283
283
 
284
284
  # --- Basics ---
285
285
 
286
+ @property
287
+ def config(self):
288
+ return self.agent.config
289
+
286
290
  @property
287
291
  def i_step(self) -> int:
288
292
  """Current step index."""
@@ -36,7 +36,7 @@ agent:
36
36
  2. Provide exactly ONE bash command to execute
37
37
 
38
38
  ## Important Boundaries
39
- - MODIFY: Regular source code files in {{working_dir}}
39
+ - MODIFY: Regular source code files in /testbed (this is the working directory for all your subsequent commands)
40
40
  - DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
41
41
 
42
42
  ## Recommended Workflow
@@ -0,0 +1,233 @@
1
+ agent:
2
+ system_template: |
3
+ You are a helpful assistant that can interact multiple times with a computer shell to solve programming tasks.
4
+ Your response must contain exactly ONE bash code block with ONE command (or commands connected with && or ||).
5
+
6
+ Include a THOUGHT section before your command where you explain your reasoning process.
7
+ Format your response as shown in <format_example>.
8
+
9
+ <format_example>
10
+ THOUGHT: Your reasoning and analysis here
11
+
12
+ ```bash
13
+ your_command_here
14
+ ```
15
+ </format_example>
16
+
17
+ Failure to follow these rules will cause your response to be rejected.
18
+ instance_template: |
19
+ <pr_description>
20
+ Consider the following PR description:
21
+ {{task}}
22
+ </pr_description>
23
+
24
+ <instructions>
25
+ # Task Instructions
26
+
27
+ ## Overview
28
+ You're a software engineer interacting continuously with a computer by submitting commands.
29
+ You'll be helping implement necessary changes to meet requirements in the PR description.
30
+ Your task is specifically to make changes to non-test files in the current directory in order to fix the issue described in the PR description in a way that is general and consistent with the codebase.
31
+
32
+ IMPORTANT: This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command.
33
+
34
+ For each response:
35
+ 1. Include a THOUGHT section explaining your reasoning and what you're trying to accomplish
36
+ 2. Provide exactly ONE bash command to execute
37
+
38
+ ## Important Boundaries
39
+ - MODIFY: Regular source code files in {{working_dir}}
40
+ - DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
41
+
42
+ ## Recommended Workflow
43
+ 1. Analyze the codebase by finding and reading relevant files
44
+ 2. Create a script to reproduce the issue
45
+ 3. Edit the source code to resolve the issue
46
+ 4. Verify your fix works by running your script again
47
+ 5. Test edge cases to ensure your fix is robust
48
+
49
+ ## Command Execution Rules
50
+ You are operating in an environment where
51
+ 1. You write a single command
52
+ 2. The system executes that command in a subshell
53
+ 3. You see the result
54
+ 4. You write your next command
55
+
56
+ Each response should include:
57
+ 1. A **THOUGHT** section where you explain your reasoning and plan
58
+ 2. A single bash code block with your command
59
+
60
+ Format your responses like this:
61
+
62
+ <format_example>
63
+ THOUGHT: Here I explain my reasoning process, analysis of the current situation,
64
+ and what I'm trying to accomplish with the command below.
65
+
66
+ ```bash
67
+ your_command_here
68
+ ```
69
+ </format_example>
70
+
71
+ Commands must be specified in a single bash code block:
72
+
73
+ ```bash
74
+ your_command_here
75
+ ```
76
+
77
+ **CRITICAL REQUIREMENTS:**
78
+ - Your response SHOULD include a THOUGHT section explaining your reasoning
79
+ - Your response MUST include EXACTLY ONE bash code block
80
+ - This bash block MUST contain EXACTLY ONE command (or a set of commands connected with && or ||)
81
+ - If you include zero or multiple bash blocks, or no command at all, YOUR RESPONSE WILL FAIL
82
+ - Do NOT try to run multiple independent commands in separate blocks in one response
83
+ - Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
84
+ - However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
85
+
86
+ Example of a CORRECT response:
87
+ <example_response>
88
+ THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
89
+
90
+ ```bash
91
+ ls -la
92
+ ```
93
+ </example_response>
94
+
95
+ Example of an INCORRECT response:
96
+ <example_response>
97
+ THOUGHT: I need to examine the codebase and then look at a specific file. I'll run multiple commands to do this.
98
+
99
+ ```bash
100
+ ls -la
101
+ ```
102
+
103
+ Now I'll read the file:
104
+
105
+ ```bash
106
+ cat file.txt
107
+ ```
108
+ </example_response>
109
+
110
+ If you need to run multiple commands, either:
111
+ 1. Combine them in one block using && or ||
112
+ ```bash
113
+ command1 && command2 || echo "Error occurred"
114
+ ```
115
+
116
+ 2. Wait for the first command to complete, see its output, then issue the next command in your following response.
117
+
118
+ ## Environment Details
119
+ - You have a full Linux shell environment
120
+ - Always use non-interactive flags (-y, -f) for commands
121
+ - Avoid interactive tools like vi, nano, or any that require user input
122
+ - If a command isn't available, you can install it
123
+
124
+ ## Useful Command Examples
125
+
126
+ ### Create a new file:
127
+ ```bash
128
+ cat <<'EOF' > newfile.py
129
+ import numpy as np
130
+ hello = "world"
131
+ print(hello)
132
+ EOF
133
+ ```
134
+
135
+ ### Edit files with sed:
136
+ ```bash
137
+ # Replace all occurrences
138
+ sed -i 's/old_string/new_string/g' filename.py
139
+
140
+ # Replace only first occurrence
141
+ sed -i 's/old_string/new_string/' filename.py
142
+
143
+ # Replace first occurrence on line 1
144
+ sed -i '1s/old_string/new_string/' filename.py
145
+
146
+ # Replace all occurrences in lines 1-10
147
+ sed -i '1,10s/old_string/new_string/g' filename.py
148
+ ```
149
+
150
+ ### View file content:
151
+ ```bash
152
+ # View specific lines with numbers
153
+ nl -ba filename.py | sed -n '10,20p'
154
+ ```
155
+
156
+ ### Any other command you want to run
157
+ ```bash
158
+ anything
159
+ ```
160
+
161
+ ## Submission
162
+ When you've completed your work (reading, editing, testing), and cannot make further progress
163
+ issue exactly the following command:
164
+
165
+ ```bash
166
+ echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached
167
+ ```
168
+
169
+ This command will submit your work.
170
+ You cannot continue working (reading, editing, testing) in any way on this task after submitting.
171
+ </instructions>
172
+ action_observation_template: |
173
+ <returncode>{{output.returncode}}</returncode>
174
+ {% if output.output | length < 10000 -%}
175
+ <output>
176
+ {{ output.output -}}
177
+ </output>
178
+ {%- else -%}
179
+ <warning>
180
+ The output of your last command was too long.
181
+ Please try a different command that produces less output.
182
+ If you're looking at a file you can try use head, tail or sed to view a smaller number of lines selectively.
183
+ If you're using grep or find and it produced too much output, you can use a more selective search pattern.
184
+ If you really need to see something from the full command's output, you can redirect output to a file and then search in that file.
185
+ </warning>
186
+ {%- set elided_chars = output.output | length - 10000 -%}
187
+ <output_head>
188
+ {{ output.output[:5000] }}
189
+ </output_head>
190
+ <elided_chars>
191
+ {{ elided_chars }} characters elided
192
+ </elided_chars>
193
+ <output_tail>
194
+ {{ output.output[-5000:] }}
195
+ </output_tail>
196
+ {%- endif -%}
197
+ format_error_template: |
198
+ Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
199
+
200
+ Please format your action in triple backticks as shown in <response_example>.
201
+
202
+ <response_example>
203
+ Here are some thoughts about why you want to perform the action.
204
+
205
+ ```bash
206
+ <action>
207
+ ```
208
+ </response_example>
209
+
210
+ If you have completed your assignment, please consult the first message about how to
211
+ submit your solution (you will not be able to continue working on this task after that).
212
+ step_limit: 250
213
+ cost_limit: 3.
214
+
215
+ environment:
216
+ cwd: "/testbed"
217
+ timeout: 60
218
+ env:
219
+ PAGER: cat
220
+ MANPAGER: cat
221
+ LESS: -R
222
+ PIP_PROGRESS_BAR: 'off'
223
+ TQDM_DISABLE: '1'
224
+ environment_class: docker
225
+
226
+ model:
227
+ model_name: "roulette"
228
+ model_class: "minisweagent.models.extra.roulette.RouletteModel"
229
+ model_kwargs:
230
+ - model_name: "claude-sonnet-4-20250514"
231
+ model_kwargs:
232
+ temperature: 0.
233
+ - model_name: "gpt-5"
@@ -10,6 +10,7 @@ _ENVIRONMENT_MAPPING = {
10
10
  "singularity": "minisweagent.environments.singularity.SingularityEnvironment",
11
11
  "local": "minisweagent.environments.local.LocalEnvironment",
12
12
  "swerex_docker": "minisweagent.environments.extra.swerex_docker.SwerexDockerEnvironment",
13
+ "bubblewrap": "minisweagent.environments.extra.bubblewrap.BubblewrapEnvironment",
13
14
  }
14
15
 
15
16
 
@@ -23,18 +23,22 @@ class DockerEnvironmentConfig:
23
23
  """Timeout for executing commands in the container."""
24
24
  executable: str = os.getenv("MSWEA_DOCKER_EXECUTABLE", "docker")
25
25
  """Path to the docker/container executable."""
26
- run_args: list[str] = field(default_factory=list)
27
- """Additional arguments to pass to the docker/container executable."""
26
+ run_args: list[str] = field(default_factory=lambda: ["--rm"])
27
+ """Additional arguments to pass to the docker/container executable.
28
+ Default is ["--rm"], which removes the container after it exits.
29
+ """
28
30
  container_timeout: str = "2h"
29
31
  """Max duration to keep container running. Uses the same format as the sleep command."""
32
+ pull_timeout: int = 120
33
+ """Timeout in seconds for pulling images."""
30
34
 
31
35
 
32
36
  class DockerEnvironment:
33
- def __init__(self, *, config_class: type = DockerEnvironmentConfig, **kwargs):
37
+ def __init__(self, *, config_class: type = DockerEnvironmentConfig, logger: logging.Logger | None = None, **kwargs):
34
38
  """This class executes bash commands in a Docker container using direct docker commands.
35
39
  See `DockerEnvironmentConfig` for keyword arguments.
36
40
  """
37
- self.logger = logging.getLogger("minisweagent.environment")
41
+ self.logger = logger or logging.getLogger("minisweagent.environment")
38
42
  self.container_id: str | None = None
39
43
  self.config = config_class(**kwargs)
40
44
  self._start_container()
@@ -63,7 +67,7 @@ class DockerEnvironment:
63
67
  cmd,
64
68
  capture_output=True,
65
69
  text=True,
66
- timeout=120, # docker pull might take a while
70
+ timeout=self.config.pull_timeout, # docker pull might take a while
67
71
  check=True,
68
72
  )
69
73
  self.logger.info(f"Started container {container_name} with ID {result.stdout.strip()}")
@@ -0,0 +1,112 @@
1
+ """
2
+ [Bubblewrap](https://github.com/containers/bubblewrap) is a low-level, unprivileged sandboxing tool for Linux that enables running applications
3
+ in isolated environments with restricted access to the operating system and user data.
4
+ This environment uses bubblewrap to execute commands in a sandboxed environment.
5
+
6
+ !!! warning
7
+ This environment is experimental.
8
+
9
+ !!! warning
10
+ This environment is not supported on Windows.
11
+ """
12
+
13
+ import logging
14
+ import os
15
+ import platform
16
+ import shutil
17
+ import subprocess
18
+ import tempfile
19
+ import uuid
20
+ from dataclasses import asdict, dataclass, field
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+
25
+ @dataclass
26
+ class BubblewrapEnvironmentConfig:
27
+ cwd: str = ""
28
+ """Working directory for the sandbox."""
29
+ env: dict[str, str] = field(default_factory=dict)
30
+ """Dictionary of environment variables to set in the sandbox."""
31
+ timeout: int = 30
32
+ """Timeout for the command in seconds."""
33
+ executable: str = os.getenv("MSWEA_BUBBLEWRAP_EXECUTABLE", "bwrap")
34
+ """Path to the bubblewrap executable."""
35
+ wrapper_args: list[str] = field(
36
+ default_factory=lambda: [
37
+ "--unshare-user-try",
38
+ "--ro-bind",
39
+ "/usr",
40
+ "/usr",
41
+ "--ro-bind",
42
+ "/bin",
43
+ "/bin",
44
+ "--ro-bind",
45
+ "/lib",
46
+ "/lib",
47
+ "--ro-bind",
48
+ "/lib64",
49
+ "/lib64",
50
+ "--ro-bind",
51
+ "/etc",
52
+ "/etc",
53
+ "--tmpfs",
54
+ "/tmp",
55
+ "--proc",
56
+ "/proc",
57
+ "--dev",
58
+ "/dev",
59
+ "--new-session",
60
+ "--setenv",
61
+ "PATH",
62
+ "/usr/local/bin:/usr/sbin:/usr/bin:/bin",
63
+ ]
64
+ )
65
+ """Arguments to pass to the bubblewrap executable."""
66
+
67
+
68
+ class BubblewrapEnvironment:
69
+ def __init__(
70
+ self, *, config_class: type = BubblewrapEnvironmentConfig, logger: logging.Logger | None = None, **kwargs
71
+ ):
72
+ """This class executes bash commands in a bubblewrap environment and a separate working
73
+ directory for each environment. See `BubblewrapEnvironmentConfig` for kwargs.
74
+ """
75
+ self.logger = logger or logging.getLogger("minisweagent.environment")
76
+ self.config = config_class(**kwargs)
77
+ self.working_dir = Path(tempfile.gettempdir()) / f"minisweagent-{uuid.uuid4().hex[:8]}"
78
+ self.working_dir.mkdir(parents=True)
79
+
80
+ def execute(self, command: str, cwd: str = "") -> dict[str, Any]:
81
+ """Execute a command in the bubblewrap environment and return the result as a dict."""
82
+ cwd = cwd or self.config.cwd or str(self.working_dir)
83
+
84
+ cmd = [self.config.executable] + self.config.wrapper_args + ["--bind", cwd, cwd, "--chdir", cwd]
85
+
86
+ # Add environment variables
87
+ for key, value in self.config.env.items():
88
+ cmd.extend(["--setenv", key, value])
89
+
90
+ cmd.extend(["bash", "-c", command])
91
+
92
+ result = subprocess.run(
93
+ cmd,
94
+ text=True,
95
+ timeout=self.config.timeout,
96
+ encoding="utf-8",
97
+ errors="replace",
98
+ stdout=subprocess.PIPE,
99
+ stderr=subprocess.STDOUT,
100
+ )
101
+ return {"output": result.stdout, "returncode": result.returncode}
102
+
103
+ def cleanup(self):
104
+ if self.working_dir.exists():
105
+ shutil.rmtree(self.working_dir)
106
+
107
+ def __del__(self):
108
+ """Cleanup working_dir when object is destroyed."""
109
+ self.cleanup()
110
+
111
+ def get_template_vars(self) -> dict[str, Any]:
112
+ return asdict(self.config) | platform.uname()._asdict()
@@ -23,18 +23,39 @@ class SingularityEnvironmentConfig:
23
23
  """Timeout for executing commands in the container."""
24
24
  executable: str = os.getenv("MSWEA_SINGULARITY_EXECUTABLE", "singularity")
25
25
  """Path to the singularity executable."""
26
+ sandbox_build_retries: int = 3
27
+ """Number of retries for building the sandbox if an error occurs."""
26
28
 
27
29
 
28
30
  class SingularityEnvironment:
29
- def __init__(self, **kwargs):
31
+ def __init__(
32
+ self, *, config_class: type = SingularityEnvironmentConfig, logger: logging.Logger | None = None, **kwargs
33
+ ):
30
34
  """Singularity environment. See `SingularityEnvironmentConfig` for kwargs."""
31
- self.logger = logging.getLogger("minisweagent.environment")
32
- self.config = SingularityEnvironmentConfig(**kwargs)
33
- self.sandbox_dir = Path(tempfile.gettempdir()) / f"minisweagent-{uuid.uuid4().hex[:8]}"
34
- subprocess.run(
35
- [self.config.executable, "build", "--sandbox", self.sandbox_dir, self.config.image],
36
- check=True,
37
- )
35
+ self.logger = logger or logging.getLogger("minisweagent.environment")
36
+ self.config = config_class(**kwargs)
37
+ self.sandbox_dir = self._build_sandbox()
38
+
def _build_sandbox(self) -> Path:
    """Build the sandbox directory for the configured image, retrying on failure.

    Returns:
        Path of the freshly built sandbox directory.

    Raises:
        subprocess.CalledProcessError: If the last build attempt fails.
    """
    # Building the sandbox can fail (very rarely), so we retry it.
    # Always attempt at least once: with a configured value <= 0 the loop would
    # otherwise never run and `sandbox_dir` would be unbound at the return.
    max_retries = max(1, self.config.sandbox_build_retries)
    for attempt in range(max_retries):
        # Fresh directory name per attempt so a partial build is never reused.
        sandbox_dir = Path(tempfile.gettempdir()) / f"minisweagent-{uuid.uuid4().hex[:8]}"
        try:
            subprocess.run(
                [self.config.executable, "build", "--sandbox", sandbox_dir, self.config.image],
                check=True,
                capture_output=True,
            )
            break
        except subprocess.CalledProcessError as e:
            shutil.rmtree(sandbox_dir, ignore_errors=True)
            self.logger.error(
                f"Error building image {self.config.image}, stdout: {e.stdout}, stderr: {e.stderr} (attempt {attempt + 1}/{max_retries})"
            )
            if attempt == max_retries - 1:
                raise
    return sandbox_dir
38
59
 
39
60
  def get_template_vars(self) -> dict[str, Any]:
40
61
  return asdict(self.config)
@@ -69,8 +90,7 @@ class SingularityEnvironment:
69
90
  return {"output": result.stdout, "returncode": result.returncode}
70
91
 
71
92
  def cleanup(self):
72
- if self.sandbox_dir.exists():
73
- shutil.rmtree(self.sandbox_dir)
93
+ shutil.rmtree(self.sandbox_dir, ignore_errors=True)
74
94
 
75
95
  def __del__(self):
76
96
  """Cleanup sandbox when object is destroyed."""
@@ -3,6 +3,7 @@ You can ignore this file completely if you explicitly set your model in your run
3
3
  """
4
4
 
5
5
  import copy
6
+ import importlib
6
7
  import os
7
8
  import threading
8
9
 
@@ -49,12 +50,12 @@ def get_model(input_model_name: str | None = None, config: dict | None = None) -
49
50
  config = copy.deepcopy(config)
50
51
  config["model_name"] = resolved_model_name
51
52
 
52
- # API key resolution (from env -> config -> None)
53
- if "model_kwargs" not in config:
54
- config["model_kwargs"] = {}
55
- if from_env := os.getenv("MSWEA_MODEL_API_KEY"):
56
- config["model_kwargs"]["api_key"] = from_env
57
- return get_model_class(resolved_model_name)(**config)
53
+ model_class = get_model_class(resolved_model_name, config.pop("model_class", ""))
54
+
55
+ if (from_env := os.getenv("MSWEA_MODEL_API_KEY")) and not str(type(model_class)).endswith("DeterministicModel"):
56
+ config.setdefault("model_kwargs", {})["api_key"] = from_env
57
+
58
+ return model_class(**config)
58
59
 
59
60
 
60
61
  def get_model_name(input_model_name: str | None = None, config: dict | None = None) -> str:
@@ -70,12 +71,38 @@ def get_model_name(input_model_name: str | None = None, config: dict | None = No
70
71
  raise ValueError("No default model set. Please run `mini-extra config setup` to set one.")
71
72
 
72
73
 
73
- def get_model_class(model_name: str) -> type:
74
- """Select the best model class for a given model name."""
# Shortcut names accepted for `model_class`, mapped to the full import path of the class.
_MODEL_CLASS_MAPPING = {
    "anthropic": "minisweagent.models.anthropic.AnthropicModel",
    "litellm": "minisweagent.models.litellm_model.LitellmModel",
    "openrouter": "minisweagent.models.openrouter_model.OpenRouterModel",
    "deterministic": "minisweagent.models.test_models.DeterministicModel",
}


def get_model_class(model_name: str, model_class: str = "") -> type:
    """Select the best model class.

    If a model_class is provided (as shortcut name, or as full import path,
    e.g., "anthropic" or "minisweagent.models.anthropic.AnthropicModel"),
    it takes precedence over the `model_name`.
    Otherwise, the model_name is used to select the best model class.

    Raises:
        ValueError: If `model_class` is given but cannot be resolved to an importable
            attribute. The underlying import/lookup error is attached as `__cause__`.
    """
    if model_class:
        full_path = _MODEL_CLASS_MAPPING.get(model_class, model_class)
        try:
            # rsplit raises ValueError on unpacking when the path contains no dot.
            module_name, class_name = full_path.rsplit(".", 1)
            module = importlib.import_module(module_name)
            return getattr(module, class_name)
        except (ValueError, ImportError, AttributeError) as e:
            msg = f"Unknown model class: {model_class} (resolved to {full_path}, available: {_MODEL_CLASS_MAPPING})"
            # Chain the cause so the real failure (e.g. a missing dependency) stays visible.
            raise ValueError(msg) from e

    if any(s in model_name.lower() for s in ["anthropic", "sonnet", "opus", "claude"]):
        from minisweagent.models.anthropic import AnthropicModel

        return AnthropicModel

    # Default to LitellmModel
    from minisweagent.models.litellm_model import LitellmModel

    return LitellmModel
File without changes
@@ -0,0 +1,62 @@
1
+ import random
2
+ from collections.abc import Callable
3
+ from dataclasses import asdict, dataclass
4
+
5
+ from minisweagent import Model
6
+ from minisweagent.models import get_model
7
+
8
+
9
+ @dataclass
10
+ class RouletteModelConfig:
11
+ model_kwargs: list[dict]
12
+ """The models to choose from"""
13
+ model_name: str = "roulette"
14
+
15
+
16
+ class RouletteModel:
17
+ def __init__(self, *, config_class: Callable = RouletteModelConfig, **kwargs):
18
+ """This "meta"-model randomly selects one of the models at every call"""
19
+ self.config = config_class(**kwargs)
20
+ self.models = [get_model(config=config) for config in self.config.model_kwargs]
21
+
22
+ @property
23
+ def cost(self) -> float:
24
+ return sum(model.cost for model in self.models)
25
+
26
+ @property
27
+ def n_calls(self) -> int:
28
+ return sum(model.n_calls for model in self.models)
29
+
30
+ def get_template_vars(self) -> dict:
31
+ return asdict(self.config) | {"n_model_calls": self.n_calls, "model_cost": self.cost}
32
+
33
+ def select_model(self) -> Model:
34
+ return random.choice(self.models)
35
+
36
+ def query(self, *args, **kwargs) -> dict:
37
+ model = self.select_model()
38
+ response = model.query(*args, **kwargs)
39
+ response["model_name"] = model.config.model_name
40
+ return response
41
+
42
+
43
+ @dataclass
44
+ class InterleavingModelConfig:
45
+ model_kwargs: list[dict]
46
+ sequence: list[int] | None = None
47
+ """If set to 0, 0, 1, we will return the first model 2 times, then the second model 1 time,
48
+ then the first model again, etc."""
49
+ model_name: str = "interleaving"
50
+
51
+
52
+ class InterleavingModel(RouletteModel):
53
+ def __init__(self, *, config_class: Callable = InterleavingModelConfig, **kwargs):
54
+ """This "meta"-model alternates between the models in the sequence for every call"""
55
+ super().__init__(config_class=config_class, **kwargs)
56
+
57
+ def select_model(self) -> Model:
58
+ if self.config.sequence is None:
59
+ i_model = self.n_calls % len(self.models)
60
+ else:
61
+ i_model = self.config.sequence[self.n_calls % len(self.config.sequence)]
62
+ return self.models[i_model]
@@ -61,12 +61,23 @@ class LitellmModel:
61
61
 
62
62
  def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
63
63
  response = self._query(messages, **kwargs)
64
- cost = litellm.cost_calculator.completion_cost(response)
64
+ try:
65
+ cost = litellm.cost_calculator.completion_cost(response)
66
+ except Exception as e:
67
+ logger.critical(
68
+ f"Error calculating cost for model {self.config.model_name}: {e}. "
69
+ "Please check the 'Updating the model registry' section in the documentation. "
70
+ "http://bit.ly/4p31bi4 Still stuck? Please open a github issue for help!"
71
+ )
72
+ raise
65
73
  self.n_calls += 1
66
74
  self.cost += cost
67
75
  GLOBAL_MODEL_STATS.add(cost)
68
76
  return {
69
77
  "content": response.choices[0].message.content or "", # type: ignore
78
+ "extra": {
79
+ "response": response,
80
+ },
70
81
  }
71
82
 
72
83
  def get_template_vars(self) -> dict[str, Any]:
@@ -0,0 +1,118 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ from dataclasses import asdict, dataclass, field
5
+ from typing import Any
6
+
7
+ import requests
8
+ from tenacity import (
9
+ before_sleep_log,
10
+ retry,
11
+ retry_if_not_exception_type,
12
+ stop_after_attempt,
13
+ wait_exponential,
14
+ )
15
+
16
+ from minisweagent.models import GLOBAL_MODEL_STATS
17
+
18
+ logger = logging.getLogger("openrouter_model")
19
+
20
+
21
@dataclass
class OpenRouterModelConfig:
    """Settings for ``OpenRouterModel``."""

    # Model identifier sent as the "model" field of the request payload.
    model_name: str
    # Extra entries merged into the payload of every request.
    model_kwargs: dict[str, Any] = field(default_factory=dict)
25
+
26
+
27
class OpenRouterAPIError(Exception):
    """Error raised for failed or unusable OpenRouter API responses."""
31
+
32
+
33
class OpenRouterAuthenticationError(Exception):
    """Error raised when OpenRouter rejects the request's credentials."""
37
+
38
+
39
class OpenRouterRateLimitError(Exception):
    """Error raised when OpenRouter reports that the rate limit was exceeded."""
43
+
44
+
45
class OpenRouterModel:
    """Chat-completion client for the OpenRouter HTTP API with cost tracking.

    Attributes:
        config: ``OpenRouterModelConfig`` built from the constructor kwargs.
        cost: Sum of the costs reported by the API for this instance.
        n_calls: Number of ``query`` calls that completed successfully.
    """

    def __init__(self, **kwargs):
        self.config = OpenRouterModelConfig(**kwargs)
        self.cost = 0.0
        self.n_calls = 0
        self._api_url = "https://openrouter.ai/api/v1/chat/completions"
        # Falls back to an empty string when the variable is unset.
        self._api_key = os.getenv("OPENROUTER_API_KEY", "")

    @retry(
        stop=stop_after_attempt(10),
        wait=wait_exponential(multiplier=1, min=4, max=60),
        before_sleep=before_sleep_log(logger, logging.WARNING),
        # Authentication failures and Ctrl-C are not retried.
        retry=retry_if_not_exception_type(
            (
                OpenRouterAuthenticationError,
                KeyboardInterrupt,
            )
        ),
    )
    def _query(self, messages: list[dict[str, str]], **kwargs):
        """POST one chat-completion request and return the decoded JSON body.

        Per-call ``kwargs`` override ``config.model_kwargs``, and both
        override the default payload entries.

        Raises:
            OpenRouterAuthenticationError: on HTTP 401.
            OpenRouterRateLimitError: on HTTP 429.
            OpenRouterAPIError: on any other HTTP error or request failure.
        """
        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
        }

        payload = {
            "model": self.config.model_name,
            "messages": messages,
            # Ask the API to include usage data; query() reads the cost from it.
            "usage": {"include": True},
            **(self.config.model_kwargs | kwargs),
        }

        try:
            response = requests.post(self._api_url, headers=headers, data=json.dumps(payload), timeout=60)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as e:
            if response.status_code == 401:
                error_msg = "Authentication failed. You can permanently set your API key with `mini-extra config set OPENROUTER_API_KEY YOUR_KEY`."
                raise OpenRouterAuthenticationError(error_msg) from e
            elif response.status_code == 429:
                raise OpenRouterRateLimitError("Rate limit exceeded") from e
            else:
                raise OpenRouterAPIError(f"HTTP {response.status_code}: {response.text}") from e
        except requests.exceptions.RequestException as e:
            raise OpenRouterAPIError(f"Request failed: {e}") from e

    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        """Send ``messages`` to the model and record the reported cost.

        Returns:
            A dict with the message text under ``"content"`` (empty string
            when the API returned none) and the raw response under
            ``"extra"["response"]``.

        Raises:
            OpenRouterAPIError: if the response carries no non-zero cost.
        """
        response = self._query(messages, **kwargs)

        # Extract cost from usage information. The `or` fallbacks also cover
        # an explicit JSON null for "usage" or "cost", which would otherwise
        # surface as AttributeError / TypeError instead of the error below.
        usage = response.get("usage") or {}
        cost = usage.get("cost") or 0.0

        # If the cost is not available, raise an error
        if cost == 0.0:
            raise OpenRouterAPIError(
                f"No cost information available from OpenRouter API for model {self.config.model_name}. "
                "Cost tracking is required but not provided by the API response."
            )

        self.n_calls += 1
        self.cost += cost
        GLOBAL_MODEL_STATS.add(cost)

        return {
            "content": response["choices"][0]["message"]["content"] or "",
            "extra": {
                "response": response,
            },
        }

    def get_template_vars(self) -> dict[str, Any]:
        """Return the config fields plus this instance's call and cost counters."""
        return asdict(self.config) | {"n_model_calls": self.n_calls, "model_cost": self.cost}
@@ -33,8 +33,11 @@ This setup will ask you for your model and an API key.
33
33
 
34
34
  Here's a few popular models and the required API keys:
35
35
 
36
- [bold green]claude-sonnet-4-20250514[/bold green] ([bold green]ANTHROPIC_API_KEY[/bold green])
36
+ [bold green]anthropic/claude-sonnet-4-20250514[/bold green] ([bold green]ANTHROPIC_API_KEY[/bold green])
37
37
  [bold green]openai/gpt-5[/bold green] or [bold green]openai/gpt-5-mini[/bold green] ([bold green]OPENAI_API_KEY[/bold green])
38
+ [bold green]gemini/gemini-2.5-pro[/bold green] ([bold green]GEMINI_API_KEY[/bold green])
39
+
40
+ [bold]Note: Please always include the provider in the model name.[/bold]
38
41
 
39
42
  [bold yellow]You can leave any setting blank to skip it.[/bold yellow]
40
43
 
@@ -15,6 +15,7 @@ from pathlib import Path
15
15
  import typer
16
16
  import yaml
17
17
  from datasets import load_dataset
18
+ from jinja2 import StrictUndefined, Template
18
19
  from rich.live import Live
19
20
 
20
21
  from minisweagent import Environment
@@ -72,17 +73,25 @@ def get_swebench_docker_image_name(instance: dict) -> str:
72
73
  # Docker doesn't allow double underscore, so we replace them with a magic token
73
74
  iid = instance["instance_id"]
74
75
  id_docker_compatible = iid.replace("__", "_1776_")
75
- image_name = f"swebench/sweb.eval.x86_64.{id_docker_compatible}:latest".lower()
76
+ image_name = f"docker.io/swebench/sweb.eval.x86_64.{id_docker_compatible}:latest".lower()
76
77
  return image_name
77
78
 
78
79
 
79
80
  def get_sb_environment(config: dict, instance: dict) -> Environment:
81
+ env_config = config.setdefault("environment", {})
82
+ env_config["environment_class"] = env_config.get("environment_class", "docker")
80
83
  image_name = get_swebench_docker_image_name(instance)
81
- env_config = config.get("environment", {})
82
- if env_config.get("environment_class") == "singularity":
83
- image_name = "docker://" + image_name
84
- env_config["image"] = image_name
85
- return get_environment(env_config, default_type="docker")
84
+ if env_config["environment_class"] == "docker":
85
+ env_config["image"] = image_name
86
+ elif env_config["environment_class"] == "singularity":
87
+ env_config["image"] = "docker://" + image_name
88
+ env = get_environment(env_config)
89
+ if startup_command := config.get("run", {}).get("env_startup_command"):
90
+ startup_command = Template(startup_command, undefined=StrictUndefined).render(**instance)
91
+ out = env.execute(startup_command)
92
+ if out["returncode"] != 0:
93
+ raise RuntimeError(f"Error executing startup command: {out}")
94
+ return env
86
95
 
87
96
 
88
97
  def update_preds_file(output_path: Path, instance_id: str, model_name: str, result: str):
@@ -190,6 +199,7 @@ def main(
190
199
  output: str = typer.Option("", "-o", "--output", help="Output directory", rich_help_panel="Basic"),
191
200
  workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing", rich_help_panel="Basic"),
192
201
  model: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
202
+ model_class: str | None = typer.Option(None, "-c", "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
193
203
  redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances", rich_help_panel="Data selection"),
194
204
  config_spec: Path = typer.Option( builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
195
205
  environment_class: str | None = typer.Option( None, "--environment-class", help="Environment type to use. Recommended are docker or singularity", rich_help_panel="Advanced"),
@@ -217,6 +227,8 @@ def main(
217
227
  config.setdefault("environment", {})["environment_class"] = environment_class
218
228
  if model is not None:
219
229
  config.setdefault("model", {})["model_name"] = model
230
+ if model_class is not None:
231
+ config.setdefault("model", {})["model_class"] = model_class
220
232
 
221
233
  progress_manager = RunBatchProgressManager(len(instances), output_path / f"exit_statuses_{time.time()}.yaml")
222
234
 
@@ -30,6 +30,7 @@ def main(
30
30
  split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
31
31
  instance_spec: str = typer.Option(0, "-i", "--instance", help="SWE-Bench instance ID or index", rich_help_panel="Data selection"),
32
32
  model_name: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
33
+ model_class: str | None = typer.Option(None, "-c", "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
33
34
  config_path: Path = typer.Option( builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
34
35
  environment_class: str | None = typer.Option(None, "--environment-class", rich_help_panel="Advanced"),
35
36
  exit_immediately: bool = typer.Option( False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting.", rich_help_panel="Basic"),
@@ -50,6 +51,8 @@ def main(
50
51
  config = yaml.safe_load(get_config_path(config_path).read_text())
51
52
  if environment_class is not None:
52
53
  config.setdefault("environment", {})["environment_class"] = environment_class
54
+ if model_class is not None:
55
+ config.setdefault("model", {})["model_class"] = model_class
53
56
  if exit_immediately:
54
57
  config.setdefault("agent", {})["confirm_exit"] = False
55
58
  env = get_sb_environment(config, instance)
@@ -37,20 +37,25 @@ def fetch_github_issue(issue_url: str) -> str:
37
37
  return f"GitHub Issue: {title}\n\n{body}"
38
38
 
39
39
 
40
+ # fmt: off
40
41
  @app.command()
41
42
  def main(
42
43
  issue_url: str = typer.Option(prompt="Enter GitHub issue URL", help="GitHub issue URL"),
43
44
  config: Path = typer.Option(DEFAULT_CONFIG, "-c", "--config", help="Path to config file"),
44
45
  model: str | None = typer.Option(None, "-m", "--model", help="Model to use"),
46
+ model_class: str | None = typer.Option(None, "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
45
47
  yolo: bool = typer.Option(False, "-y", "--yolo", help="Run without confirmation"),
46
48
  ) -> InteractiveAgent:
49
+ # fmt: on
47
50
  """Run mini-SWE-agent on a GitHub issue"""
48
51
  configure_if_first_time()
49
52
 
50
53
  _config = yaml.safe_load(get_config_path(config).read_text())
51
- _agent_config = _config.get("agent", {})
54
+ _agent_config = _config.setdefault("agent", {})
52
55
  if yolo:
53
56
  _agent_config["mode"] = "yolo"
57
+ if model_class is not None:
58
+ _config.setdefault("model", {})["model_class"] = model_class
54
59
 
55
60
  task = fetch_github_issue(issue_url)
56
61
 
minisweagent/run/mini.py CHANGED
@@ -48,12 +48,13 @@ More information about the usage: [bold green]https://mini-swe-agent.com/latest/
48
48
  def main(
49
49
  visual: bool = typer.Option(False, "-v", "--visual", help="Toggle (pager-style) UI (Textual) depending on the MSWEA_VISUAL_MODE_DEFAULT environment setting",),
50
50
  model_name: str | None = typer.Option( None, "-m", "--model", help="Model to use",),
51
+ model_class: str | None = typer.Option(None, "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
51
52
  task: str | None = typer.Option(None, "-t", "--task", help="Task/problem statement", show_default=False),
52
53
  yolo: bool = typer.Option(False, "-y", "--yolo", help="Run without confirmation"),
53
54
  cost_limit: float | None = typer.Option(None, "-l", "--cost-limit", help="Cost limit. Set to 0 to disable."),
54
55
  config_spec: Path = typer.Option(DEFAULT_CONFIG, "-c", "--config", help="Path to config file"),
55
56
  output: Path | None = typer.Option(DEFAULT_OUTPUT, "-o", "--output", help="Output trajectory file"),
56
- exit_immediately: bool = typer.Option( False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting."),
57
+ exit_immediately: bool = typer.Option( False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting.", rich_help_panel="Advanced"),
57
58
  ) -> Any:
58
59
  # fmt: on
59
60
  configure_if_first_time()
@@ -72,11 +73,14 @@ def main(
72
73
  )
73
74
  console.print("[bold green]Got that, thanks![/bold green]")
74
75
 
75
- config["agent"]["mode"] = "confirm" if not yolo else "yolo"
76
+ if yolo:
77
+ config.setdefault("agent", {})["mode"] = "yolo"
76
78
  if cost_limit:
77
- config["agent"]["cost_limit"] = cost_limit
79
+ config.setdefault("agent", {})["cost_limit"] = cost_limit
78
80
  if exit_immediately:
79
- config["agent"]["confirm_exit"] = False
81
+ config.setdefault("agent", {})["confirm_exit"] = False
82
+ if model_class is not None:
83
+ config.setdefault("model", {})["model_class"] = model_class
80
84
  model = get_model(model_name, config.get("model", {}))
81
85
  env = LocalEnvironment(**config.get("env", {}))
82
86
 
@@ -1,10 +1,24 @@
1
+ import dataclasses
1
2
  import json
2
3
  from collections.abc import Callable
3
4
  from pathlib import Path
5
+ from typing import Any
4
6
 
5
7
  from minisweagent import Agent, __version__
6
8
 
7
9
 
10
+ def _get_class_name_with_module(obj: Any) -> str:
11
+ """Get the full class name with module path."""
12
+ return f"{obj.__class__.__module__}.{obj.__class__.__name__}"
13
+
14
+
15
+ def _asdict(obj: Any) -> dict:
16
+ """Convert config objects to dicts."""
17
+ if dataclasses.is_dataclass(obj):
18
+ return dataclasses.asdict(obj) # type: ignore[arg-type]
19
+ return obj # let's try our luck
20
+
21
+
8
22
  def save_traj(
9
23
  agent: Agent | None,
10
24
  path: Path,
@@ -45,6 +59,14 @@ def save_traj(
45
59
  data["info"]["model_stats"]["instance_cost"] = agent.model.cost
46
60
  data["info"]["model_stats"]["api_calls"] = agent.model.n_calls
47
61
  data["messages"] = agent.messages
62
+ data["info"]["config"] = {
63
+ "agent": _asdict(agent.config),
64
+ "model": _asdict(agent.model.config),
65
+ "environment": _asdict(agent.env.config),
66
+ "agent_type": _get_class_name_with_module(agent),
67
+ "model_type": _get_class_name_with_module(agent.model),
68
+ "environment_type": _get_class_name_with_module(agent.env),
69
+ }
48
70
  if extra_info:
49
71
  data["info"].update(extra_info)
50
72