mini-swe-agent 1.17.5__py3-none-any.whl → 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/METADATA +36 -52
  2. mini_swe_agent-2.0.0a1.dist-info/RECORD +70 -0
  3. mini_swe_agent-2.0.0a1.dist-info/entry_points.txt +5 -0
  4. minisweagent/__init__.py +19 -26
  5. minisweagent/agents/default.py +128 -113
  6. minisweagent/agents/interactive.py +119 -58
  7. minisweagent/config/README.md +3 -4
  8. minisweagent/config/__init__.py +36 -1
  9. minisweagent/config/benchmarks/swebench.yaml +156 -0
  10. minisweagent/config/{extra/swebench.yaml → benchmarks/swebench_backticks.yaml} +69 -64
  11. minisweagent/config/benchmarks/swebench_modal.yaml +47 -0
  12. minisweagent/config/{extra → benchmarks}/swebench_xml.yaml +73 -70
  13. minisweagent/config/default.yaml +24 -21
  14. minisweagent/config/inspector.tcss +42 -0
  15. minisweagent/config/mini.yaml +53 -71
  16. minisweagent/config/{github_issue.yaml → mini_textbased.yaml} +43 -29
  17. minisweagent/environments/__init__.py +1 -0
  18. minisweagent/environments/docker.py +67 -20
  19. minisweagent/environments/extra/bubblewrap.py +86 -47
  20. minisweagent/environments/extra/swerex_docker.py +53 -20
  21. minisweagent/environments/extra/swerex_modal.py +90 -0
  22. minisweagent/environments/local.py +62 -21
  23. minisweagent/environments/singularity.py +59 -18
  24. minisweagent/exceptions.py +22 -0
  25. minisweagent/models/__init__.py +6 -7
  26. minisweagent/models/extra/roulette.py +20 -17
  27. minisweagent/models/litellm_model.py +90 -44
  28. minisweagent/models/litellm_response_model.py +80 -0
  29. minisweagent/models/litellm_textbased_model.py +45 -0
  30. minisweagent/models/openrouter_model.py +87 -45
  31. minisweagent/models/openrouter_response_model.py +123 -0
  32. minisweagent/models/openrouter_textbased_model.py +76 -0
  33. minisweagent/models/portkey_model.py +84 -42
  34. minisweagent/models/portkey_response_model.py +163 -0
  35. minisweagent/models/requesty_model.py +91 -41
  36. minisweagent/models/test_models.py +246 -19
  37. minisweagent/models/utils/actions_text.py +60 -0
  38. minisweagent/models/utils/actions_toolcall.py +102 -0
  39. minisweagent/models/utils/actions_toolcall_response.py +110 -0
  40. minisweagent/models/utils/anthropic_utils.py +28 -0
  41. minisweagent/models/utils/cache_control.py +15 -2
  42. minisweagent/models/utils/content_string.py +74 -0
  43. minisweagent/models/utils/openai_multimodal.py +50 -0
  44. minisweagent/models/utils/retry.py +25 -0
  45. minisweagent/run/benchmarks/__init__.py +1 -0
  46. minisweagent/run/{extra → benchmarks}/swebench.py +56 -35
  47. minisweagent/run/{extra → benchmarks}/swebench_single.py +36 -26
  48. minisweagent/run/{extra → benchmarks}/utils/batch_progress.py +1 -1
  49. minisweagent/run/hello_world.py +6 -0
  50. minisweagent/run/mini.py +54 -63
  51. minisweagent/run/utilities/__init__.py +1 -0
  52. minisweagent/run/{extra → utilities}/config.py +2 -0
  53. minisweagent/run/{inspector.py → utilities/inspector.py} +90 -11
  54. minisweagent/run/{mini_extra.py → utilities/mini_extra.py} +9 -5
  55. minisweagent/utils/serialize.py +26 -0
  56. mini_swe_agent-1.17.5.dist-info/RECORD +0 -61
  57. mini_swe_agent-1.17.5.dist-info/entry_points.txt +0 -5
  58. minisweagent/agents/interactive_textual.py +0 -450
  59. minisweagent/config/extra/swebench_roulette.yaml +0 -233
  60. minisweagent/config/mini.tcss +0 -86
  61. minisweagent/models/anthropic.py +0 -35
  62. minisweagent/models/litellm_response_api_model.py +0 -82
  63. minisweagent/models/portkey_response_api_model.py +0 -75
  64. minisweagent/models/utils/key_per_thread.py +0 -20
  65. minisweagent/models/utils/openai_utils.py +0 -41
  66. minisweagent/run/github_issue.py +0 -87
  67. minisweagent/run/utils/__init__.py +0 -0
  68. minisweagent/run/utils/save.py +0 -78
  69. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/WHEEL +0 -0
  70. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/licenses/LICENSE.md +0 -0
  71. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/top_level.txt +0 -0
  72. /minisweagent/config/{extra → benchmarks}/__init__.py +0 -0
  73. /minisweagent/run/{extra → benchmarks}/utils/__init__.py +0 -0
@@ -0,0 +1,156 @@
1
+ agent:
2
+ system_template: |
3
+ You are a helpful assistant that can interact with a computer shell to solve programming tasks.
4
+ instance_template: |
5
+ <pr_description>
6
+ Consider the following PR description:
7
+ {{task}}
8
+ </pr_description>
9
+
10
+ <instructions>
11
+ # Task Instructions
12
+
13
+ ## Overview
14
+
15
+ You're a software engineer interacting continuously with a computer by submitting commands.
16
+ You'll be helping implement necessary changes to meet requirements in the PR description.
17
+ Your task is specifically to make changes to non-test files in the current directory in order to fix the issue described in the PR description in a way that is general and consistent with the codebase.
18
+ <IMPORTANT>This is an interactive process where you will think and issue AT LEAST ONE command, see the result, then think and issue your next command(s).</IMPORTANT>
19
+
20
+ For each response:
21
+
22
+ 1. Include a THOUGHT section explaining your reasoning and what you're trying to accomplish
23
+ 2. Provide AT LEAST ONE bash command to execute
24
+
25
+ ## Important Boundaries
26
+
27
+ - MODIFY: Regular source code files in /testbed (this is the working directory for all your subsequent commands)
28
+ - DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
29
+
30
+ ## Recommended Workflow
31
+
32
+ 1. Analyze the codebase by finding and reading relevant files
33
+ 2. Create a script to reproduce the issue
34
+ 3. Edit the source code to resolve the issue
35
+ 4. Verify your fix works by running your script again
36
+ 5. Test edge cases to ensure your fix is robust
37
+
38
+ ## Command Execution Rules
39
+
40
+ You are operating in an environment where
41
+
42
+ 1. You issue at least one command
43
+ 2. The system executes the command(s) in a subshell
44
+ 3. You see the result(s)
45
+ 4. You write your next command(s)
46
+
47
+ Each response should include:
48
+
49
+ 1. **Reasoning text** where you explain your analysis and plan
50
+ 2. At least one tool call with your command
51
+
52
+ **CRITICAL REQUIREMENTS:**
53
+
54
+ - Your response SHOULD include reasoning text explaining what you're doing
55
+ - Your response MUST include AT LEAST ONE bash tool call
56
+ - Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
57
+ - However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
58
+
59
+ Example of a CORRECT response:
60
+ <example_response>
61
+ I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
62
+
63
+ [Makes bash tool call with {"command": "ls -la"} as arguments]
64
+ </example_response>
65
+
66
+ ## Environment Details
67
+
68
+ - You have a full Linux shell environment
69
+ - Always use non-interactive flags (-y, -f) for commands
70
+ - Avoid interactive tools like vi, nano, or any that require user input
71
+ - You can use bash commands or invoke any tool that is available in the environment
72
+ - You can also create new tools or scripts to help you with the task
73
+ - If a tool isn't available, you can also install it
74
+
75
+ ## Submission
76
+
77
+ When you've completed your work, you MUST submit your changes as a git patch.
78
+ Follow these steps IN ORDER, with SEPARATE commands:
79
+
80
+ Step 1: Create the patch file
81
+ Run `git diff -- path/to/file1 path/to/file2 > patch.txt` listing only the source files you modified.
82
+ Do NOT commit your changes.
83
+
84
+ <IMPORTANT>
85
+ The patch must only contain changes to the specific source files you modified to fix the issue.
86
+ Do not submit file creations or changes to any of the following files:
87
+
88
+ - test and reproduction files
89
+ - helper scripts, tests, or tools that you created
90
+ - installation, build, packaging, configuration, or setup scripts unless they are directly part of the issue you were fixing (you can assume that the environment is already set up for your client)
91
+ - binary or compiled files
92
+ </IMPORTANT>
93
+
94
+ Step 2: Verify your patch
95
+ Inspect patch.txt to confirm it only contains your intended changes and headers show `--- a/` and `+++ b/` paths.
96
+
97
+ Step 3: Submit (EXACT command required)
98
+ You MUST use this EXACT command to submit:
99
+
100
+ ```bash
101
+ echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && cat patch.txt
102
+ ```
103
+
104
+ If the command fails (nonzero exit status), it will not submit.
105
+
106
+ <CRITICAL>
107
+ - Creating/viewing the patch and submitting it MUST be separate commands (not combined with &&).
108
+ - If you modify patch.txt after verifying, you SHOULD verify again before submitting.
109
+ - You CANNOT continue working (reading, editing, testing) in any way on this task after submitting.
110
+ </CRITICAL>
111
+ </instructions>
112
+ step_limit: 250
113
+ cost_limit: 3.
114
+
115
+ environment:
116
+ cwd: "/testbed"
117
+ timeout: 60
118
+ env:
119
+ PAGER: cat
120
+ MANPAGER: cat
121
+ LESS: -R
122
+ PIP_PROGRESS_BAR: 'off'
123
+ TQDM_DISABLE: '1'
124
+ environment_class: docker
125
+
126
+ model:
127
+ observation_template: |
128
+ {%- if output.output | length < 10000 -%}
129
+ {
130
+ "returncode": {{ output.returncode }},
131
+ "output": {{ output.output | tojson }}
132
+ {%- if output.exception_info %}, "exception_info": {{ output.exception_info | tojson }}{% endif %}
133
+ }
134
+ {%- else -%}
135
+ {
136
+ "returncode": {{ output.returncode }},
137
+ "output_head": {{ output.output[:5000] | tojson }},
138
+ "output_tail": {{ output.output[-5000:] | tojson }},
139
+ "elided_chars": {{ output.output | length - 10000 }},
140
+ "warning": "Output too long."
141
+ {%- if output.exception_info %}, "exception_info": {{ output.exception_info | tojson }}{% endif %}
142
+ }
143
+ {%- endif -%}
144
+ format_error_template: |
145
+ Tool call error. Every response needs to use the 'bash' tool at least once to execute commands.
146
+
147
+ Call the bash tool with your command as the argument:
148
+ - Tool: bash
149
+ - Arguments: {"command": "your_command_here"}
150
+
151
+ If you have completed your assignment, please consult the first message about how to
152
+ submit your solution (you will not be able to continue working on this task after that).
153
+ model_name: "anthropic/claude-sonnet-4-5-20250929"
154
+ model_kwargs:
155
+ drop_params: true
156
+ temperature: 0.0
@@ -9,7 +9,7 @@ agent:
9
9
  <format_example>
10
10
  THOUGHT: Your reasoning and analysis here
11
11
 
12
- ```bash
12
+ ```mswea_bash_command
13
13
  your_command_here
14
14
  ```
15
15
  </format_example>
@@ -25,21 +25,25 @@ agent:
25
25
  # Task Instructions
26
26
 
27
27
  ## Overview
28
+
28
29
  You're a software engineer interacting continuously with a computer by submitting commands.
29
30
  You'll be helping implement necessary changes to meet requirements in the PR description.
30
31
  Your task is specifically to make changes to non-test files in the current directory in order to fix the issue described in the PR description in a way that is general and consistent with the codebase.
31
32
 
32
- IMPORTANT: This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command.
33
+ <IMPORTANT>This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command.</IMPORTANT>
33
34
 
34
35
  For each response:
36
+
35
37
  1. Include a THOUGHT section explaining your reasoning and what you're trying to accomplish
36
38
  2. Provide exactly ONE bash command to execute
37
39
 
38
40
  ## Important Boundaries
41
+
39
42
  - MODIFY: Regular source code files in /testbed (this is the working directory for all your subsequent commands)
40
43
  - DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
41
44
 
42
45
  ## Recommended Workflow
46
+
43
47
  1. Analyze the codebase by finding and reading relevant files
44
48
  2. Create a script to reproduce the issue
45
49
  3. Edit the source code to resolve the issue
@@ -47,34 +51,38 @@ agent:
47
51
  5. Test edge cases to ensure your fix is robust
48
52
 
49
53
  ## Command Execution Rules
54
+
50
55
  You are operating in an environment where
56
+
51
57
  1. You write a single command
52
58
  2. The system executes that command in a subshell
53
59
  3. You see the result
54
60
  4. You write your next command
55
61
 
56
62
  Each response should include:
63
+
57
64
  1. A **THOUGHT** section where you explain your reasoning and plan
58
65
  2. A single bash code block with your command
59
66
 
60
- Format your responses like this:
67
+ Format your responses as demonstrated within the <format_example> block:
61
68
 
62
69
  <format_example>
63
70
  THOUGHT: Here I explain my reasoning process, analysis of the current situation,
64
71
  and what I'm trying to accomplish with the command below.
65
72
 
66
- ```bash
73
+ ```mswea_bash_command
67
74
  your_command_here
68
75
  ```
69
76
  </format_example>
70
77
 
71
78
  Commands must be specified in a single bash code block:
72
79
 
73
- ```bash
80
+ ```mswea_bash_command
74
81
  your_command_here
75
82
  ```
76
83
 
77
84
  **CRITICAL REQUIREMENTS:**
85
+
78
86
  - Your response SHOULD include a THOUGHT section explaining your reasoning
79
87
  - Your response MUST include EXACTLY ONE bash code block
80
88
  - This bash block MUST contain EXACTLY ONE command (or a set of commands connected with && or ||)
@@ -87,89 +95,101 @@ agent:
87
95
  <example_response>
88
96
  THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
89
97
 
90
- ```bash
98
+ ```mswea_bash_command
91
99
  ls -la
92
100
  ```
93
101
  </example_response>
94
102
 
95
103
  Example of an INCORRECT response:
104
+
96
105
  <example_response>
97
106
  THOUGHT: I need to examine the codebase and then look at a specific file. I'll run multiple commands to do this.
98
107
 
99
- ```bash
108
+ ```mswea_bash_command
100
109
  ls -la
101
110
  ```
102
111
 
103
112
  Now I'll read the file:
104
113
 
105
- ```bash
114
+ ```mswea_bash_command
106
115
  cat file.txt
107
116
  ```
108
117
  </example_response>
109
118
 
110
119
  If you need to run multiple commands, either:
120
+
111
121
  1. Combine them in one block using && or ||
112
- ```bash
122
+ ```mswea_bash_command
113
123
  command1 && command2 || echo "Error occurred"
114
124
  ```
115
125
 
116
126
  2. Wait for the first command to complete, see its output, then issue the next command in your following response.
117
127
 
118
128
  ## Environment Details
129
+
119
130
  - You have a full Linux shell environment
120
131
  - Always use non-interactive flags (-y, -f) for commands
121
132
  - Avoid interactive tools like vi, nano, or any that require user input
122
- - If a command isn't available, you can install it
133
+ - You can use bash commands or invoke any tool that is available in the environment
134
+ - You can also create new tools or scripts to help you with the task
135
+ - If a tool isn't available, you can also install it
123
136
 
124
- ## Useful Command Examples
137
+ ## Submission
125
138
 
126
- ### Create a new file:
127
- ```bash
128
- cat <<'EOF' > newfile.py
129
- import numpy as np
130
- hello = "world"
131
- print(hello)
132
- EOF
133
- ```
139
+ When you've completed your work, you MUST submit your changes as a git patch.
140
+ Follow these steps IN ORDER, with SEPARATE commands:
134
141
 
135
- ### Edit files with sed:
136
- ```bash
137
- # Replace all occurrences
138
- sed -i 's/old_string/new_string/g' filename.py
142
+ Step 1: Create the patch file
143
+ Run `git diff -- path/to/file1 path/to/file2 > patch.txt` listing only the source files you modified.
144
+ Do NOT commit your changes.
139
145
 
140
- # Replace only first occurrence
141
- sed -i 's/old_string/new_string/' filename.py
146
+ <IMPORTANT>
147
+ The patch must only contain changes to the specific source files you modified to fix the issue.
148
+ Do not submit file creations or changes to any of the following files:
142
149
 
143
- # Replace first occurrence on line 1
144
- sed -i '1s/old_string/new_string/' filename.py
150
+ - test and reproduction files
151
+ - helper scripts, tests, or tools that you created
152
+ - installation, build, packaging, configuration, or setup scripts unless they are directly part of the issue you were fixing (you can assume that the environment is already set up for your client)
153
+ - binary or compiled files
154
+ </IMPORTANT>
145
155
 
146
- # Replace all occurrences in lines 1-10
147
- sed -i '1,10s/old_string/new_string/g' filename.py
148
- ```
156
+ Step 2: Verify your patch
157
+ Inspect patch.txt to confirm it only contains your intended changes and headers show `--- a/` and `+++ b/` paths.
149
158
 
150
- ### View file content:
151
- ```bash
152
- # View specific lines with numbers
153
- nl -ba filename.py | sed -n '10,20p'
154
- ```
159
+ Step 3: Submit (EXACT command required)
160
+ You MUST use this EXACT command to submit:
155
161
 
156
- ### Any other command you want to run
157
- ```bash
158
- anything
162
+ ```mswea_bash_command
163
+ echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && cat patch.txt
159
164
  ```
160
165
 
161
- ## Submission
162
- When you've completed your work (reading, editing, testing), and cannot make further progress
163
- issue exactly the following command:
164
-
165
- ```bash
166
- echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached
167
- ```
166
+ If the command fails (nonzero exit status), it will not submit.
168
167
 
169
- This command will submit your work.
170
- You cannot continue working (reading, editing, testing) in any way on this task after submitting.
168
+ <CRITICAL>
169
+ - Creating/viewing the patch and submitting it MUST be separate commands (not combined with &&).
170
+ - If you modify patch.txt after verifying, you SHOULD verify again before submitting.
171
+ - You CANNOT continue working (reading, editing, testing) in any way on this task after submitting.
172
+ </CRITICAL>
171
173
  </instructions>
172
- action_observation_template: |
174
+ step_limit: 250
175
+ cost_limit: 3.
176
+
177
+ environment:
178
+ cwd: "/testbed"
179
+ timeout: 60
180
+ env:
181
+ PAGER: cat
182
+ MANPAGER: cat
183
+ LESS: -R
184
+ PIP_PROGRESS_BAR: 'off'
185
+ TQDM_DISABLE: '1'
186
+ environment_class: docker
187
+
188
+ model:
189
+ observation_template: |
190
+ {% if output.exception_info -%}
191
+ <exception>{{output.exception_info}}</exception>
192
+ {% endif -%}
173
193
  <returncode>{{output.returncode}}</returncode>
174
194
  {% if output.output | length < 10000 -%}
175
195
  <output>
@@ -202,28 +222,13 @@ agent:
202
222
  <response_example>
203
223
  Here are some thoughts about why you want to perform the action.
204
224
 
205
- ```bash
225
+ ```mswea_bash_command
206
226
  <action>
207
227
  ```
208
228
  </response_example>
209
229
 
210
230
  If you have completed your assignment, please consult the first message about how to
211
231
  submit your solution (you will not be able to continue working on this task after that).
212
- step_limit: 250
213
- cost_limit: 3.
214
-
215
- environment:
216
- cwd: "/testbed"
217
- timeout: 60
218
- env:
219
- PAGER: cat
220
- MANPAGER: cat
221
- LESS: -R
222
- PIP_PROGRESS_BAR: 'off'
223
- TQDM_DISABLE: '1'
224
- environment_class: docker
225
-
226
- model:
227
232
  model_name: "anthropic/claude-sonnet-4-5-20250929"
228
233
  model_kwargs:
229
234
  drop_params: true
@@ -0,0 +1,47 @@
1
+ # SWE-bench Modal environment configuration
2
+ #
3
+ # This config provides Modal cloud environment settings for SWE-bench runs.
4
+ # It should be used together with the base swebench config:
5
+ #
6
+ # Usage:
7
+ # mini-extra swebench -c swebench -c swebench_modal [other options]
8
+ #
9
+ # The configs are merged, with swebench_modal overriding the environment
10
+ # settings to use Modal instead of Docker.
11
+ #
12
+ # Requirements:
13
+ # 1. Run `modal setup` to authenticate with Modal
14
+ # 2. Install the modal extra: `pip install mini-swe-agent[modal]`
15
+
16
+ # Modal environment configuration via SWE-ReX
17
+ # The image will be dynamically set per-instance to match SWE-bench docker images
18
+ environment:
19
+ environment_class: swerex_modal
20
+ cwd: "/testbed"
21
+ timeout: 60
22
+ env:
23
+ PAGER: cat
24
+ MANPAGER: cat
25
+ LESS: -R
26
+ PIP_PROGRESS_BAR: 'off'
27
+ TQDM_DISABLE: '1'
28
+ # Modal-specific settings
29
+ startup_timeout: 600.0 # 10 min to allow image building on first run
30
+ runtime_timeout: 1800.0 # 30 minutes max per instance
31
+ deployment_timeout: 1800.0
32
+ install_pipx: true
33
+ # Extra keyword arguments for the Modal sandbox (environment variables are set under `env` above)
34
+ modal_sandbox_kwargs: {}
35
+ # You can add Modal-specific options here, e.g.:
36
+ # cpu: 2.0
37
+ # memory: 4096
38
+
39
+ # GPT-5 mini via Portkey
40
+ model:
41
+ model_name: "gpt-5-mini"
42
+ model_class: portkey
43
+ provider: "@openai"
44
+ model_kwargs:
45
+ drop_params: true
46
+ # GPT-5 mini only supports temperature=1.0
47
+ # temperature: 0.0