mini-swe-agent 1.17.5__py3-none-any.whl → 2.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/METADATA +36 -52
- mini_swe_agent-2.0.0a1.dist-info/RECORD +70 -0
- mini_swe_agent-2.0.0a1.dist-info/entry_points.txt +5 -0
- minisweagent/__init__.py +19 -26
- minisweagent/agents/default.py +128 -113
- minisweagent/agents/interactive.py +119 -58
- minisweagent/config/README.md +3 -4
- minisweagent/config/__init__.py +36 -1
- minisweagent/config/benchmarks/swebench.yaml +156 -0
- minisweagent/config/{extra/swebench.yaml → benchmarks/swebench_backticks.yaml} +69 -64
- minisweagent/config/benchmarks/swebench_modal.yaml +47 -0
- minisweagent/config/{extra → benchmarks}/swebench_xml.yaml +73 -70
- minisweagent/config/default.yaml +24 -21
- minisweagent/config/inspector.tcss +42 -0
- minisweagent/config/mini.yaml +53 -71
- minisweagent/config/{github_issue.yaml → mini_textbased.yaml} +43 -29
- minisweagent/environments/__init__.py +1 -0
- minisweagent/environments/docker.py +67 -20
- minisweagent/environments/extra/bubblewrap.py +86 -47
- minisweagent/environments/extra/swerex_docker.py +53 -20
- minisweagent/environments/extra/swerex_modal.py +90 -0
- minisweagent/environments/local.py +62 -21
- minisweagent/environments/singularity.py +59 -18
- minisweagent/exceptions.py +22 -0
- minisweagent/models/__init__.py +6 -7
- minisweagent/models/extra/roulette.py +20 -17
- minisweagent/models/litellm_model.py +90 -44
- minisweagent/models/litellm_response_model.py +80 -0
- minisweagent/models/litellm_textbased_model.py +45 -0
- minisweagent/models/openrouter_model.py +87 -45
- minisweagent/models/openrouter_response_model.py +123 -0
- minisweagent/models/openrouter_textbased_model.py +76 -0
- minisweagent/models/portkey_model.py +84 -42
- minisweagent/models/portkey_response_model.py +163 -0
- minisweagent/models/requesty_model.py +91 -41
- minisweagent/models/test_models.py +246 -19
- minisweagent/models/utils/actions_text.py +60 -0
- minisweagent/models/utils/actions_toolcall.py +102 -0
- minisweagent/models/utils/actions_toolcall_response.py +110 -0
- minisweagent/models/utils/anthropic_utils.py +28 -0
- minisweagent/models/utils/cache_control.py +15 -2
- minisweagent/models/utils/content_string.py +74 -0
- minisweagent/models/utils/openai_multimodal.py +50 -0
- minisweagent/models/utils/retry.py +25 -0
- minisweagent/run/benchmarks/__init__.py +1 -0
- minisweagent/run/{extra → benchmarks}/swebench.py +56 -35
- minisweagent/run/{extra → benchmarks}/swebench_single.py +36 -26
- minisweagent/run/{extra → benchmarks}/utils/batch_progress.py +1 -1
- minisweagent/run/hello_world.py +6 -0
- minisweagent/run/mini.py +54 -63
- minisweagent/run/utilities/__init__.py +1 -0
- minisweagent/run/{extra → utilities}/config.py +2 -0
- minisweagent/run/{inspector.py → utilities/inspector.py} +90 -11
- minisweagent/run/{mini_extra.py → utilities/mini_extra.py} +9 -5
- minisweagent/utils/serialize.py +26 -0
- mini_swe_agent-1.17.5.dist-info/RECORD +0 -61
- mini_swe_agent-1.17.5.dist-info/entry_points.txt +0 -5
- minisweagent/agents/interactive_textual.py +0 -450
- minisweagent/config/extra/swebench_roulette.yaml +0 -233
- minisweagent/config/mini.tcss +0 -86
- minisweagent/models/anthropic.py +0 -35
- minisweagent/models/litellm_response_api_model.py +0 -82
- minisweagent/models/portkey_response_api_model.py +0 -75
- minisweagent/models/utils/key_per_thread.py +0 -20
- minisweagent/models/utils/openai_utils.py +0 -41
- minisweagent/run/github_issue.py +0 -87
- minisweagent/run/utils/__init__.py +0 -0
- minisweagent/run/utils/save.py +0 -78
- {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/WHEEL +0 -0
- {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/licenses/LICENSE.md +0 -0
- {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/top_level.txt +0 -0
- /minisweagent/config/{extra → benchmarks}/__init__.py +0 -0
- /minisweagent/run/{extra → benchmarks}/utils/__init__.py +0 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
agent:
|
|
2
|
+
system_template: |
|
|
3
|
+
You are a helpful assistant that can interact with a computer shell to solve programming tasks.
|
|
4
|
+
instance_template: |
|
|
5
|
+
<pr_description>
|
|
6
|
+
Consider the following PR description:
|
|
7
|
+
{{task}}
|
|
8
|
+
</pr_description>
|
|
9
|
+
|
|
10
|
+
<instructions>
|
|
11
|
+
# Task Instructions
|
|
12
|
+
|
|
13
|
+
## Overview
|
|
14
|
+
|
|
15
|
+
You're a software engineer interacting continuously with a computer by submitting commands.
|
|
16
|
+
You'll be helping implement necessary changes to meet requirements in the PR description.
|
|
17
|
+
Your task is specifically to make changes to non-test files in the current directory in order to fix the issue described in the PR description in a way that is general and consistent with the codebase.
|
|
18
|
+
<IMPORTANT>This is an interactive process where you will think and issue AT LEAST ONE command, see the result, then think and issue your next command(s).</IMPORTANT>
|
|
19
|
+
|
|
20
|
+
For each response:
|
|
21
|
+
|
|
22
|
+
1. Include a THOUGHT section explaining your reasoning and what you're trying to accomplish
|
|
23
|
+
2. Provide at least ONE bash command to execute
|
|
24
|
+
|
|
25
|
+
## Important Boundaries
|
|
26
|
+
|
|
27
|
+
- MODIFY: Regular source code files in /testbed (this is the working directory for all your subsequent commands)
|
|
28
|
+
- DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
|
|
29
|
+
|
|
30
|
+
## Recommended Workflow
|
|
31
|
+
|
|
32
|
+
1. Analyze the codebase by finding and reading relevant files
|
|
33
|
+
2. Create a script to reproduce the issue
|
|
34
|
+
3. Edit the source code to resolve the issue
|
|
35
|
+
4. Verify your fix works by running your script again
|
|
36
|
+
5. Test edge cases to ensure your fix is robust
|
|
37
|
+
|
|
38
|
+
## Command Execution Rules
|
|
39
|
+
|
|
40
|
+
You are operating in an environment where
|
|
41
|
+
|
|
42
|
+
1. You issue at least one command
|
|
43
|
+
2. The system executes the command(s) in a subshell
|
|
44
|
+
3. You see the result(s)
|
|
45
|
+
4. You write your next command(s)
|
|
46
|
+
|
|
47
|
+
Each response should include:
|
|
48
|
+
|
|
49
|
+
1. **Reasoning text** where you explain your analysis and plan
|
|
50
|
+
2. At least one tool call with your command
|
|
51
|
+
|
|
52
|
+
**CRITICAL REQUIREMENTS:**
|
|
53
|
+
|
|
54
|
+
- Your response SHOULD include reasoning text explaining what you're doing
|
|
55
|
+
- Your response MUST include AT LEAST ONE bash tool call
|
|
56
|
+
- Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
|
|
57
|
+
- However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
|
|
58
|
+
|
|
59
|
+
Example of a CORRECT response:
|
|
60
|
+
<example_response>
|
|
61
|
+
I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
|
|
62
|
+
|
|
63
|
+
[Makes bash tool call with {"command": "ls -la"} as arguments]
|
|
64
|
+
</example_response>
|
|
65
|
+
|
|
66
|
+
## Environment Details
|
|
67
|
+
|
|
68
|
+
- You have a full Linux shell environment
|
|
69
|
+
- Always use non-interactive flags (-y, -f) for commands
|
|
70
|
+
- Avoid interactive tools like vi, nano, or any that require user input
|
|
71
|
+
- You can use bash commands or invoke any tool that is available in the environment
|
|
72
|
+
- You can also create new tools or scripts to help you with the task
|
|
73
|
+
- If a tool isn't available, you can also install it
|
|
74
|
+
|
|
75
|
+
## Submission
|
|
76
|
+
|
|
77
|
+
When you've completed your work, you MUST submit your changes as a git patch.
|
|
78
|
+
Follow these steps IN ORDER, with SEPARATE commands:
|
|
79
|
+
|
|
80
|
+
Step 1: Create the patch file
|
|
81
|
+
Run `git diff -- path/to/file1 path/to/file2 > patch.txt` listing only the source files you modified.
|
|
82
|
+
Do NOT commit your changes.
|
|
83
|
+
|
|
84
|
+
<IMPORTANT>
|
|
85
|
+
The patch must only contain changes to the specific source files you modified to fix the issue.
|
|
86
|
+
Do not submit file creations or changes to any of the following files:
|
|
87
|
+
|
|
88
|
+
- test and reproduction files
|
|
89
|
+
- helper scripts, tests, or tools that you created
|
|
90
|
+
- installation, build, packaging, configuration, or setup scripts unless they are directly part of the issue you were fixing (you can assume that the environment is already set up for your client)
|
|
91
|
+
- binary or compiled files
|
|
92
|
+
</IMPORTANT>
|
|
93
|
+
|
|
94
|
+
Step 2: Verify your patch
|
|
95
|
+
Inspect patch.txt to confirm it only contains your intended changes and headers show `--- a/` and `+++ b/` paths.
|
|
96
|
+
|
|
97
|
+
Step 3: Submit (EXACT command required)
|
|
98
|
+
You MUST use this EXACT command to submit:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && cat patch.txt
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
If the command fails (nonzero exit status), it will not submit.
|
|
105
|
+
|
|
106
|
+
<CRITICAL>
|
|
107
|
+
- Creating/viewing the patch and submitting it MUST be separate commands (not combined with &&).
|
|
108
|
+
- If you modify patch.txt after verifying, you SHOULD verify again before submitting.
|
|
109
|
+
- You CANNOT continue working (reading, editing, testing) in any way on this task after submitting.
|
|
110
|
+
</CRITICAL>
|
|
111
|
+
</instructions>
|
|
112
|
+
step_limit: 250
|
|
113
|
+
cost_limit: 3.
|
|
114
|
+
|
|
115
|
+
environment:
|
|
116
|
+
cwd: "/testbed"
|
|
117
|
+
timeout: 60
|
|
118
|
+
env:
|
|
119
|
+
PAGER: cat
|
|
120
|
+
MANPAGER: cat
|
|
121
|
+
LESS: -R
|
|
122
|
+
PIP_PROGRESS_BAR: 'off'
|
|
123
|
+
TQDM_DISABLE: '1'
|
|
124
|
+
environment_class: docker
|
|
125
|
+
|
|
126
|
+
model:
|
|
127
|
+
observation_template: |
|
|
128
|
+
{%- if output.output | length < 10000 -%}
|
|
129
|
+
{
|
|
130
|
+
"returncode": {{ output.returncode }},
|
|
131
|
+
"output": {{ output.output | tojson }}
|
|
132
|
+
{%- if output.exception_info %}, "exception_info": {{ output.exception_info | tojson }}{% endif %}
|
|
133
|
+
}
|
|
134
|
+
{%- else -%}
|
|
135
|
+
{
|
|
136
|
+
"returncode": {{ output.returncode }},
|
|
137
|
+
"output_head": {{ output.output[:5000] | tojson }},
|
|
138
|
+
"output_tail": {{ output.output[-5000:] | tojson }},
|
|
139
|
+
"elided_chars": {{ output.output | length - 10000 }},
|
|
140
|
+
"warning": "Output too long."
|
|
141
|
+
{%- if output.exception_info %}, "exception_info": {{ output.exception_info | tojson }}{% endif %}
|
|
142
|
+
}
|
|
143
|
+
{%- endif -%}
|
|
144
|
+
format_error_template: |
|
|
145
|
+
Tool call error. Every response needs to use the 'bash' tool at least once to execute commands.
|
|
146
|
+
|
|
147
|
+
Call the bash tool with your command as the argument:
|
|
148
|
+
- Tool: bash
|
|
149
|
+
- Arguments: {"command": "your_command_here"}
|
|
150
|
+
|
|
151
|
+
If you have completed your assignment, please consult the first message about how to
|
|
152
|
+
submit your solution (you will not be able to continue working on this task after that).
|
|
153
|
+
model_name: "anthropic/claude-sonnet-4-5-20250929"
|
|
154
|
+
model_kwargs:
|
|
155
|
+
drop_params: true
|
|
156
|
+
temperature: 0.0
|
|
@@ -9,7 +9,7 @@ agent:
|
|
|
9
9
|
<format_example>
|
|
10
10
|
THOUGHT: Your reasoning and analysis here
|
|
11
11
|
|
|
12
|
-
```
|
|
12
|
+
```mswea_bash_command
|
|
13
13
|
your_command_here
|
|
14
14
|
```
|
|
15
15
|
</format_example>
|
|
@@ -25,21 +25,25 @@ agent:
|
|
|
25
25
|
# Task Instructions
|
|
26
26
|
|
|
27
27
|
## Overview
|
|
28
|
+
|
|
28
29
|
You're a software engineer interacting continuously with a computer by submitting commands.
|
|
29
30
|
You'll be helping implement necessary changes to meet requirements in the PR description.
|
|
30
31
|
Your task is specifically to make changes to non-test files in the current directory in order to fix the issue described in the PR description in a way that is general and consistent with the codebase.
|
|
31
32
|
|
|
32
|
-
IMPORTANT
|
|
33
|
+
<IMPORTANT>This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command.</IMPORTANT>
|
|
33
34
|
|
|
34
35
|
For each response:
|
|
36
|
+
|
|
35
37
|
1. Include a THOUGHT section explaining your reasoning and what you're trying to accomplish
|
|
36
38
|
2. Provide exactly ONE bash command to execute
|
|
37
39
|
|
|
38
40
|
## Important Boundaries
|
|
41
|
+
|
|
39
42
|
- MODIFY: Regular source code files in /testbed (this is the working directory for all your subsequent commands)
|
|
40
43
|
- DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
|
|
41
44
|
|
|
42
45
|
## Recommended Workflow
|
|
46
|
+
|
|
43
47
|
1. Analyze the codebase by finding and reading relevant files
|
|
44
48
|
2. Create a script to reproduce the issue
|
|
45
49
|
3. Edit the source code to resolve the issue
|
|
@@ -47,34 +51,38 @@ agent:
|
|
|
47
51
|
5. Test edge cases to ensure your fix is robust
|
|
48
52
|
|
|
49
53
|
## Command Execution Rules
|
|
54
|
+
|
|
50
55
|
You are operating in an environment where
|
|
56
|
+
|
|
51
57
|
1. You write a single command
|
|
52
58
|
2. The system executes that command in a subshell
|
|
53
59
|
3. You see the result
|
|
54
60
|
4. You write your next command
|
|
55
61
|
|
|
56
62
|
Each response should include:
|
|
63
|
+
|
|
57
64
|
1. A **THOUGHT** section where you explain your reasoning and plan
|
|
58
65
|
2. A single bash code block with your command
|
|
59
66
|
|
|
60
|
-
Format your responses like
|
|
67
|
+
Format your responses as demonstrated in the <format_example> block:
|
|
61
68
|
|
|
62
69
|
<format_example>
|
|
63
70
|
THOUGHT: Here I explain my reasoning process, analysis of the current situation,
|
|
64
71
|
and what I'm trying to accomplish with the command below.
|
|
65
72
|
|
|
66
|
-
```
|
|
73
|
+
```mswea_bash_command
|
|
67
74
|
your_command_here
|
|
68
75
|
```
|
|
69
76
|
</format_example>
|
|
70
77
|
|
|
71
78
|
Commands must be specified in a single bash code block:
|
|
72
79
|
|
|
73
|
-
```
|
|
80
|
+
```mswea_bash_command
|
|
74
81
|
your_command_here
|
|
75
82
|
```
|
|
76
83
|
|
|
77
84
|
**CRITICAL REQUIREMENTS:**
|
|
85
|
+
|
|
78
86
|
- Your response SHOULD include a THOUGHT section explaining your reasoning
|
|
79
87
|
- Your response MUST include EXACTLY ONE bash code block
|
|
80
88
|
- This bash block MUST contain EXACTLY ONE command (or a set of commands connected with && or ||)
|
|
@@ -87,89 +95,101 @@ agent:
|
|
|
87
95
|
<example_response>
|
|
88
96
|
THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
|
|
89
97
|
|
|
90
|
-
```
|
|
98
|
+
```mswea_bash_command
|
|
91
99
|
ls -la
|
|
92
100
|
```
|
|
93
101
|
</example_response>
|
|
94
102
|
|
|
95
103
|
Example of an INCORRECT response:
|
|
104
|
+
|
|
96
105
|
<example_response>
|
|
97
106
|
THOUGHT: I need to examine the codebase and then look at a specific file. I'll run multiple commands to do this.
|
|
98
107
|
|
|
99
|
-
```
|
|
108
|
+
```mswea_bash_command
|
|
100
109
|
ls -la
|
|
101
110
|
```
|
|
102
111
|
|
|
103
112
|
Now I'll read the file:
|
|
104
113
|
|
|
105
|
-
```
|
|
114
|
+
```mswea_bash_command
|
|
106
115
|
cat file.txt
|
|
107
116
|
```
|
|
108
117
|
</example_response>
|
|
109
118
|
|
|
110
119
|
If you need to run multiple commands, either:
|
|
120
|
+
|
|
111
121
|
1. Combine them in one block using && or ||
|
|
112
|
-
```
|
|
122
|
+
```mswea_bash_command
|
|
113
123
|
command1 && command2 || echo "Error occurred"
|
|
114
124
|
```
|
|
115
125
|
|
|
116
126
|
2. Wait for the first command to complete, see its output, then issue the next command in your following response.
|
|
117
127
|
|
|
118
128
|
## Environment Details
|
|
129
|
+
|
|
119
130
|
- You have a full Linux shell environment
|
|
120
131
|
- Always use non-interactive flags (-y, -f) for commands
|
|
121
132
|
- Avoid interactive tools like vi, nano, or any that require user input
|
|
122
|
-
-
|
|
133
|
+
- You can use bash commands or invoke any tool that is available in the environment
|
|
134
|
+
- You can also create new tools or scripts to help you with the task
|
|
135
|
+
- If a tool isn't available, you can also install it
|
|
123
136
|
|
|
124
|
-
##
|
|
137
|
+
## Submission
|
|
125
138
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
cat <<'EOF' > newfile.py
|
|
129
|
-
import numpy as np
|
|
130
|
-
hello = "world"
|
|
131
|
-
print(hello)
|
|
132
|
-
EOF
|
|
133
|
-
```
|
|
139
|
+
When you've completed your work, you MUST submit your changes as a git patch.
|
|
140
|
+
Follow these steps IN ORDER, with SEPARATE commands:
|
|
134
141
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
sed -i 's/old_string/new_string/g' filename.py
|
|
142
|
+
Step 1: Create the patch file
|
|
143
|
+
Run `git diff -- path/to/file1 path/to/file2 > patch.txt` listing only the source files you modified.
|
|
144
|
+
Do NOT commit your changes.
|
|
139
145
|
|
|
140
|
-
|
|
141
|
-
|
|
146
|
+
<IMPORTANT>
|
|
147
|
+
The patch must only contain changes to the specific source files you modified to fix the issue.
|
|
148
|
+
Do not submit file creations or changes to any of the following files:
|
|
142
149
|
|
|
143
|
-
|
|
144
|
-
|
|
150
|
+
- test and reproduction files
|
|
151
|
+
- helper scripts, tests, or tools that you created
|
|
152
|
+
- installation, build, packaging, configuration, or setup scripts unless they are directly part of the issue you were fixing (you can assume that the environment is already set up for your client)
|
|
153
|
+
- binary or compiled files
|
|
154
|
+
</IMPORTANT>
|
|
145
155
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
```
|
|
156
|
+
Step 2: Verify your patch
|
|
157
|
+
Inspect patch.txt to confirm it only contains your intended changes and headers show `--- a/` and `+++ b/` paths.
|
|
149
158
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
# View specific lines with numbers
|
|
153
|
-
nl -ba filename.py | sed -n '10,20p'
|
|
154
|
-
```
|
|
159
|
+
Step 3: Submit (EXACT command required)
|
|
160
|
+
You MUST use this EXACT command to submit:
|
|
155
161
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
anything
|
|
162
|
+
```mswea_bash_command
|
|
163
|
+
echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && cat patch.txt
|
|
159
164
|
```
|
|
160
165
|
|
|
161
|
-
|
|
162
|
-
When you've completed your work (reading, editing, testing), and cannot make further progress
|
|
163
|
-
issue exactly the following command:
|
|
164
|
-
|
|
165
|
-
```bash
|
|
166
|
-
echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached
|
|
167
|
-
```
|
|
166
|
+
If the command fails (nonzero exit status), it will not submit.
|
|
168
167
|
|
|
169
|
-
|
|
170
|
-
|
|
168
|
+
<CRITICAL>
|
|
169
|
+
- Creating/viewing the patch and submitting it MUST be separate commands (not combined with &&).
|
|
170
|
+
- If you modify patch.txt after verifying, you SHOULD verify again before submitting.
|
|
171
|
+
- You CANNOT continue working (reading, editing, testing) in any way on this task after submitting.
|
|
172
|
+
</CRITICAL>
|
|
171
173
|
</instructions>
|
|
172
|
-
|
|
174
|
+
step_limit: 250
|
|
175
|
+
cost_limit: 3.
|
|
176
|
+
|
|
177
|
+
environment:
|
|
178
|
+
cwd: "/testbed"
|
|
179
|
+
timeout: 60
|
|
180
|
+
env:
|
|
181
|
+
PAGER: cat
|
|
182
|
+
MANPAGER: cat
|
|
183
|
+
LESS: -R
|
|
184
|
+
PIP_PROGRESS_BAR: 'off'
|
|
185
|
+
TQDM_DISABLE: '1'
|
|
186
|
+
environment_class: docker
|
|
187
|
+
|
|
188
|
+
model:
|
|
189
|
+
observation_template: |
|
|
190
|
+
{% if output.exception_info -%}
|
|
191
|
+
<exception>{{output.exception_info}}</exception>
|
|
192
|
+
{% endif -%}
|
|
173
193
|
<returncode>{{output.returncode}}</returncode>
|
|
174
194
|
{% if output.output | length < 10000 -%}
|
|
175
195
|
<output>
|
|
@@ -202,28 +222,13 @@ agent:
|
|
|
202
222
|
<response_example>
|
|
203
223
|
Here are some thoughts about why you want to perform the action.
|
|
204
224
|
|
|
205
|
-
```
|
|
225
|
+
```mswea_bash_command
|
|
206
226
|
<action>
|
|
207
227
|
```
|
|
208
228
|
</response_example>
|
|
209
229
|
|
|
210
230
|
If you have completed your assignment, please consult the first message about how to
|
|
211
231
|
submit your solution (you will not be able to continue working on this task after that).
|
|
212
|
-
step_limit: 250
|
|
213
|
-
cost_limit: 3.
|
|
214
|
-
|
|
215
|
-
environment:
|
|
216
|
-
cwd: "/testbed"
|
|
217
|
-
timeout: 60
|
|
218
|
-
env:
|
|
219
|
-
PAGER: cat
|
|
220
|
-
MANPAGER: cat
|
|
221
|
-
LESS: -R
|
|
222
|
-
PIP_PROGRESS_BAR: 'off'
|
|
223
|
-
TQDM_DISABLE: '1'
|
|
224
|
-
environment_class: docker
|
|
225
|
-
|
|
226
|
-
model:
|
|
227
232
|
model_name: "anthropic/claude-sonnet-4-5-20250929"
|
|
228
233
|
model_kwargs:
|
|
229
234
|
drop_params: true
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# SWE-bench Modal environment configuration
|
|
2
|
+
#
|
|
3
|
+
# This config provides Modal cloud environment settings for SWE-bench runs.
|
|
4
|
+
# It should be used together with the base swebench config:
|
|
5
|
+
#
|
|
6
|
+
# Usage:
|
|
7
|
+
# mini-extra swebench -c swebench -c swebench_modal [other options]
|
|
8
|
+
#
|
|
9
|
+
# The configs are merged, with swebench_modal overriding the environment
|
|
10
|
+
# settings to use Modal instead of Docker.
|
|
11
|
+
#
|
|
12
|
+
# Requirements:
|
|
13
|
+
# 1. Run `modal setup` to authenticate with Modal
|
|
14
|
+
# 2. Install the modal extra: `pip install mini-swe-agent[modal]`
|
|
15
|
+
|
|
16
|
+
# Modal environment configuration via SWE-ReX
|
|
17
|
+
# The image will be dynamically set per-instance to match SWE-bench docker images
|
|
18
|
+
environment:
|
|
19
|
+
environment_class: swerex_modal
|
|
20
|
+
cwd: "/testbed"
|
|
21
|
+
timeout: 60
|
|
22
|
+
env:
|
|
23
|
+
PAGER: cat
|
|
24
|
+
MANPAGER: cat
|
|
25
|
+
LESS: -R
|
|
26
|
+
PIP_PROGRESS_BAR: 'off'
|
|
27
|
+
TQDM_DISABLE: '1'
|
|
28
|
+
# Modal-specific settings
|
|
29
|
+
startup_timeout: 600.0 # 10 min to allow image building on first run
|
|
30
|
+
runtime_timeout: 1800.0 # 30 minutes max per instance
|
|
31
|
+
deployment_timeout: 1800.0
|
|
32
|
+
install_pipx: true
|
|
33
|
+
# Extra keyword arguments for the Modal sandbox (environment variables are set under `env` above)
|
|
34
|
+
modal_sandbox_kwargs: {}
|
|
35
|
+
# You can add Modal-specific options here, e.g.:
|
|
36
|
+
# cpu: 2.0
|
|
37
|
+
# memory: 4096
|
|
38
|
+
|
|
39
|
+
# GPT-5 mini via Portkey
|
|
40
|
+
model:
|
|
41
|
+
model_name: "gpt-5-mini"
|
|
42
|
+
model_class: portkey
|
|
43
|
+
provider: "@openai"
|
|
44
|
+
model_kwargs:
|
|
45
|
+
drop_params: true
|
|
46
|
+
# GPT-5 mini only supports temperature=1.0
|
|
47
|
+
# temperature: 0.0
|