mini-swe-agent 1.17.5__py3-none-any.whl → 2.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/METADATA +36 -52
- mini_swe_agent-2.0.0a1.dist-info/RECORD +70 -0
- mini_swe_agent-2.0.0a1.dist-info/entry_points.txt +5 -0
- minisweagent/__init__.py +19 -26
- minisweagent/agents/default.py +128 -113
- minisweagent/agents/interactive.py +119 -58
- minisweagent/config/README.md +3 -4
- minisweagent/config/__init__.py +36 -1
- minisweagent/config/benchmarks/swebench.yaml +156 -0
- minisweagent/config/{extra/swebench.yaml → benchmarks/swebench_backticks.yaml} +69 -64
- minisweagent/config/benchmarks/swebench_modal.yaml +47 -0
- minisweagent/config/{extra → benchmarks}/swebench_xml.yaml +73 -70
- minisweagent/config/default.yaml +24 -21
- minisweagent/config/inspector.tcss +42 -0
- minisweagent/config/mini.yaml +53 -71
- minisweagent/config/{github_issue.yaml → mini_textbased.yaml} +43 -29
- minisweagent/environments/__init__.py +1 -0
- minisweagent/environments/docker.py +67 -20
- minisweagent/environments/extra/bubblewrap.py +86 -47
- minisweagent/environments/extra/swerex_docker.py +53 -20
- minisweagent/environments/extra/swerex_modal.py +90 -0
- minisweagent/environments/local.py +62 -21
- minisweagent/environments/singularity.py +59 -18
- minisweagent/exceptions.py +22 -0
- minisweagent/models/__init__.py +6 -7
- minisweagent/models/extra/roulette.py +20 -17
- minisweagent/models/litellm_model.py +90 -44
- minisweagent/models/litellm_response_model.py +80 -0
- minisweagent/models/litellm_textbased_model.py +45 -0
- minisweagent/models/openrouter_model.py +87 -45
- minisweagent/models/openrouter_response_model.py +123 -0
- minisweagent/models/openrouter_textbased_model.py +76 -0
- minisweagent/models/portkey_model.py +84 -42
- minisweagent/models/portkey_response_model.py +163 -0
- minisweagent/models/requesty_model.py +91 -41
- minisweagent/models/test_models.py +246 -19
- minisweagent/models/utils/actions_text.py +60 -0
- minisweagent/models/utils/actions_toolcall.py +102 -0
- minisweagent/models/utils/actions_toolcall_response.py +110 -0
- minisweagent/models/utils/anthropic_utils.py +28 -0
- minisweagent/models/utils/cache_control.py +15 -2
- minisweagent/models/utils/content_string.py +74 -0
- minisweagent/models/utils/openai_multimodal.py +50 -0
- minisweagent/models/utils/retry.py +25 -0
- minisweagent/run/benchmarks/__init__.py +1 -0
- minisweagent/run/{extra → benchmarks}/swebench.py +56 -35
- minisweagent/run/{extra → benchmarks}/swebench_single.py +36 -26
- minisweagent/run/{extra → benchmarks}/utils/batch_progress.py +1 -1
- minisweagent/run/hello_world.py +6 -0
- minisweagent/run/mini.py +54 -63
- minisweagent/run/utilities/__init__.py +1 -0
- minisweagent/run/{extra → utilities}/config.py +2 -0
- minisweagent/run/{inspector.py → utilities/inspector.py} +90 -11
- minisweagent/run/{mini_extra.py → utilities/mini_extra.py} +9 -5
- minisweagent/utils/serialize.py +26 -0
- mini_swe_agent-1.17.5.dist-info/RECORD +0 -61
- mini_swe_agent-1.17.5.dist-info/entry_points.txt +0 -5
- minisweagent/agents/interactive_textual.py +0 -450
- minisweagent/config/extra/swebench_roulette.yaml +0 -233
- minisweagent/config/mini.tcss +0 -86
- minisweagent/models/anthropic.py +0 -35
- minisweagent/models/litellm_response_api_model.py +0 -82
- minisweagent/models/portkey_response_api_model.py +0 -75
- minisweagent/models/utils/key_per_thread.py +0 -20
- minisweagent/models/utils/openai_utils.py +0 -41
- minisweagent/run/github_issue.py +0 -87
- minisweagent/run/utils/__init__.py +0 -0
- minisweagent/run/utils/save.py +0 -78
- {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/WHEEL +0 -0
- {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/licenses/LICENSE.md +0 -0
- {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/top_level.txt +0 -0
- /minisweagent/config/{extra → benchmarks}/__init__.py +0 -0
- /minisweagent/run/{extra → benchmarks}/utils/__init__.py +0 -0
|
@@ -9,7 +9,7 @@ agent:
|
|
|
9
9
|
<format_example>
|
|
10
10
|
THOUGHT: Your reasoning and analysis here
|
|
11
11
|
|
|
12
|
-
<bash_code>your_command_here</bash_code>
|
|
12
|
+
<mswea_bash_command>your_command_here</mswea_bash_command>
|
|
13
13
|
</format_example>
|
|
14
14
|
|
|
15
15
|
Failure to follow these rules will cause your response to be rejected.
|
|
@@ -23,21 +23,25 @@ agent:
|
|
|
23
23
|
# Task Instructions
|
|
24
24
|
|
|
25
25
|
## Overview
|
|
26
|
+
|
|
26
27
|
You're a software engineer interacting continuously with a computer by submitting commands.
|
|
27
28
|
You'll be helping implement necessary changes to meet requirements in the PR description.
|
|
28
29
|
Your task is specifically to make changes to non-test files in the current directory in order to fix the issue described in the PR description in a way that is general and consistent with the codebase.
|
|
29
30
|
|
|
30
|
-
IMPORTANT
|
|
31
|
+
<IMPORTANT>This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command.</IMPORTANT>
|
|
31
32
|
|
|
32
33
|
For each response:
|
|
34
|
+
|
|
33
35
|
1. Include a THOUGHT section explaining your reasoning and what you're trying to accomplish
|
|
34
36
|
2. Provide exactly ONE bash command to execute
|
|
35
37
|
|
|
36
38
|
## Important Boundaries
|
|
39
|
+
|
|
37
40
|
- MODIFY: Regular source code files in /testbed (this is the working directory for all your subsequent commands)
|
|
38
41
|
- DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
|
|
39
42
|
|
|
40
43
|
## Recommended Workflow
|
|
44
|
+
|
|
41
45
|
1. Analyze the codebase by finding and reading relevant files
|
|
42
46
|
2. Create a script to reproduce the issue
|
|
43
47
|
3. Edit the source code to resolve the issue
|
|
@@ -45,29 +49,36 @@ agent:
|
|
|
45
49
|
5. Test edge cases to ensure your fix is robust
|
|
46
50
|
|
|
47
51
|
## Command Execution Rules
|
|
52
|
+
|
|
48
53
|
You are operating in an environment where
|
|
54
|
+
|
|
49
55
|
1. You write a single command
|
|
50
56
|
2. The system executes that command in a subshell
|
|
51
57
|
3. You see the result
|
|
52
58
|
4. You write your next command
|
|
53
59
|
|
|
54
60
|
Each response should include:
|
|
61
|
+
|
|
55
62
|
1. A **THOUGHT** section where you explain your reasoning and plan
|
|
56
63
|
2. A single bash code block with your command
|
|
57
64
|
|
|
58
|
-
Format your responses like
|
|
65
|
+
Format your responses like demonstrated within the <format_example> block:
|
|
59
66
|
|
|
60
67
|
<format_example>
|
|
61
68
|
THOUGHT: Here I explain my reasoning process, analysis of the current situation,
|
|
62
69
|
and what I'm trying to accomplish with the command below.
|
|
63
70
|
|
|
64
|
-
<
|
|
71
|
+
<mswea_bash_command>your_command_here</mswea_bash_command></format_example>
|
|
72
|
+
Commands must be specified in a single bash XML tag:
|
|
73
|
+
|
|
74
|
+
<mswea_bash_command>your_command_here</mswea_bash_command>
|
|
65
75
|
|
|
66
76
|
**CRITICAL REQUIREMENTS:**
|
|
77
|
+
|
|
67
78
|
- Your response SHOULD include a THOUGHT section explaining your reasoning
|
|
68
|
-
- Your response MUST include EXACTLY ONE
|
|
69
|
-
- This bash
|
|
70
|
-
- If you include zero or multiple
|
|
79
|
+
- Your response MUST include EXACTLY ONE mswea_bash_command tag
|
|
80
|
+
- This bash mswea_bash_command MUST contain EXACTLY ONE command (or a set of commands connected with && or ||)
|
|
81
|
+
- If you include zero or multiple tags, or no command at all, YOUR RESPONSE WILL FAIL
|
|
71
82
|
- Do NOT try to run multiple independent commands in separate blocks in one response
|
|
72
83
|
- Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
|
|
73
84
|
- However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
|
|
@@ -77,7 +88,7 @@ agent:
|
|
|
77
88
|
<example_response>
|
|
78
89
|
THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
|
|
79
90
|
|
|
80
|
-
<bash_code>ls -la</bash_code>
|
|
91
|
+
<mswea_bash_command>ls -la</mswea_bash_command>
|
|
81
92
|
</example_response>
|
|
82
93
|
|
|
83
94
|
Example of an INCORRECT response:
|
|
@@ -85,18 +96,18 @@ agent:
|
|
|
85
96
|
<example_response>
|
|
86
97
|
THOUGHT: I need to examine the codebase and then look at a specific file. I'll run multiple commands to do this.
|
|
87
98
|
|
|
88
|
-
<bash_code>ls -la</bash_code>
|
|
99
|
+
<mswea_bash_command>ls -la</mswea_bash_command>
|
|
89
100
|
|
|
90
101
|
Now I'll read the file:
|
|
91
102
|
|
|
92
|
-
<bash_code>cat file.txt</bash_code>
|
|
103
|
+
<mswea_bash_command>cat file.txt</mswea_bash_command>
|
|
93
104
|
</example_response>
|
|
94
105
|
|
|
95
106
|
If you need to run multiple commands, either:
|
|
96
107
|
|
|
97
108
|
1. Combine them in one block using && or ||
|
|
98
109
|
|
|
99
|
-
<bash_code>command1 && command2 || echo "Error occurred"</bash_code>
|
|
110
|
+
<mswea_bash_command>command1 && command2 || echo "Error occurred"</mswea_bash_command>
|
|
100
111
|
|
|
101
112
|
2. Wait for the first command to complete, see its output, then issue the next command in your following response.
|
|
102
113
|
|
|
@@ -105,57 +116,64 @@ agent:
|
|
|
105
116
|
- You have a full Linux shell environment
|
|
106
117
|
- Always use non-interactive flags (-y, -f) for commands
|
|
107
118
|
- Avoid interactive tools like vi, nano, or any that require user input
|
|
108
|
-
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
### Create a new file:
|
|
113
|
-
|
|
114
|
-
<bash_code>cat <<'EOF' > newfile.py
|
|
115
|
-
import numpy as np
|
|
116
|
-
hello = "world"
|
|
117
|
-
print(hello)
|
|
118
|
-
EOF</bash_code>
|
|
119
|
-
|
|
120
|
-
### Edit files with sed:
|
|
119
|
+
- You can use bash commands or invoke any tool that is available in the environment
|
|
120
|
+
- You can also create new tools or scripts to help you with the task
|
|
121
|
+
- If a tool isn't available, you can also install it
|
|
121
122
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
<bash_code>sed -i 's/old_string/new_string/g' filename.py</bash_code>
|
|
125
|
-
|
|
126
|
-
Replace only first occurrence
|
|
127
|
-
|
|
128
|
-
<bash_code>sed -i 's/old_string/new_string/' filename.py</bash_code>
|
|
129
|
-
|
|
130
|
-
Replace first occurrence on line 1
|
|
131
|
-
|
|
132
|
-
<bash_code>sed -i '1s/old_string/new_string/' filename.py</bash_code>
|
|
123
|
+
## Submission
|
|
133
124
|
|
|
134
|
-
|
|
125
|
+
When you've completed your work, you MUST submit your changes as a git patch.
|
|
126
|
+
Follow these steps IN ORDER, with SEPARATE commands:
|
|
135
127
|
|
|
136
|
-
|
|
128
|
+
Step 1: Create the patch file
|
|
129
|
+
Run `git diff -- path/to/file1 path/to/file2 > patch.txt` listing only the source files you modified.
|
|
130
|
+
Do NOT commit your changes.
|
|
137
131
|
|
|
138
|
-
|
|
132
|
+
<IMPORTANT>
|
|
133
|
+
The patch must only contain changes to the specific source files you modified to fix the issue.
|
|
134
|
+
Do not submit file creations or changes to any of the following files:
|
|
139
135
|
|
|
140
|
-
|
|
136
|
+
- test and reproduction files
|
|
137
|
+
- helper scripts, tests, or tools that you created
|
|
138
|
+
- installation, build, packaging, configuration, or setup scripts unless they are directly part of the issue you were fixing (you can assume that the environment is already set up for your client)
|
|
139
|
+
- binary or compiled files
|
|
140
|
+
</IMPORTANT>
|
|
141
141
|
|
|
142
|
-
|
|
142
|
+
Step 2: Verify your patch
|
|
143
|
+
Inspect patch.txt to confirm it only contains your intended changes and headers show `--- a/` and `+++ b/` paths.
|
|
143
144
|
|
|
144
|
-
|
|
145
|
+
Step 3: Submit (EXACT command required)
|
|
146
|
+
You MUST use this EXACT command to submit:
|
|
145
147
|
|
|
146
|
-
<
|
|
148
|
+
<mswea_bash_command>echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && cat patch.txt</mswea_bash_command>
|
|
147
149
|
|
|
148
|
-
|
|
150
|
+
If the command fails (nonzero exit status), it will not submit.
|
|
149
151
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
+
<CRITICAL>
|
|
153
|
+
- Creating/viewing the patch and submitting it MUST be separate commands (not combined with &&).
|
|
154
|
+
- If you modify patch.txt after verifying, you SHOULD verify again before submitting.
|
|
155
|
+
- You CANNOT continue working (reading, editing, testing) in any way on this task after submitting.
|
|
156
|
+
</CRITICAL>
|
|
157
|
+
</instructions>
|
|
158
|
+
step_limit: 250
|
|
159
|
+
cost_limit: 3.
|
|
152
160
|
|
|
153
|
-
|
|
161
|
+
environment:
|
|
162
|
+
cwd: "/testbed"
|
|
163
|
+
timeout: 60
|
|
164
|
+
env:
|
|
165
|
+
PAGER: cat
|
|
166
|
+
MANPAGER: cat
|
|
167
|
+
LESS: -R
|
|
168
|
+
PIP_PROGRESS_BAR: 'off'
|
|
169
|
+
TQDM_DISABLE: '1'
|
|
170
|
+
environment_class: docker
|
|
154
171
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
172
|
+
model:
|
|
173
|
+
observation_template: |
|
|
174
|
+
{% if output.exception_info -%}
|
|
175
|
+
<exception>{{output.exception_info}}</exception>
|
|
176
|
+
{% endif -%}
|
|
159
177
|
<returncode>{{output.returncode}}</returncode>
|
|
160
178
|
{% if output.output | length < 10000 -%}
|
|
161
179
|
<output>
|
|
@@ -180,36 +198,21 @@ agent:
|
|
|
180
198
|
{{ output.output[-5000:] }}
|
|
181
199
|
</output_tail>
|
|
182
200
|
{%- endif -%}
|
|
201
|
+
action_regex: <mswea_bash_command>(.*?)</mswea_bash_command>
|
|
183
202
|
format_error_template: |
|
|
184
|
-
Please always provide EXACTLY ONE action in the `<bash_code>` block, found {{actions|length}} actions.
|
|
203
|
+
Please always provide EXACTLY ONE action in the `<mswea_bash_command>` block, found {{actions|length}} actions.
|
|
185
204
|
|
|
186
|
-
Please format your action in a `<bash_code>` block as shown in <response_example>.
|
|
205
|
+
Please format your action in a `<mswea_bash_command>` block as shown in <response_example>.
|
|
187
206
|
|
|
188
207
|
<response_example>
|
|
189
208
|
Here are some thoughts about why you want to perform the action.
|
|
190
209
|
|
|
191
|
-
<bash_code>ls -la</bash_code>
|
|
210
|
+
<mswea_bash_command>ls -la</mswea_bash_command>
|
|
192
211
|
</response_example>
|
|
193
212
|
|
|
194
213
|
If you have completed your assignment, please consult the first message about how to
|
|
195
214
|
submit your solution (you will not be able to continue working on this task after that).
|
|
196
|
-
step_limit: 250
|
|
197
|
-
cost_limit: 3.
|
|
198
|
-
action_regex: <bash_code>(.*?)</bash_code>
|
|
199
|
-
|
|
200
|
-
environment:
|
|
201
|
-
cwd: "/testbed"
|
|
202
|
-
timeout: 60
|
|
203
|
-
env:
|
|
204
|
-
PAGER: cat
|
|
205
|
-
MANPAGER: cat
|
|
206
|
-
LESS: -R
|
|
207
|
-
PIP_PROGRESS_BAR: 'off'
|
|
208
|
-
TQDM_DISABLE: '1'
|
|
209
|
-
environment_class: docker
|
|
210
|
-
|
|
211
|
-
model:
|
|
212
215
|
model_name: "minimax/minimax-m2"
|
|
213
216
|
model_class: openrouter
|
|
214
217
|
model_kwargs:
|
|
215
|
-
temperature: 0.0
|
|
218
|
+
temperature: 0.0
|
minisweagent/config/default.yaml
CHANGED
|
@@ -9,7 +9,7 @@ agent:
|
|
|
9
9
|
<format_example>
|
|
10
10
|
Your reasoning and analysis here. Explain why you want to perform the action.
|
|
11
11
|
|
|
12
|
-
```bash
|
|
12
|
+
```mswea_bash_command
|
|
13
13
|
your_command_here
|
|
14
14
|
```
|
|
15
15
|
</format_example>
|
|
@@ -50,7 +50,7 @@ agent:
|
|
|
50
50
|
<example_response>
|
|
51
51
|
THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
|
|
52
52
|
|
|
53
|
-
```bash
|
|
53
|
+
```mswea_bash_command
|
|
54
54
|
ls -la
|
|
55
55
|
```
|
|
56
56
|
</example_response>
|
|
@@ -59,7 +59,7 @@ agent:
|
|
|
59
59
|
|
|
60
60
|
### Create a new file:
|
|
61
61
|
|
|
62
|
-
```bash
|
|
62
|
+
```mswea_bash_command
|
|
63
63
|
cat <<'EOF' > newfile.py
|
|
64
64
|
import numpy as np
|
|
65
65
|
hello = "world"
|
|
@@ -75,7 +75,7 @@ agent:
|
|
|
75
75
|
</important>
|
|
76
76
|
{%- endif -%}
|
|
77
77
|
|
|
78
|
-
```bash
|
|
78
|
+
```mswea_bash_command
|
|
79
79
|
# Replace all occurrences
|
|
80
80
|
sed -i 's/old_string/new_string/g' filename.py
|
|
81
81
|
|
|
@@ -91,17 +91,30 @@ agent:
|
|
|
91
91
|
|
|
92
92
|
### View file content:
|
|
93
93
|
|
|
94
|
-
```bash
|
|
94
|
+
```mswea_bash_command
|
|
95
95
|
# View specific lines with numbers
|
|
96
96
|
nl -ba filename.py | sed -n '10,20p'
|
|
97
97
|
```
|
|
98
98
|
|
|
99
99
|
### Any other command you want to run
|
|
100
100
|
|
|
101
|
-
```bash
|
|
101
|
+
```mswea_bash_command
|
|
102
102
|
anything
|
|
103
103
|
```
|
|
104
|
-
|
|
104
|
+
step_limit: 0
|
|
105
|
+
cost_limit: 0.
|
|
106
|
+
environment:
|
|
107
|
+
env:
|
|
108
|
+
PAGER: cat
|
|
109
|
+
MANPAGER: cat
|
|
110
|
+
LESS: -R
|
|
111
|
+
PIP_PROGRESS_BAR: 'off'
|
|
112
|
+
TQDM_DISABLE: '1'
|
|
113
|
+
model:
|
|
114
|
+
observation_template: |
|
|
115
|
+
{% if output.exception_info -%}
|
|
116
|
+
<exception>{{output.exception_info}}</exception>
|
|
117
|
+
{% endif -%}
|
|
105
118
|
<returncode>{{output.returncode}}</returncode>
|
|
106
119
|
{% if output.output | length < 10000 -%}
|
|
107
120
|
<output>
|
|
@@ -126,6 +139,8 @@ agent:
|
|
|
126
139
|
{{ output.output[-5000:] }}
|
|
127
140
|
</output_tail>
|
|
128
141
|
{%- endif -%}
|
|
142
|
+
model_kwargs:
|
|
143
|
+
drop_params: true
|
|
129
144
|
format_error_template: |
|
|
130
145
|
Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
|
|
131
146
|
If you want to end the task, please issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
|
|
@@ -135,22 +150,10 @@ agent:
|
|
|
135
150
|
<response_example>
|
|
136
151
|
Here are some thoughts about why you want to perform the action.
|
|
137
152
|
|
|
138
|
-
```bash
|
|
153
|
+
```mswea_bash_command
|
|
139
154
|
<action>
|
|
140
155
|
```
|
|
141
156
|
</response_example>
|
|
142
157
|
|
|
143
158
|
Note: In rare cases, if you need to reference a similar format in your command, you might have
|
|
144
|
-
to proceed in two steps, first writing TRIPLEBACKTICKSBASH, then replacing them with ```bash.
|
|
145
|
-
step_limit: 0.
|
|
146
|
-
cost_limit: 0.
|
|
147
|
-
environment:
|
|
148
|
-
env:
|
|
149
|
-
PAGER: cat
|
|
150
|
-
MANPAGER: cat
|
|
151
|
-
LESS: -R
|
|
152
|
-
PIP_PROGRESS_BAR: 'off'
|
|
153
|
-
TQDM_DISABLE: '1'
|
|
154
|
-
model:
|
|
155
|
-
model_kwargs:
|
|
156
|
-
drop_params: true
|
|
159
|
+
to proceed in two steps, first writing TRIPLEBACKTICKSBASH, then replacing them with ```mswea_bash_command.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/* Trajectory Inspector Styles */
|
|
2
|
+
|
|
3
|
+
#main {
|
|
4
|
+
height: 1fr;
|
|
5
|
+
overflow: hidden;
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
VerticalScroll {
|
|
9
|
+
height: 1fr;
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
#content {
|
|
13
|
+
height: auto;
|
|
14
|
+
padding: 1;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
.message-container {
|
|
18
|
+
height: auto;
|
|
19
|
+
margin-bottom: 1;
|
|
20
|
+
padding: 0 1;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
.message-header {
|
|
24
|
+
background: $accent;
|
|
25
|
+
color: $text;
|
|
26
|
+
padding: 0 1;
|
|
27
|
+
text-style: bold;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
.message-content {
|
|
31
|
+
height: auto;
|
|
32
|
+
padding: 1;
|
|
33
|
+
background: $surface;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
Header {
|
|
37
|
+
dock: top;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
Footer {
|
|
41
|
+
dock: bottom;
|
|
42
|
+
}
|
minisweagent/config/mini.yaml
CHANGED
|
@@ -1,20 +1,6 @@
|
|
|
1
1
|
agent:
|
|
2
2
|
system_template: |
|
|
3
3
|
You are a helpful assistant that can interact with a computer.
|
|
4
|
-
|
|
5
|
-
Your response must contain exactly ONE bash code block with ONE command (or commands connected with && or ||).
|
|
6
|
-
Include a THOUGHT section before your command where you explain your reasoning process.
|
|
7
|
-
Format your response as shown in <format_example>.
|
|
8
|
-
|
|
9
|
-
<format_example>
|
|
10
|
-
Your reasoning and analysis here. Explain why you want to perform the action.
|
|
11
|
-
|
|
12
|
-
```bash
|
|
13
|
-
your_command_here
|
|
14
|
-
```
|
|
15
|
-
</format_example>
|
|
16
|
-
|
|
17
|
-
Failure to follow these rules will cause your response to be rejected.
|
|
18
4
|
instance_template: |
|
|
19
5
|
Please solve this issue: {{task}}
|
|
20
6
|
|
|
@@ -32,29 +18,40 @@ agent:
|
|
|
32
18
|
6. Submit your changes and finish your work by issuing the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`.
|
|
33
19
|
Do not combine it with any other command. <important>After this command, you cannot continue working on this task.</important>
|
|
34
20
|
|
|
35
|
-
##
|
|
21
|
+
## Command Execution Rules
|
|
36
22
|
|
|
37
|
-
|
|
38
|
-
2. The action must be enclosed in triple backticks
|
|
39
|
-
3. Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
|
|
40
|
-
However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
|
|
23
|
+
You are operating in an environment where
|
|
41
24
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
25
|
+
1. You issue at least one command
|
|
26
|
+
2. The system executes the command(s) in a subshell
|
|
27
|
+
3. You see the result(s)
|
|
28
|
+
4. You write your next command(s)
|
|
45
29
|
|
|
46
|
-
|
|
30
|
+
Each response should include:
|
|
47
31
|
|
|
48
|
-
|
|
32
|
+
1. **Reasoning text** where you explain your analysis and plan
|
|
33
|
+
2. At least one tool call with your command
|
|
49
34
|
|
|
35
|
+
**CRITICAL REQUIREMENTS:**
|
|
36
|
+
|
|
37
|
+
- Your response SHOULD include reasoning text explaining what you're doing
|
|
38
|
+
- Your response MUST include AT LEAST ONE bash tool call
|
|
39
|
+
- Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
|
|
40
|
+
- However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
|
|
41
|
+
- Submit your changes and finish your work by issuing the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`.
|
|
42
|
+
Do not combine it with any other command. <important>After this command, you cannot continue working on this task.</important>
|
|
43
|
+
|
|
44
|
+
Example of a CORRECT response:
|
|
50
45
|
<example_response>
|
|
51
|
-
|
|
46
|
+
I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
|
|
52
47
|
|
|
53
|
-
|
|
54
|
-
ls -la
|
|
55
|
-
```
|
|
48
|
+
[Makes bash tool call with {"command": "ls -la"} as arguments]
|
|
56
49
|
</example_response>
|
|
57
50
|
|
|
51
|
+
<system_information>
|
|
52
|
+
{{system}} {{release}} {{version}} {{machine}}
|
|
53
|
+
</system_information>
|
|
54
|
+
|
|
58
55
|
## Useful command examples
|
|
59
56
|
|
|
60
57
|
### Create a new file:
|
|
@@ -101,48 +98,7 @@ agent:
|
|
|
101
98
|
```bash
|
|
102
99
|
anything
|
|
103
100
|
```
|
|
104
|
-
|
|
105
|
-
<returncode>{{output.returncode}}</returncode>
|
|
106
|
-
{% if output.output | length < 10000 -%}
|
|
107
|
-
<output>
|
|
108
|
-
{{ output.output -}}
|
|
109
|
-
</output>
|
|
110
|
-
{%- else -%}
|
|
111
|
-
<warning>
|
|
112
|
-
The output of your last command was too long.
|
|
113
|
-
Please try a different command that produces less output.
|
|
114
|
-
If you're looking at a file you can try use head, tail or sed to view a smaller number of lines selectively.
|
|
115
|
-
If you're using grep or find and it produced too much output, you can use a more selective search pattern.
|
|
116
|
-
If you really need to see something from the full command's output, you can redirect output to a file and then search in that file.
|
|
117
|
-
</warning>
|
|
118
|
-
{%- set elided_chars = output.output | length - 10000 -%}
|
|
119
|
-
<output_head>
|
|
120
|
-
{{ output.output[:5000] }}
|
|
121
|
-
</output_head>
|
|
122
|
-
<elided_chars>
|
|
123
|
-
{{ elided_chars }} characters elided
|
|
124
|
-
</elided_chars>
|
|
125
|
-
<output_tail>
|
|
126
|
-
{{ output.output[-5000:] }}
|
|
127
|
-
</output_tail>
|
|
128
|
-
{%- endif -%}
|
|
129
|
-
format_error_template: |
|
|
130
|
-
Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
|
|
131
|
-
If you want to end the task, please issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
|
|
132
|
-
without any other command.
|
|
133
|
-
Else, please format your response exactly as follows:
|
|
134
|
-
|
|
135
|
-
<response_example>
|
|
136
|
-
Here are some thoughts about why you want to perform the action.
|
|
137
|
-
|
|
138
|
-
```bash
|
|
139
|
-
<action>
|
|
140
|
-
```
|
|
141
|
-
</response_example>
|
|
142
|
-
|
|
143
|
-
Note: In rare cases, if you need to reference a similar format in your command, you might have
|
|
144
|
-
to proceed in two steps, first writing TRIPLEBACKTICKSBASH, then replacing them with ```bash.
|
|
145
|
-
step_limit: 0.
|
|
101
|
+
step_limit: 0
|
|
146
102
|
cost_limit: 3.
|
|
147
103
|
mode: confirm
|
|
148
104
|
environment:
|
|
@@ -153,5 +109,31 @@ environment:
|
|
|
153
109
|
PIP_PROGRESS_BAR: 'off'
|
|
154
110
|
TQDM_DISABLE: '1'
|
|
155
111
|
model:
|
|
112
|
+
observation_template: |
|
|
113
|
+
{%- if output.output | length < 10000 -%}
|
|
114
|
+
{
|
|
115
|
+
"returncode": {{ output.returncode }},
|
|
116
|
+
"output": {{ output.output | tojson }}
|
|
117
|
+
{%- if output.exception_info %}, "exception_info": {{ output.exception_info | tojson }}{% endif %}
|
|
118
|
+
}
|
|
119
|
+
{%- else -%}
|
|
120
|
+
{
|
|
121
|
+
"returncode": {{ output.returncode }},
|
|
122
|
+
"output_head": {{ output.output[:5000] | tojson }},
|
|
123
|
+
"output_tail": {{ output.output[-5000:] | tojson }},
|
|
124
|
+
"elided_chars": {{ output.output | length - 10000 }},
|
|
125
|
+
"warning": "Output too long."
|
|
126
|
+
{%- if output.exception_info %}, "exception_info": {{ output.exception_info | tojson }}{% endif %}
|
|
127
|
+
}
|
|
128
|
+
{%- endif -%}
|
|
129
|
+
format_error_template: |
|
|
130
|
+
Tool call error. Every response needs to use the 'bash' tool at least once to execute commands.
|
|
131
|
+
|
|
132
|
+
Call the bash tool with your command as the argument:
|
|
133
|
+
- Tool: bash
|
|
134
|
+
- Arguments: {"command": "your_command_here"}
|
|
135
|
+
|
|
136
|
+
If you want to end the task, please issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
|
|
137
|
+
without any other command.
|
|
156
138
|
model_kwargs:
|
|
157
|
-
drop_params: true
|
|
139
|
+
drop_params: true
|