mini-swe-agent 1.17.5__py3-none-any.whl → 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/METADATA +36 -52
  2. mini_swe_agent-2.0.0a1.dist-info/RECORD +70 -0
  3. mini_swe_agent-2.0.0a1.dist-info/entry_points.txt +5 -0
  4. minisweagent/__init__.py +19 -26
  5. minisweagent/agents/default.py +128 -113
  6. minisweagent/agents/interactive.py +119 -58
  7. minisweagent/config/README.md +3 -4
  8. minisweagent/config/__init__.py +36 -1
  9. minisweagent/config/benchmarks/swebench.yaml +156 -0
  10. minisweagent/config/{extra/swebench.yaml → benchmarks/swebench_backticks.yaml} +69 -64
  11. minisweagent/config/benchmarks/swebench_modal.yaml +47 -0
  12. minisweagent/config/{extra → benchmarks}/swebench_xml.yaml +73 -70
  13. minisweagent/config/default.yaml +24 -21
  14. minisweagent/config/inspector.tcss +42 -0
  15. minisweagent/config/mini.yaml +53 -71
  16. minisweagent/config/{github_issue.yaml → mini_textbased.yaml} +43 -29
  17. minisweagent/environments/__init__.py +1 -0
  18. minisweagent/environments/docker.py +67 -20
  19. minisweagent/environments/extra/bubblewrap.py +86 -47
  20. minisweagent/environments/extra/swerex_docker.py +53 -20
  21. minisweagent/environments/extra/swerex_modal.py +90 -0
  22. minisweagent/environments/local.py +62 -21
  23. minisweagent/environments/singularity.py +59 -18
  24. minisweagent/exceptions.py +22 -0
  25. minisweagent/models/__init__.py +6 -7
  26. minisweagent/models/extra/roulette.py +20 -17
  27. minisweagent/models/litellm_model.py +90 -44
  28. minisweagent/models/litellm_response_model.py +80 -0
  29. minisweagent/models/litellm_textbased_model.py +45 -0
  30. minisweagent/models/openrouter_model.py +87 -45
  31. minisweagent/models/openrouter_response_model.py +123 -0
  32. minisweagent/models/openrouter_textbased_model.py +76 -0
  33. minisweagent/models/portkey_model.py +84 -42
  34. minisweagent/models/portkey_response_model.py +163 -0
  35. minisweagent/models/requesty_model.py +91 -41
  36. minisweagent/models/test_models.py +246 -19
  37. minisweagent/models/utils/actions_text.py +60 -0
  38. minisweagent/models/utils/actions_toolcall.py +102 -0
  39. minisweagent/models/utils/actions_toolcall_response.py +110 -0
  40. minisweagent/models/utils/anthropic_utils.py +28 -0
  41. minisweagent/models/utils/cache_control.py +15 -2
  42. minisweagent/models/utils/content_string.py +74 -0
  43. minisweagent/models/utils/openai_multimodal.py +50 -0
  44. minisweagent/models/utils/retry.py +25 -0
  45. minisweagent/run/benchmarks/__init__.py +1 -0
  46. minisweagent/run/{extra → benchmarks}/swebench.py +56 -35
  47. minisweagent/run/{extra → benchmarks}/swebench_single.py +36 -26
  48. minisweagent/run/{extra → benchmarks}/utils/batch_progress.py +1 -1
  49. minisweagent/run/hello_world.py +6 -0
  50. minisweagent/run/mini.py +54 -63
  51. minisweagent/run/utilities/__init__.py +1 -0
  52. minisweagent/run/{extra → utilities}/config.py +2 -0
  53. minisweagent/run/{inspector.py → utilities/inspector.py} +90 -11
  54. minisweagent/run/{mini_extra.py → utilities/mini_extra.py} +9 -5
  55. minisweagent/utils/serialize.py +26 -0
  56. mini_swe_agent-1.17.5.dist-info/RECORD +0 -61
  57. mini_swe_agent-1.17.5.dist-info/entry_points.txt +0 -5
  58. minisweagent/agents/interactive_textual.py +0 -450
  59. minisweagent/config/extra/swebench_roulette.yaml +0 -233
  60. minisweagent/config/mini.tcss +0 -86
  61. minisweagent/models/anthropic.py +0 -35
  62. minisweagent/models/litellm_response_api_model.py +0 -82
  63. minisweagent/models/portkey_response_api_model.py +0 -75
  64. minisweagent/models/utils/key_per_thread.py +0 -20
  65. minisweagent/models/utils/openai_utils.py +0 -41
  66. minisweagent/run/github_issue.py +0 -87
  67. minisweagent/run/utils/__init__.py +0 -0
  68. minisweagent/run/utils/save.py +0 -78
  69. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/WHEEL +0 -0
  70. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/licenses/LICENSE.md +0 -0
  71. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/top_level.txt +0 -0
  72. /minisweagent/config/{extra → benchmarks}/__init__.py +0 -0
  73. /minisweagent/run/{extra → benchmarks}/utils/__init__.py +0 -0
@@ -9,7 +9,7 @@ agent:
9
9
  <format_example>
10
10
  THOUGHT: Your reasoning and analysis here
11
11
 
12
- <bash_code>your_command_here</bash_code>
12
+ <mswea_bash_command>your_command_here</mswea_bash_command>
13
13
  </format_example>
14
14
 
15
15
  Failure to follow these rules will cause your response to be rejected.
@@ -23,21 +23,25 @@ agent:
23
23
  # Task Instructions
24
24
 
25
25
  ## Overview
26
+
26
27
  You're a software engineer interacting continuously with a computer by submitting commands.
27
28
  You'll be helping implement necessary changes to meet requirements in the PR description.
28
29
  Your task is specifically to make changes to non-test files in the current directory in order to fix the issue described in the PR description in a way that is general and consistent with the codebase.
29
30
 
30
- IMPORTANT: This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command.
31
+ <IMPORTANT>This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command.</IMPORTANT>
31
32
 
32
33
  For each response:
34
+
33
35
  1. Include a THOUGHT section explaining your reasoning and what you're trying to accomplish
34
36
  2. Provide exactly ONE bash command to execute
35
37
 
36
38
  ## Important Boundaries
39
+
37
40
  - MODIFY: Regular source code files in /testbed (this is the working directory for all your subsequent commands)
38
41
  - DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
39
42
 
40
43
  ## Recommended Workflow
44
+
41
45
  1. Analyze the codebase by finding and reading relevant files
42
46
  2. Create a script to reproduce the issue
43
47
  3. Edit the source code to resolve the issue
@@ -45,29 +49,36 @@ agent:
45
49
  5. Test edge cases to ensure your fix is robust
46
50
 
47
51
  ## Command Execution Rules
52
+
48
53
  You are operating in an environment where
54
+
49
55
  1. You write a single command
50
56
  2. The system executes that command in a subshell
51
57
  3. You see the result
52
58
  4. You write your next command
53
59
 
54
60
  Each response should include:
61
+
55
62
  1. A **THOUGHT** section where you explain your reasoning and plan
56
63
  2. A single bash code block with your command
57
64
 
58
- Format your responses like included within the <format_example> block:
65
+ Format your responses as demonstrated within the <format_example> block:
59
66
 
60
67
  <format_example>
61
68
  THOUGHT: Here I explain my reasoning process, analysis of the current situation,
62
69
  and what I'm trying to accomplish with the command below.
63
70
 
64
- <bash_code>your_command_here </bash_code></format_example>
71
+ <mswea_bash_command>your_command_here</mswea_bash_command></format_example>
72
+ Commands must be specified in a single bash XML tag:
73
+
74
+ <mswea_bash_command>your_command_here</mswea_bash_command>
65
75
 
66
76
  **CRITICAL REQUIREMENTS:**
77
+
67
78
  - Your response SHOULD include a THOUGHT section explaining your reasoning
68
- - Your response MUST include EXACTLY ONE bash code block
69
- - This bash block MUST contain EXACTLY ONE command (or a set of commands connected with && or ||)
70
- - If you include zero or multiple bash blocks, or no command at all, YOUR RESPONSE WILL FAIL
79
+ - Your response MUST include EXACTLY ONE mswea_bash_command tag
80
+ - This mswea_bash_command tag MUST contain EXACTLY ONE command (or a set of commands connected with && or ||)
81
+ - If you include zero or multiple tags, or no command at all, YOUR RESPONSE WILL FAIL
71
82
  - Do NOT try to run multiple independent commands in separate blocks in one response
72
83
  - Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
73
84
  - However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
@@ -77,7 +88,7 @@ agent:
77
88
  <example_response>
78
89
  THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
79
90
 
80
- <bash_code>ls -la</bash_code>
91
+ <mswea_bash_command>ls -la</mswea_bash_command>
81
92
  </example_response>
82
93
 
83
94
  Example of an INCORRECT response:
@@ -85,18 +96,18 @@ agent:
85
96
  <example_response>
86
97
  THOUGHT: I need to examine the codebase and then look at a specific file. I'll run multiple commands to do this.
87
98
 
88
- <bash_code>ls -la</bash_code>
99
+ <mswea_bash_command>ls -la</mswea_bash_command>
89
100
 
90
101
  Now I'll read the file:
91
102
 
92
- <bash_code>cat file.txt</bash_code>
103
+ <mswea_bash_command>cat file.txt</mswea_bash_command>
93
104
  </example_response>
94
105
 
95
106
  If you need to run multiple commands, either:
96
107
 
97
108
  1. Combine them in one block using && or ||
98
109
 
99
- <bash_code>command1 && command2 || echo "Error occurred"</bash_code>
110
+ <mswea_bash_command>command1 && command2 || echo "Error occurred"</mswea_bash_command>
100
111
 
101
112
  2. Wait for the first command to complete, see its output, then issue the next command in your following response.
102
113
 
@@ -105,57 +116,64 @@ agent:
105
116
  - You have a full Linux shell environment
106
117
  - Always use non-interactive flags (-y, -f) for commands
107
118
  - Avoid interactive tools like vi, nano, or any that require user input
108
- - If a command isn't available, you can install it
109
-
110
- ## Useful Command Examples
111
-
112
- ### Create a new file:
113
-
114
- <bash_code>cat <<'EOF' > newfile.py
115
- import numpy as np
116
- hello = "world"
117
- print(hello)
118
- EOF</bash_code>
119
-
120
- ### Edit files with sed:
119
+ - You can use bash commands or invoke any tool that is available in the environment
120
+ - You can also create new tools or scripts to help you with the task
121
+ - If a tool isn't available, you can also install it
121
122
 
122
- Replace all occurrences
123
-
124
- <bash_code>sed -i 's/old_string/new_string/g' filename.py</bash_code>
125
-
126
- Replace only first occurrence
127
-
128
- <bash_code>sed -i 's/old_string/new_string/' filename.py</bash_code>
129
-
130
- Replace first occurrence on line 1
131
-
132
- <bash_code>sed -i '1s/old_string/new_string/' filename.py</bash_code>
123
+ ## Submission
133
124
 
134
- Replace all occurrences in lines 1-10
125
+ When you've completed your work, you MUST submit your changes as a git patch.
126
+ Follow these steps IN ORDER, with SEPARATE commands:
135
127
 
136
- <bash_code>sed -i '1,10s/old_string/new_string/g' filename.py</bash_code>
128
+ Step 1: Create the patch file
129
+ Run `git diff -- path/to/file1 path/to/file2 > patch.txt` listing only the source files you modified.
130
+ Do NOT commit your changes.
137
131
 
138
- ### View file content:
132
+ <IMPORTANT>
133
+ The patch must only contain changes to the specific source files you modified to fix the issue.
134
+ Do not submit file creations or changes to any of the following files:
139
135
 
140
- View specific lines with numbers
136
+ - test and reproduction files
137
+ - helper scripts, tests, or tools that you created
138
+ - installation, build, packaging, configuration, or setup scripts unless they are directly part of the issue you were fixing (you can assume that the environment is already set up for your client)
139
+ - binary or compiled files
140
+ </IMPORTANT>
141
141
 
142
- <bash_code> nl -ba filename.py | sed -n '10,20p'</bash_code>
142
+ Step 2: Verify your patch
143
+ Inspect patch.txt to confirm it only contains your intended changes and headers show `--- a/` and `+++ b/` paths.
143
144
 
144
- Any other command you want to run
145
+ Step 3: Submit (EXACT command required)
146
+ You MUST use this EXACT command to submit:
145
147
 
146
- <bash_code>anything</bash_code>
148
+ <mswea_bash_command>echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && cat patch.txt</mswea_bash_command>
147
149
 
148
- ## Submission
150
+ If the command fails (nonzero exit status), it will not submit.
149
151
 
150
- When you've completed your work (reading, editing, testing), and cannot make further progress
151
- issue exactly the following command:
152
+ <CRITICAL>
153
+ - Creating/viewing the patch and submitting it MUST be separate commands (not combined with &&).
154
+ - If you modify patch.txt after verifying, you SHOULD verify again before submitting.
155
+ - You CANNOT continue working (reading, editing, testing) in any way on this task after submitting.
156
+ </CRITICAL>
157
+ </instructions>
158
+ step_limit: 250
159
+ cost_limit: 3.
152
160
 
153
- <bash_code>echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached</bash_code>
161
+ environment:
162
+ cwd: "/testbed"
163
+ timeout: 60
164
+ env:
165
+ PAGER: cat
166
+ MANPAGER: cat
167
+ LESS: -R
168
+ PIP_PROGRESS_BAR: 'off'
169
+ TQDM_DISABLE: '1'
170
+ environment_class: docker
154
171
 
155
- This command will submit your work.
156
- You cannot continue working (reading, editing, testing) in any way on this task after submitting.
157
- </instructions>
158
- action_observation_template: |
172
+ model:
173
+ observation_template: |
174
+ {% if output.exception_info -%}
175
+ <exception>{{output.exception_info}}</exception>
176
+ {% endif -%}
159
177
  <returncode>{{output.returncode}}</returncode>
160
178
  {% if output.output | length < 10000 -%}
161
179
  <output>
@@ -180,36 +198,21 @@ agent:
180
198
  {{ output.output[-5000:] }}
181
199
  </output_tail>
182
200
  {%- endif -%}
201
+ action_regex: <mswea_bash_command>(.*?)</mswea_bash_command>
183
202
  format_error_template: |
184
- Please always provide EXACTLY ONE action in the `<bash_code>` block, found {{actions|length}} actions.
203
+ Please always provide EXACTLY ONE action in the `<mswea_bash_command>` block, found {{actions|length}} actions.
185
204
 
186
- Please format your action in a `<bash_code>` block as shown in <response_example>.
205
+ Please format your action in a `<mswea_bash_command>` block as shown in <response_example>.
187
206
 
188
207
  <response_example>
189
208
  Here are some thoughts about why you want to perform the action.
190
209
 
191
- <bash_code>ls -la</bash_code>
210
+ <mswea_bash_command>ls -la</mswea_bash_command>
192
211
  </response_example>
193
212
 
194
213
  If you have completed your assignment, please consult the first message about how to
195
214
  submit your solution (you will not be able to continue working on this task after that).
196
- step_limit: 250
197
- cost_limit: 3.
198
- action_regex: <bash_code>(.*?)</bash_code>
199
-
200
- environment:
201
- cwd: "/testbed"
202
- timeout: 60
203
- env:
204
- PAGER: cat
205
- MANPAGER: cat
206
- LESS: -R
207
- PIP_PROGRESS_BAR: 'off'
208
- TQDM_DISABLE: '1'
209
- environment_class: docker
210
-
211
- model:
212
215
  model_name: "minimax/minimax-m2"
213
216
  model_class: openrouter
214
217
  model_kwargs:
215
- temperature: 0.0
218
+ temperature: 0.0
@@ -9,7 +9,7 @@ agent:
9
9
  <format_example>
10
10
  Your reasoning and analysis here. Explain why you want to perform the action.
11
11
 
12
- ```bash
12
+ ```mswea_bash_command
13
13
  your_command_here
14
14
  ```
15
15
  </format_example>
@@ -50,7 +50,7 @@ agent:
50
50
  <example_response>
51
51
  THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
52
52
 
53
- ```bash
53
+ ```mswea_bash_command
54
54
  ls -la
55
55
  ```
56
56
  </example_response>
@@ -59,7 +59,7 @@ agent:
59
59
 
60
60
  ### Create a new file:
61
61
 
62
- ```bash
62
+ ```mswea_bash_command
63
63
  cat <<'EOF' > newfile.py
64
64
  import numpy as np
65
65
  hello = "world"
@@ -75,7 +75,7 @@ agent:
75
75
  </important>
76
76
  {%- endif -%}
77
77
 
78
- ```bash
78
+ ```mswea_bash_command
79
79
  # Replace all occurrences
80
80
  sed -i 's/old_string/new_string/g' filename.py
81
81
 
@@ -91,17 +91,30 @@ agent:
91
91
 
92
92
  ### View file content:
93
93
 
94
- ```bash
94
+ ```mswea_bash_command
95
95
  # View specific lines with numbers
96
96
  nl -ba filename.py | sed -n '10,20p'
97
97
  ```
98
98
 
99
99
  ### Any other command you want to run
100
100
 
101
- ```bash
101
+ ```mswea_bash_command
102
102
  anything
103
103
  ```
104
- action_observation_template: |
104
+ step_limit: 0
105
+ cost_limit: 0.
106
+ environment:
107
+ env:
108
+ PAGER: cat
109
+ MANPAGER: cat
110
+ LESS: -R
111
+ PIP_PROGRESS_BAR: 'off'
112
+ TQDM_DISABLE: '1'
113
+ model:
114
+ observation_template: |
115
+ {% if output.exception_info -%}
116
+ <exception>{{output.exception_info}}</exception>
117
+ {% endif -%}
105
118
  <returncode>{{output.returncode}}</returncode>
106
119
  {% if output.output | length < 10000 -%}
107
120
  <output>
@@ -126,6 +139,8 @@ agent:
126
139
  {{ output.output[-5000:] }}
127
140
  </output_tail>
128
141
  {%- endif -%}
142
+ model_kwargs:
143
+ drop_params: true
129
144
  format_error_template: |
130
145
  Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
131
146
  If you want to end the task, please issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
@@ -135,22 +150,10 @@ agent:
135
150
  <response_example>
136
151
  Here are some thoughts about why you want to perform the action.
137
152
 
138
- ```bash
153
+ ```mswea_bash_command
139
154
  <action>
140
155
  ```
141
156
  </response_example>
142
157
 
143
158
  Note: In rare cases, if you need to reference a similar format in your command, you might have
144
- to proceed in two steps, first writing TRIPLEBACKTICKSBASH, then replacing them with ```bash.
145
- step_limit: 0.
146
- cost_limit: 0.
147
- environment:
148
- env:
149
- PAGER: cat
150
- MANPAGER: cat
151
- LESS: -R
152
- PIP_PROGRESS_BAR: 'off'
153
- TQDM_DISABLE: '1'
154
- model:
155
- model_kwargs:
156
- drop_params: true
159
+ to proceed in two steps, first writing TRIPLEBACKTICKSBASH, then replacing them with ```mswea_bash_command.
@@ -0,0 +1,42 @@
1
+ /* Trajectory Inspector Styles */
2
+
3
+ #main {
4
+ height: 1fr;
5
+ overflow: hidden;
6
+ }
7
+
8
+ VerticalScroll {
9
+ height: 1fr;
10
+ }
11
+
12
+ #content {
13
+ height: auto;
14
+ padding: 1;
15
+ }
16
+
17
+ .message-container {
18
+ height: auto;
19
+ margin-bottom: 1;
20
+ padding: 0 1;
21
+ }
22
+
23
+ .message-header {
24
+ background: $accent;
25
+ color: $text;
26
+ padding: 0 1;
27
+ text-style: bold;
28
+ }
29
+
30
+ .message-content {
31
+ height: auto;
32
+ padding: 1;
33
+ background: $surface;
34
+ }
35
+
36
+ Header {
37
+ dock: top;
38
+ }
39
+
40
+ Footer {
41
+ dock: bottom;
42
+ }
@@ -1,20 +1,6 @@
1
1
  agent:
2
2
  system_template: |
3
3
  You are a helpful assistant that can interact with a computer.
4
-
5
- Your response must contain exactly ONE bash code block with ONE command (or commands connected with && or ||).
6
- Include a THOUGHT section before your command where you explain your reasoning process.
7
- Format your response as shown in <format_example>.
8
-
9
- <format_example>
10
- Your reasoning and analysis here. Explain why you want to perform the action.
11
-
12
- ```bash
13
- your_command_here
14
- ```
15
- </format_example>
16
-
17
- Failure to follow these rules will cause your response to be rejected.
18
4
  instance_template: |
19
5
  Please solve this issue: {{task}}
20
6
 
@@ -32,29 +18,40 @@ agent:
32
18
  6. Submit your changes and finish your work by issuing the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`.
33
19
  Do not combine it with any other command. <important>After this command, you cannot continue working on this task.</important>
34
20
 
35
- ## Important Rules
21
+ ## Command Execution Rules
36
22
 
37
- 1. Every response must contain exactly one action
38
- 2. The action must be enclosed in triple backticks
39
- 3. Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
40
- However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
23
+ You are operating in an environment where
41
24
 
42
- <system_information>
43
- {{system}} {{release}} {{version}} {{machine}}
44
- </system_information>
25
+ 1. You issue at least one command
26
+ 2. The system executes the command(s) in a subshell
27
+ 3. You see the result(s)
28
+ 4. You write your next command(s)
45
29
 
46
- ## Formatting your response
30
+ Each response should include:
47
31
 
48
- Here is an example of a correct response:
32
+ 1. **Reasoning text** where you explain your analysis and plan
33
+ 2. At least one tool call with your command
49
34
 
35
+ **CRITICAL REQUIREMENTS:**
36
+
37
+ - Your response SHOULD include reasoning text explaining what you're doing
38
+ - Your response MUST include AT LEAST ONE bash tool call
39
+ - Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
40
+ - However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
41
+ - Submit your changes and finish your work by issuing the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`.
42
+ Do not combine it with any other command. <important>After this command, you cannot continue working on this task.</important>
43
+
44
+ Example of a CORRECT response:
50
45
  <example_response>
51
- THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
46
+ I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
52
47
 
53
- ```bash
54
- ls -la
55
- ```
48
+ [Makes bash tool call with {"command": "ls -la"} as arguments]
56
49
  </example_response>
57
50
 
51
+ <system_information>
52
+ {{system}} {{release}} {{version}} {{machine}}
53
+ </system_information>
54
+
58
55
  ## Useful command examples
59
56
 
60
57
  ### Create a new file:
@@ -101,48 +98,7 @@ agent:
101
98
  ```bash
102
99
  anything
103
100
  ```
104
- action_observation_template: |
105
- <returncode>{{output.returncode}}</returncode>
106
- {% if output.output | length < 10000 -%}
107
- <output>
108
- {{ output.output -}}
109
- </output>
110
- {%- else -%}
111
- <warning>
112
- The output of your last command was too long.
113
- Please try a different command that produces less output.
114
- If you're looking at a file you can try use head, tail or sed to view a smaller number of lines selectively.
115
- If you're using grep or find and it produced too much output, you can use a more selective search pattern.
116
- If you really need to see something from the full command's output, you can redirect output to a file and then search in that file.
117
- </warning>
118
- {%- set elided_chars = output.output | length - 10000 -%}
119
- <output_head>
120
- {{ output.output[:5000] }}
121
- </output_head>
122
- <elided_chars>
123
- {{ elided_chars }} characters elided
124
- </elided_chars>
125
- <output_tail>
126
- {{ output.output[-5000:] }}
127
- </output_tail>
128
- {%- endif -%}
129
- format_error_template: |
130
- Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
131
- If you want to end the task, please issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
132
- without any other command.
133
- Else, please format your response exactly as follows:
134
-
135
- <response_example>
136
- Here are some thoughts about why you want to perform the action.
137
-
138
- ```bash
139
- <action>
140
- ```
141
- </response_example>
142
-
143
- Note: In rare cases, if you need to reference a similar format in your command, you might have
144
- to proceed in two steps, first writing TRIPLEBACKTICKSBASH, then replacing them with ```bash.
145
- step_limit: 0.
101
+ step_limit: 0
146
102
  cost_limit: 3.
147
103
  mode: confirm
148
104
  environment:
@@ -153,5 +109,31 @@ environment:
153
109
  PIP_PROGRESS_BAR: 'off'
154
110
  TQDM_DISABLE: '1'
155
111
  model:
112
+ observation_template: |
113
+ {%- if output.output | length < 10000 -%}
114
+ {
115
+ "returncode": {{ output.returncode }},
116
+ "output": {{ output.output | tojson }}
117
+ {%- if output.exception_info %}, "exception_info": {{ output.exception_info | tojson }}{% endif %}
118
+ }
119
+ {%- else -%}
120
+ {
121
+ "returncode": {{ output.returncode }},
122
+ "output_head": {{ output.output[:5000] | tojson }},
123
+ "output_tail": {{ output.output[-5000:] | tojson }},
124
+ "elided_chars": {{ output.output | length - 10000 }},
125
+ "warning": "Output too long."
126
+ {%- if output.exception_info %}, "exception_info": {{ output.exception_info | tojson }}{% endif %}
127
+ }
128
+ {%- endif -%}
129
+ format_error_template: |
130
+ Tool call error. Every response needs to use the 'bash' tool at least once to execute commands.
131
+
132
+ Call the bash tool with your command as the argument:
133
+ - Tool: bash
134
+ - Arguments: {"command": "your_command_here"}
135
+
136
+ If you want to end the task, please issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
137
+ without any other command.
156
138
  model_kwargs:
157
- drop_params: true
139
+ drop_params: true