mini-swe-agent 1.17.4__py3-none-any.whl → 2.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/METADATA +36 -52
- mini_swe_agent-2.0.0a1.dist-info/RECORD +70 -0
- {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/WHEEL +1 -1
- mini_swe_agent-2.0.0a1.dist-info/entry_points.txt +5 -0
- minisweagent/__init__.py +19 -26
- minisweagent/agents/default.py +128 -113
- minisweagent/agents/interactive.py +119 -58
- minisweagent/config/README.md +3 -4
- minisweagent/config/__init__.py +36 -1
- minisweagent/config/benchmarks/swebench.yaml +156 -0
- minisweagent/config/{extra/swebench.yaml → benchmarks/swebench_backticks.yaml} +69 -64
- minisweagent/config/benchmarks/swebench_modal.yaml +47 -0
- minisweagent/config/{extra → benchmarks}/swebench_xml.yaml +73 -70
- minisweagent/config/default.yaml +24 -21
- minisweagent/config/inspector.tcss +42 -0
- minisweagent/config/mini.yaml +53 -71
- minisweagent/config/{github_issue.yaml → mini_textbased.yaml} +43 -29
- minisweagent/environments/__init__.py +1 -0
- minisweagent/environments/docker.py +67 -20
- minisweagent/environments/extra/bubblewrap.py +86 -47
- minisweagent/environments/extra/swerex_docker.py +53 -20
- minisweagent/environments/extra/swerex_modal.py +90 -0
- minisweagent/environments/local.py +62 -21
- minisweagent/environments/singularity.py +59 -18
- minisweagent/exceptions.py +22 -0
- minisweagent/models/__init__.py +6 -7
- minisweagent/models/extra/roulette.py +20 -17
- minisweagent/models/litellm_model.py +90 -44
- minisweagent/models/litellm_response_model.py +80 -0
- minisweagent/models/litellm_textbased_model.py +45 -0
- minisweagent/models/openrouter_model.py +87 -45
- minisweagent/models/openrouter_response_model.py +123 -0
- minisweagent/models/openrouter_textbased_model.py +76 -0
- minisweagent/models/portkey_model.py +84 -42
- minisweagent/models/portkey_response_model.py +163 -0
- minisweagent/models/requesty_model.py +91 -41
- minisweagent/models/test_models.py +246 -19
- minisweagent/models/utils/actions_text.py +60 -0
- minisweagent/models/utils/actions_toolcall.py +102 -0
- minisweagent/models/utils/actions_toolcall_response.py +110 -0
- minisweagent/models/utils/anthropic_utils.py +28 -0
- minisweagent/models/utils/cache_control.py +15 -2
- minisweagent/models/utils/content_string.py +74 -0
- minisweagent/models/utils/openai_multimodal.py +50 -0
- minisweagent/models/utils/retry.py +25 -0
- minisweagent/run/benchmarks/__init__.py +1 -0
- minisweagent/run/{extra → benchmarks}/swebench.py +57 -36
- minisweagent/run/benchmarks/swebench_single.py +89 -0
- minisweagent/run/{extra → benchmarks}/utils/batch_progress.py +1 -1
- minisweagent/run/hello_world.py +6 -0
- minisweagent/run/mini.py +54 -63
- minisweagent/run/utilities/__init__.py +1 -0
- minisweagent/run/{extra → utilities}/config.py +2 -0
- minisweagent/run/{inspector.py → utilities/inspector.py} +90 -11
- minisweagent/run/{mini_extra.py → utilities/mini_extra.py} +9 -5
- minisweagent/utils/serialize.py +26 -0
- mini_swe_agent-1.17.4.dist-info/RECORD +0 -61
- mini_swe_agent-1.17.4.dist-info/entry_points.txt +0 -5
- minisweagent/agents/interactive_textual.py +0 -450
- minisweagent/config/extra/swebench_roulette.yaml +0 -233
- minisweagent/config/mini.tcss +0 -86
- minisweagent/models/anthropic.py +0 -35
- minisweagent/models/litellm_response_api_model.py +0 -82
- minisweagent/models/portkey_response_api_model.py +0 -75
- minisweagent/models/utils/key_per_thread.py +0 -20
- minisweagent/models/utils/openai_utils.py +0 -41
- minisweagent/run/extra/swebench_single.py +0 -79
- minisweagent/run/github_issue.py +0 -87
- minisweagent/run/utils/__init__.py +0 -0
- minisweagent/run/utils/save.py +0 -78
- {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/licenses/LICENSE.md +0 -0
- {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/top_level.txt +0 -0
- /minisweagent/config/{extra → benchmarks}/__init__.py +0 -0
- /minisweagent/run/{extra → benchmarks}/utils/__init__.py +0 -0
|
@@ -1,233 +0,0 @@
|
|
|
1
|
-
agent:
|
|
2
|
-
system_template: |
|
|
3
|
-
You are a helpful assistant that can interact multiple times with a computer shell to solve programming tasks.
|
|
4
|
-
Your response must contain exactly ONE bash code block with ONE command (or commands connected with && or ||).
|
|
5
|
-
|
|
6
|
-
Include a THOUGHT section before your command where you explain your reasoning process.
|
|
7
|
-
Format your response as shown in <format_example>.
|
|
8
|
-
|
|
9
|
-
<format_example>
|
|
10
|
-
THOUGHT: Your reasoning and analysis here
|
|
11
|
-
|
|
12
|
-
```bash
|
|
13
|
-
your_command_here
|
|
14
|
-
```
|
|
15
|
-
</format_example>
|
|
16
|
-
|
|
17
|
-
Failure to follow these rules will cause your response to be rejected.
|
|
18
|
-
instance_template: |
|
|
19
|
-
<pr_description>
|
|
20
|
-
Consider the following PR description:
|
|
21
|
-
{{task}}
|
|
22
|
-
</pr_description>
|
|
23
|
-
|
|
24
|
-
<instructions>
|
|
25
|
-
# Task Instructions
|
|
26
|
-
|
|
27
|
-
## Overview
|
|
28
|
-
You're a software engineer interacting continuously with a computer by submitting commands.
|
|
29
|
-
You'll be helping implement necessary changes to meet requirements in the PR description.
|
|
30
|
-
Your task is specifically to make changes to non-test files in the current directory in order to fix the issue described in the PR description in a way that is general and consistent with the codebase.
|
|
31
|
-
|
|
32
|
-
IMPORTANT: This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command.
|
|
33
|
-
|
|
34
|
-
For each response:
|
|
35
|
-
1. Include a THOUGHT section explaining your reasoning and what you're trying to accomplish
|
|
36
|
-
2. Provide exactly ONE bash command to execute
|
|
37
|
-
|
|
38
|
-
## Important Boundaries
|
|
39
|
-
- MODIFY: Regular source code files in /testbed (this is the working directory for all your subsequent commands)
|
|
40
|
-
- DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
|
|
41
|
-
|
|
42
|
-
## Recommended Workflow
|
|
43
|
-
1. Analyze the codebase by finding and reading relevant files
|
|
44
|
-
2. Create a script to reproduce the issue
|
|
45
|
-
3. Edit the source code to resolve the issue
|
|
46
|
-
4. Verify your fix works by running your script again
|
|
47
|
-
5. Test edge cases to ensure your fix is robust
|
|
48
|
-
|
|
49
|
-
## Command Execution Rules
|
|
50
|
-
You are operating in an environment where
|
|
51
|
-
1. You write a single command
|
|
52
|
-
2. The system executes that command in a subshell
|
|
53
|
-
3. You see the result
|
|
54
|
-
4. You write your next command
|
|
55
|
-
|
|
56
|
-
Each response should include:
|
|
57
|
-
1. A **THOUGHT** section where you explain your reasoning and plan
|
|
58
|
-
2. A single bash code block with your command
|
|
59
|
-
|
|
60
|
-
Format your responses like this:
|
|
61
|
-
|
|
62
|
-
<format_example>
|
|
63
|
-
THOUGHT: Here I explain my reasoning process, analysis of the current situation,
|
|
64
|
-
and what I'm trying to accomplish with the command below.
|
|
65
|
-
|
|
66
|
-
```bash
|
|
67
|
-
your_command_here
|
|
68
|
-
```
|
|
69
|
-
</format_example>
|
|
70
|
-
|
|
71
|
-
Commands must be specified in a single bash code block:
|
|
72
|
-
|
|
73
|
-
```bash
|
|
74
|
-
your_command_here
|
|
75
|
-
```
|
|
76
|
-
|
|
77
|
-
**CRITICAL REQUIREMENTS:**
|
|
78
|
-
- Your response SHOULD include a THOUGHT section explaining your reasoning
|
|
79
|
-
- Your response MUST include EXACTLY ONE bash code block
|
|
80
|
-
- This bash block MUST contain EXACTLY ONE command (or a set of commands connected with && or ||)
|
|
81
|
-
- If you include zero or multiple bash blocks, or no command at all, YOUR RESPONSE WILL FAIL
|
|
82
|
-
- Do NOT try to run multiple independent commands in separate blocks in one response
|
|
83
|
-
- Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
|
|
84
|
-
- However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
|
|
85
|
-
|
|
86
|
-
Example of a CORRECT response:
|
|
87
|
-
<example_response>
|
|
88
|
-
THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
|
|
89
|
-
|
|
90
|
-
```bash
|
|
91
|
-
ls -la
|
|
92
|
-
```
|
|
93
|
-
</example_response>
|
|
94
|
-
|
|
95
|
-
Example of an INCORRECT response:
|
|
96
|
-
<example_response>
|
|
97
|
-
THOUGHT: I need to examine the codebase and then look at a specific file. I'll run multiple commands to do this.
|
|
98
|
-
|
|
99
|
-
```bash
|
|
100
|
-
ls -la
|
|
101
|
-
```
|
|
102
|
-
|
|
103
|
-
Now I'll read the file:
|
|
104
|
-
|
|
105
|
-
```bash
|
|
106
|
-
cat file.txt
|
|
107
|
-
```
|
|
108
|
-
</example_response>
|
|
109
|
-
|
|
110
|
-
If you need to run multiple commands, either:
|
|
111
|
-
1. Combine them in one block using && or ||
|
|
112
|
-
```bash
|
|
113
|
-
command1 && command2 || echo "Error occurred"
|
|
114
|
-
```
|
|
115
|
-
|
|
116
|
-
2. Wait for the first command to complete, see its output, then issue the next command in your following response.
|
|
117
|
-
|
|
118
|
-
## Environment Details
|
|
119
|
-
- You have a full Linux shell environment
|
|
120
|
-
- Always use non-interactive flags (-y, -f) for commands
|
|
121
|
-
- Avoid interactive tools like vi, nano, or any that require user input
|
|
122
|
-
- If a command isn't available, you can install it
|
|
123
|
-
|
|
124
|
-
## Useful Command Examples
|
|
125
|
-
|
|
126
|
-
### Create a new file:
|
|
127
|
-
```bash
|
|
128
|
-
cat <<'EOF' > newfile.py
|
|
129
|
-
import numpy as np
|
|
130
|
-
hello = "world"
|
|
131
|
-
print(hello)
|
|
132
|
-
EOF
|
|
133
|
-
```
|
|
134
|
-
|
|
135
|
-
### Edit files with sed:
|
|
136
|
-
```bash
|
|
137
|
-
# Replace all occurrences
|
|
138
|
-
sed -i 's/old_string/new_string/g' filename.py
|
|
139
|
-
|
|
140
|
-
# Replace only first occurrence
|
|
141
|
-
sed -i 's/old_string/new_string/' filename.py
|
|
142
|
-
|
|
143
|
-
# Replace first occurrence on line 1
|
|
144
|
-
sed -i '1s/old_string/new_string/' filename.py
|
|
145
|
-
|
|
146
|
-
# Replace all occurrences in lines 1-10
|
|
147
|
-
sed -i '1,10s/old_string/new_string/g' filename.py
|
|
148
|
-
```
|
|
149
|
-
|
|
150
|
-
### View file content:
|
|
151
|
-
```bash
|
|
152
|
-
# View specific lines with numbers
|
|
153
|
-
nl -ba filename.py | sed -n '10,20p'
|
|
154
|
-
```
|
|
155
|
-
|
|
156
|
-
### Any other command you want to run
|
|
157
|
-
```bash
|
|
158
|
-
anything
|
|
159
|
-
```
|
|
160
|
-
|
|
161
|
-
## Submission
|
|
162
|
-
When you've completed your work (reading, editing, testing), and cannot make further progress
|
|
163
|
-
issue exactly the following command:
|
|
164
|
-
|
|
165
|
-
```bash
|
|
166
|
-
echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached
|
|
167
|
-
```
|
|
168
|
-
|
|
169
|
-
This command will submit your work.
|
|
170
|
-
You cannot continue working (reading, editing, testing) in any way on this task after submitting.
|
|
171
|
-
</instructions>
|
|
172
|
-
action_observation_template: |
|
|
173
|
-
<returncode>{{output.returncode}}</returncode>
|
|
174
|
-
{% if output.output | length < 10000 -%}
|
|
175
|
-
<output>
|
|
176
|
-
{{ output.output -}}
|
|
177
|
-
</output>
|
|
178
|
-
{%- else -%}
|
|
179
|
-
<warning>
|
|
180
|
-
The output of your last command was too long.
|
|
181
|
-
Please try a different command that produces less output.
|
|
182
|
-
If you're looking at a file you can try use head, tail or sed to view a smaller number of lines selectively.
|
|
183
|
-
If you're using grep or find and it produced too much output, you can use a more selective search pattern.
|
|
184
|
-
If you really need to see something from the full command's output, you can redirect output to a file and then search in that file.
|
|
185
|
-
</warning>
|
|
186
|
-
{%- set elided_chars = output.output | length - 10000 -%}
|
|
187
|
-
<output_head>
|
|
188
|
-
{{ output.output[:5000] }}
|
|
189
|
-
</output_head>
|
|
190
|
-
<elided_chars>
|
|
191
|
-
{{ elided_chars }} characters elided
|
|
192
|
-
</elided_chars>
|
|
193
|
-
<output_tail>
|
|
194
|
-
{{ output.output[-5000:] }}
|
|
195
|
-
</output_tail>
|
|
196
|
-
{%- endif -%}
|
|
197
|
-
format_error_template: |
|
|
198
|
-
Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
|
|
199
|
-
|
|
200
|
-
Please format your action in triple backticks as shown in <response_example>.
|
|
201
|
-
|
|
202
|
-
<response_example>
|
|
203
|
-
Here are some thoughts about why you want to perform the action.
|
|
204
|
-
|
|
205
|
-
```bash
|
|
206
|
-
<action>
|
|
207
|
-
```
|
|
208
|
-
</response_example>
|
|
209
|
-
|
|
210
|
-
If you have completed your assignment, please consult the first message about how to
|
|
211
|
-
submit your solution (you will not be able to continue working on this task after that).
|
|
212
|
-
step_limit: 250
|
|
213
|
-
cost_limit: 3.
|
|
214
|
-
|
|
215
|
-
environment:
|
|
216
|
-
cwd: "/testbed"
|
|
217
|
-
timeout: 60
|
|
218
|
-
env:
|
|
219
|
-
PAGER: cat
|
|
220
|
-
MANPAGER: cat
|
|
221
|
-
LESS: -R
|
|
222
|
-
PIP_PROGRESS_BAR: 'off'
|
|
223
|
-
TQDM_DISABLE: '1'
|
|
224
|
-
environment_class: docker
|
|
225
|
-
|
|
226
|
-
model:
|
|
227
|
-
model_name: "roulette"
|
|
228
|
-
model_class: "minisweagent.models.extra.roulette.RouletteModel"
|
|
229
|
-
model_kwargs:
|
|
230
|
-
- model_name: "anthropic/claude-sonnet-4-5-20250929"
|
|
231
|
-
model_kwargs:
|
|
232
|
-
temperature: 0.
|
|
233
|
-
- model_name: "gpt-5"
|
minisweagent/config/mini.tcss
DELETED
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
Screen {
|
|
2
|
-
layout: grid;
|
|
3
|
-
grid-size: 1;
|
|
4
|
-
grid-rows: auto 1fr auto;
|
|
5
|
-
}
|
|
6
|
-
|
|
7
|
-
#main {
|
|
8
|
-
height: 100%;
|
|
9
|
-
padding: 1;
|
|
10
|
-
layout: vertical;
|
|
11
|
-
}
|
|
12
|
-
|
|
13
|
-
Footer {
|
|
14
|
-
dock: bottom;
|
|
15
|
-
content-align: center middle;
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
#content {
|
|
19
|
-
height: auto;
|
|
20
|
-
min-height: 0;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
.smart-input-container {
|
|
24
|
-
height: auto;
|
|
25
|
-
margin-top: 0;
|
|
26
|
-
padding: 0;
|
|
27
|
-
min-height: 0;
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
.multi-input {
|
|
31
|
-
height: auto;
|
|
32
|
-
max-height: 20;
|
|
33
|
-
min-height: 3;
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
.prompt-display {
|
|
37
|
-
margin-bottom: 1;
|
|
38
|
-
padding: 0 1;
|
|
39
|
-
text-style: bold;
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
.hint-text{
|
|
43
|
-
margin-top: 1;
|
|
44
|
-
margin-bottom: 1;
|
|
45
|
-
padding: 0 1;
|
|
46
|
-
color: white;
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
.message-container {
|
|
50
|
-
margin: 1;
|
|
51
|
-
padding: 1;
|
|
52
|
-
background: $surface;
|
|
53
|
-
height: auto;
|
|
54
|
-
width: 100%;
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
.message-header {
|
|
58
|
-
text-align: left;
|
|
59
|
-
color: $primary;
|
|
60
|
-
padding: 0 1;
|
|
61
|
-
text-style: bold;
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
.input-request-header {
|
|
65
|
-
color: $warning;
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
.message-content {
|
|
69
|
-
margin-top: 1;
|
|
70
|
-
padding: 0 1;
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
Header.running {
|
|
74
|
-
background: $error;
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
.button-container {
|
|
78
|
-
layout: horizontal;
|
|
79
|
-
align-horizontal: center;
|
|
80
|
-
margin-top: 1;
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
.button-container Button {
|
|
84
|
-
margin: 0 1;
|
|
85
|
-
min-width: 10;
|
|
86
|
-
}
|
minisweagent/models/anthropic.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import warnings
|
|
3
|
-
from typing import Literal
|
|
4
|
-
|
|
5
|
-
from minisweagent.models.litellm_model import LitellmModel, LitellmModelConfig
|
|
6
|
-
from minisweagent.models.utils.cache_control import set_cache_control
|
|
7
|
-
from minisweagent.models.utils.key_per_thread import get_key_per_thread
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class AnthropicModelConfig(LitellmModelConfig):
    """Configuration for `AnthropicModel`; extends `LitellmModelConfig`."""

    set_cache_control: Literal["default_end"] | None = "default_end"
    """Set explicit cache control markers, for example for Anthropic models"""


class AnthropicModel(LitellmModel):
    """This class is now only a thin wrapper around the LitellmModel class.
    It is largely kept for backwards compatibility.
    It will not be selected by `get_model` and `get_model_class` unless explicitly specified.
    """

    def __init__(self, *, config_class: type = AnthropicModelConfig, **kwargs):
        """Build the model, defaulting the config class to `AnthropicModelConfig`.

        All other keyword arguments are forwarded unchanged to `LitellmModel`.
        """
        super().__init__(config_class=config_class, **kwargs)

    def query(self, messages: list[dict], **kwargs) -> dict:
        """Add cache-control markers to `messages` and delegate to `LitellmModel.query`.

        Args:
            messages: Conversation history to send to the model.
            **kwargs: Extra keyword arguments forwarded to the parent `query`.

        Returns:
            Whatever the parent `query` returns.
        """
        api_key = None
        # Legacy only
        if rotating_keys := os.getenv("ANTHROPIC_API_KEYS"):
            warnings.warn(
                "ANTHROPIC_API_KEYS is deprecated and will be removed in the future. "
                "Simply use the ANTHROPIC_API_KEY environment variable instead. "
                "Key rotation is no longer required.",
                stacklevel=2,
            )
            api_key = get_key_per_thread(rotating_keys.split("::"))
        # Honour the configured mode instead of hard-coding it, so that
        # `set_cache_control: None` really disables the markers. Custom config
        # classes without the field keep the historical "default_end" behaviour.
        cache_mode = getattr(self.config, "set_cache_control", "default_end")
        if cache_mode:
            messages = set_cache_control(messages, mode=cache_mode)
        return super().query(messages, api_key=api_key, **kwargs)
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from collections.abc import Callable
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
import litellm
|
|
6
|
-
from tenacity import (
|
|
7
|
-
before_sleep_log,
|
|
8
|
-
retry,
|
|
9
|
-
retry_if_not_exception_type,
|
|
10
|
-
stop_after_attempt,
|
|
11
|
-
wait_exponential,
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
from minisweagent.models.litellm_model import LitellmModel, LitellmModelConfig
|
|
15
|
-
from minisweagent.models.utils.openai_utils import coerce_responses_text
|
|
16
|
-
|
|
17
|
-
logger = logging.getLogger("litellm_response_api_model")
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
@dataclass
class LitellmResponseAPIModelConfig(LitellmModelConfig):
    """Configuration for `LitellmResponseAPIModel`; adds nothing to `LitellmModelConfig`."""


class LitellmResponseAPIModel(LitellmModel):
    """`LitellmModel` variant that queries through `litellm.responses`.

    The id of the latest response is remembered so that follow-up calls send
    only the newest message together with `previous_response_id`.
    """

    def __init__(self, *, config_class: Callable = LitellmResponseAPIModelConfig, **kwargs):
        super().__init__(config_class=config_class, **kwargs)
        # No response has been received yet, so the first call sends the full history.
        self._previous_response_id: str | None = None

    @retry(
        reraise=True,
        stop=stop_after_attempt(10),
        wait=wait_exponential(multiplier=1, min=4, max=60),
        before_sleep=before_sleep_log(logger, logging.WARNING),
        retry=retry_if_not_exception_type(
            (
                litellm.exceptions.UnsupportedParamsError,
                litellm.exceptions.NotFoundError,
                litellm.exceptions.PermissionDeniedError,
                litellm.exceptions.ContextWindowExceededError,
                litellm.exceptions.APIError,
                litellm.exceptions.AuthenticationError,
                KeyboardInterrupt,
            )
        ),
    )
    def _query(self, messages: list[dict[str, str]], **kwargs):
        """Issue one `litellm.responses` call, retried on errors not listed in the decorator."""
        # Remove 'timestamp' field added by agent - not supported by OpenAI responses API
        stripped = [{"role": entry["role"], "content": entry["content"]} for entry in messages]
        # Only the newest message is sent once a previous response id is known.
        payload = stripped if self._previous_response_id is None else stripped[-1:]
        call_kwargs = self.config.model_kwargs | kwargs
        try:
            response = litellm.responses(
                model=self.config.model_name,
                input=payload,
                previous_response_id=self._previous_response_id,
                **call_kwargs,
            )
        except litellm.exceptions.AuthenticationError as auth_error:
            auth_error.message += " You can permanently set your API key with `mini-extra config set KEY VALUE`."
            raise auth_error
        self._previous_response_id = getattr(response, "id", None)
        return response

    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        """Query the model, record the cost, and return the reply text under `"content"`.

        Raises:
            Exception: Whatever the cost calculation raised, after logging it.
        """
        response = self._query(messages, **kwargs)
        reply_text = coerce_responses_text(response)
        try:
            call_cost = litellm.cost_calculator.completion_cost(response, model=self.config.model_name)
        except Exception as e:
            logger.critical(
                f"Error calculating cost for model {self.config.model_name}: {e}. "
                "Please check the 'Updating the model registry' section in the documentation. "
                "http://bit.ly/4p31bi4 Still stuck? Please open a github issue for help!"
            )
            raise
        self.n_calls += 1
        self.cost += call_cost
        # Imported here rather than at module level, as in the original.
        from minisweagent.models import GLOBAL_MODEL_STATS

        GLOBAL_MODEL_STATS.add(call_cost)
        return {"content": reply_text}
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import os
|
|
3
|
-
from dataclasses import dataclass
|
|
4
|
-
|
|
5
|
-
import litellm
|
|
6
|
-
from tenacity import (
|
|
7
|
-
before_sleep_log,
|
|
8
|
-
retry,
|
|
9
|
-
retry_if_not_exception_type,
|
|
10
|
-
stop_after_attempt,
|
|
11
|
-
wait_exponential,
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
from minisweagent.models import GLOBAL_MODEL_STATS
|
|
15
|
-
from minisweagent.models.portkey_model import PortkeyModel, PortkeyModelConfig
|
|
16
|
-
from minisweagent.models.utils.cache_control import set_cache_control
|
|
17
|
-
from minisweagent.models.utils.openai_utils import coerce_responses_text
|
|
18
|
-
|
|
19
|
-
logger = logging.getLogger("portkey_response_api_model")
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
@dataclass
class PortkeyResponseAPIModelConfig(PortkeyModelConfig):
    """Configuration for `PortkeyResponseAPIModel`; adds nothing to `PortkeyModelConfig`."""


class PortkeyResponseAPIModel(PortkeyModel):
    """`PortkeyModel` variant that queries through `client.responses.create`.

    The id of the latest response is remembered so that follow-up calls send
    only the newest message together with `previous_response_id`.
    """

    def __init__(self, *, config_class: type = PortkeyResponseAPIModelConfig, **kwargs):
        super().__init__(config_class=config_class, **kwargs)
        # No response has been received yet, so the first call sends the full history.
        self._previous_response_id: str | None = None

    @retry(
        reraise=True,
        stop=stop_after_attempt(int(os.getenv("MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT", "10"))),
        wait=wait_exponential(multiplier=1, min=4, max=60),
        before_sleep=before_sleep_log(logger, logging.WARNING),
        retry=retry_if_not_exception_type((KeyboardInterrupt, TypeError, ValueError)),
    )
    def _query(self, messages: list[dict[str, str]], **kwargs):
        """Issue one responses call; retried unless the error is one of the listed types."""
        input_messages = messages if self._previous_response_id is None else messages[-1:]
        resp = self.client.responses.create(
            model=self.config.model_name,
            input=input_messages,
            previous_response_id=self._previous_response_id,
            **(self.config.model_kwargs | kwargs),
        )
        self._previous_response_id = getattr(resp, "id", None)
        return resp

    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        """Query the model, record the cost, and return the reply text plus raw response.

        Returns:
            A dict with `"content"` (the reply text) and `"extra"` holding the
            dumped response and the computed cost.

        Raises:
            RuntimeError: If the cost cannot be calculated or is not positive,
                unless `cost_tracking` is set to `"ignore_errors"`.
        """
        if self.config.set_cache_control:
            messages = set_cache_control(messages, mode=self.config.set_cache_control)
        response = self._query(messages, **kwargs)
        text = coerce_responses_text(response)
        try:
            cost = litellm.cost_calculator.completion_cost(response, model=self.config.model_name)
            # Explicit check instead of `assert`, which is stripped under
            # `python -O` and would let a zero cost through silently.
            if not cost > 0.0:
                raise ValueError(f"Cost is not positive: {cost}")
        except Exception as e:
            if self.config.cost_tracking != "ignore_errors":
                raise RuntimeError(
                    f"Error calculating cost for model {self.config.model_name}: {e}. "
                    "You can ignore this issue from your config file with cost_tracking: 'ignore_errors' or "
                    "globally with export MSWEA_COST_TRACKING='ignore_errors' to ignore this error. "
                ) from e
            cost = 0.0
        self.n_calls += 1
        self.cost += cost
        GLOBAL_MODEL_STATS.add(cost)
        return {
            "content": text,
            "extra": {
                "response": response.model_dump() if hasattr(response, "model_dump") else {},
                "cost": cost,
            },
        }
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
"""Utility for anthropic where we need different keys for different parallel
|
|
2
|
-
agents to not mess up prompt caching.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import threading
|
|
6
|
-
import warnings
|
|
7
|
-
from typing import Any
|
|
8
|
-
|
|
9
|
-
# Thread names in the order they first requested a key; the position in this
# list decides which key a thread receives.
_THREADS_THAT_USED_API_KEYS: list[Any] = []


def get_key_per_thread(api_keys: list[Any]) -> Any:
    """Choose key based on thread name. Returns None if no keys are available.

    Each distinct thread name is assigned the next index on first use, and the
    key at ``index % len(api_keys)`` is returned, so a given thread keeps
    getting the same key for a given list.

    Args:
        api_keys: Candidate keys; may be empty.

    Returns:
        The selected key, or ``None`` when ``api_keys`` is empty or the
        selected entry is falsy (for example an empty string).
    """
    warnings.warn("get_key_per_thread is deprecated and will be removed in the future", stacklevel=2)
    thread_name = threading.current_thread().name
    if thread_name not in _THREADS_THAT_USED_API_KEYS:
        _THREADS_THAT_USED_API_KEYS.append(thread_name)
    # Guard the modulo below: an empty list used to raise ZeroDivisionError
    # even though the contract is to return None when no keys are available.
    if not api_keys:
        return None
    thread_idx = _THREADS_THAT_USED_API_KEYS.index(thread_name)
    return api_keys[thread_idx % len(api_keys)] or None
|
|
@@ -1,41 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from typing import Any
|
|
3
|
-
|
|
4
|
-
from openai.types.responses.response_output_message import ResponseOutputMessage
|
|
5
|
-
|
|
6
|
-
logger = logging.getLogger("openai_utils")


def coerce_responses_text(resp: Any) -> str:
    """Helper to normalize OpenAI Responses API result to text.

    Works with both OpenAI client responses and LiteLLM/Portkey responses.

    A non-empty string in ``resp.output_text`` is returned as is. Otherwise the
    ``text`` values found in the content of each ``resp.output`` item (dicts or
    ``ResponseOutputMessage`` objects) are joined with blank lines.

    Args:
        resp: Response object exposing ``output_text`` and/or ``output``.

    Returns:
        The extracted text, or ``""`` when nothing can be extracted.
    """
    text = getattr(resp, "output_text", None)
    if isinstance(text, str) and text:
        return text

    try:
        output = []
        for item in resp.output:
            if isinstance(item, dict):
                content = item.get("content", [])
            elif isinstance(item, ResponseOutputMessage):
                content = item.content
            else:
                continue

            # `content` may be present but None; treat that like "no content"
            # instead of letting a TypeError discard text from other items.
            for content_item in content or []:
                if isinstance(content_item, dict):
                    text_val = content_item.get("text")
                elif hasattr(content_item, "text"):
                    text_val = content_item.text
                else:
                    continue

                # Keep only real strings: one non-string value used to make the
                # join below fail, which dropped every valid piece of text.
                if isinstance(text_val, str) and text_val:
                    output.append(text_val)
        return "\n\n".join(output)
    except (AttributeError, IndexError, TypeError):
        logger.warning(f"Could not extract text from response: {resp}")
        return ""
|
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
"""Run on a single SWE-Bench instance."""
|
|
2
|
-
|
|
3
|
-
import traceback
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
|
|
6
|
-
import typer
|
|
7
|
-
import yaml
|
|
8
|
-
from datasets import load_dataset
|
|
9
|
-
|
|
10
|
-
from minisweagent import global_config_dir
|
|
11
|
-
from minisweagent.agents.interactive import InteractiveAgent
|
|
12
|
-
from minisweagent.config import builtin_config_dir, get_config_path
|
|
13
|
-
from minisweagent.models import get_model
|
|
14
|
-
from minisweagent.run.extra.swebench import (
|
|
15
|
-
DATASET_MAPPING,
|
|
16
|
-
get_sb_environment,
|
|
17
|
-
)
|
|
18
|
-
from minisweagent.run.utils.save import save_traj
|
|
19
|
-
from minisweagent.utils.log import logger
|
|
20
|
-
|
|
21
|
-
app = typer.Typer(add_completion=False)
|
|
22
|
-
|
|
23
|
-
DEFAULT_OUTPUT = global_config_dir / "last_swebench_single_run.traj.json"
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
# fmt: off
@app.command()
def main(
    subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset", rich_help_panel="Data selection"),
    split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
    instance_spec: str = typer.Option("0", "-i", "--instance", help="SWE-Bench instance ID or index", rich_help_panel="Data selection"),
    model_name: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
    model_class: str | None = typer.Option(None, "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
    config_path: Path = typer.Option(builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
    environment_class: str | None = typer.Option(None, "--environment-class", rich_help_panel="Advanced"),
    exit_immediately: bool = typer.Option(False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting.", rich_help_panel="Basic"),
    output: Path = typer.Option(DEFAULT_OUTPUT, "-o", "--output", help="Output trajectory file", rich_help_panel="Basic"),
) -> None:
    # fmt: on
    """Run on a single SWE-Bench instance."""
    # NOTE: `-c` belongs to `--config` only. It used to be declared on
    # `--model-class` as well, which made the short flag ambiguous.
    dataset_path = DATASET_MAPPING.get(subset, subset)
    logger.info(f"Loading dataset from {dataset_path}, split {split}...")
    instances = {
        inst["instance_id"]: inst  # type: ignore
        for inst in load_dataset(dataset_path, split=split)
    }
    # A purely numeric spec is an index into the sorted instance ids.
    if instance_spec.isnumeric():
        instance_spec = sorted(instances.keys())[int(instance_spec)]
    instance: dict = instances[instance_spec]  # type: ignore

    config_path = get_config_path(config_path)
    logger.info(f"Loading agent config from '{config_path}'")
    config = yaml.safe_load(config_path.read_text())
    # Command-line overrides take precedence over the values in the config file.
    if environment_class is not None:
        config.setdefault("environment", {})["environment_class"] = environment_class
    if model_class is not None:
        config.setdefault("model", {})["model_class"] = model_class
    if exit_immediately:
        config.setdefault("agent", {})["confirm_exit"] = False
    env = get_sb_environment(config, instance)
    agent = InteractiveAgent(
        get_model(model_name, config.get("model", {})),
        env,
        **({"mode": "yolo"} | config.get("agent", {})),
    )

    exit_status, result, extra_info = None, None, None
    try:
        exit_status, result = agent.run(instance["problem_statement"])  # type: ignore[arg-type]
    except Exception as e:
        logger.error(f"Error processing instance {instance_spec}: {e}", exc_info=True)
        exit_status, result = type(e).__name__, str(e)
        extra_info = {"traceback": traceback.format_exc()}
    finally:
        # Always persist the trajectory, including after a failure.
        save_traj(agent, output, exit_status=exit_status, result=result, extra_info=extra_info)  # type: ignore[arg-type]
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
if __name__ == "__main__":
|
|
79
|
-
app()
|