mini-swe-agent 1.17.4__py3-none-any.whl → 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/METADATA +36 -52
  2. mini_swe_agent-2.0.0a1.dist-info/RECORD +70 -0
  3. {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/WHEEL +1 -1
  4. mini_swe_agent-2.0.0a1.dist-info/entry_points.txt +5 -0
  5. minisweagent/__init__.py +19 -26
  6. minisweagent/agents/default.py +128 -113
  7. minisweagent/agents/interactive.py +119 -58
  8. minisweagent/config/README.md +3 -4
  9. minisweagent/config/__init__.py +36 -1
  10. minisweagent/config/benchmarks/swebench.yaml +156 -0
  11. minisweagent/config/{extra/swebench.yaml → benchmarks/swebench_backticks.yaml} +69 -64
  12. minisweagent/config/benchmarks/swebench_modal.yaml +47 -0
  13. minisweagent/config/{extra → benchmarks}/swebench_xml.yaml +73 -70
  14. minisweagent/config/default.yaml +24 -21
  15. minisweagent/config/inspector.tcss +42 -0
  16. minisweagent/config/mini.yaml +53 -71
  17. minisweagent/config/{github_issue.yaml → mini_textbased.yaml} +43 -29
  18. minisweagent/environments/__init__.py +1 -0
  19. minisweagent/environments/docker.py +67 -20
  20. minisweagent/environments/extra/bubblewrap.py +86 -47
  21. minisweagent/environments/extra/swerex_docker.py +53 -20
  22. minisweagent/environments/extra/swerex_modal.py +90 -0
  23. minisweagent/environments/local.py +62 -21
  24. minisweagent/environments/singularity.py +59 -18
  25. minisweagent/exceptions.py +22 -0
  26. minisweagent/models/__init__.py +6 -7
  27. minisweagent/models/extra/roulette.py +20 -17
  28. minisweagent/models/litellm_model.py +90 -44
  29. minisweagent/models/litellm_response_model.py +80 -0
  30. minisweagent/models/litellm_textbased_model.py +45 -0
  31. minisweagent/models/openrouter_model.py +87 -45
  32. minisweagent/models/openrouter_response_model.py +123 -0
  33. minisweagent/models/openrouter_textbased_model.py +76 -0
  34. minisweagent/models/portkey_model.py +84 -42
  35. minisweagent/models/portkey_response_model.py +163 -0
  36. minisweagent/models/requesty_model.py +91 -41
  37. minisweagent/models/test_models.py +246 -19
  38. minisweagent/models/utils/actions_text.py +60 -0
  39. minisweagent/models/utils/actions_toolcall.py +102 -0
  40. minisweagent/models/utils/actions_toolcall_response.py +110 -0
  41. minisweagent/models/utils/anthropic_utils.py +28 -0
  42. minisweagent/models/utils/cache_control.py +15 -2
  43. minisweagent/models/utils/content_string.py +74 -0
  44. minisweagent/models/utils/openai_multimodal.py +50 -0
  45. minisweagent/models/utils/retry.py +25 -0
  46. minisweagent/run/benchmarks/__init__.py +1 -0
  47. minisweagent/run/{extra → benchmarks}/swebench.py +57 -36
  48. minisweagent/run/benchmarks/swebench_single.py +89 -0
  49. minisweagent/run/{extra → benchmarks}/utils/batch_progress.py +1 -1
  50. minisweagent/run/hello_world.py +6 -0
  51. minisweagent/run/mini.py +54 -63
  52. minisweagent/run/utilities/__init__.py +1 -0
  53. minisweagent/run/{extra → utilities}/config.py +2 -0
  54. minisweagent/run/{inspector.py → utilities/inspector.py} +90 -11
  55. minisweagent/run/{mini_extra.py → utilities/mini_extra.py} +9 -5
  56. minisweagent/utils/serialize.py +26 -0
  57. mini_swe_agent-1.17.4.dist-info/RECORD +0 -61
  58. mini_swe_agent-1.17.4.dist-info/entry_points.txt +0 -5
  59. minisweagent/agents/interactive_textual.py +0 -450
  60. minisweagent/config/extra/swebench_roulette.yaml +0 -233
  61. minisweagent/config/mini.tcss +0 -86
  62. minisweagent/models/anthropic.py +0 -35
  63. minisweagent/models/litellm_response_api_model.py +0 -82
  64. minisweagent/models/portkey_response_api_model.py +0 -75
  65. minisweagent/models/utils/key_per_thread.py +0 -20
  66. minisweagent/models/utils/openai_utils.py +0 -41
  67. minisweagent/run/extra/swebench_single.py +0 -79
  68. minisweagent/run/github_issue.py +0 -87
  69. minisweagent/run/utils/__init__.py +0 -0
  70. minisweagent/run/utils/save.py +0 -78
  71. {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/licenses/LICENSE.md +0 -0
  72. {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/top_level.txt +0 -0
  73. /minisweagent/config/{extra → benchmarks}/__init__.py +0 -0
  74. /minisweagent/run/{extra → benchmarks}/utils/__init__.py +0 -0
@@ -1,233 +0,0 @@
1
- agent:
2
- system_template: |
3
- You are a helpful assistant that can interact multiple times with a computer shell to solve programming tasks.
4
- Your response must contain exactly ONE bash code block with ONE command (or commands connected with && or ||).
5
-
6
- Include a THOUGHT section before your command where you explain your reasoning process.
7
- Format your response as shown in <format_example>.
8
-
9
- <format_example>
10
- THOUGHT: Your reasoning and analysis here
11
-
12
- ```bash
13
- your_command_here
14
- ```
15
- </format_example>
16
-
17
- Failure to follow these rules will cause your response to be rejected.
18
- instance_template: |
19
- <pr_description>
20
- Consider the following PR description:
21
- {{task}}
22
- </pr_description>
23
-
24
- <instructions>
25
- # Task Instructions
26
-
27
- ## Overview
28
- You're a software engineer interacting continuously with a computer by submitting commands.
29
- You'll be helping implement necessary changes to meet requirements in the PR description.
30
- Your task is specifically to make changes to non-test files in the current directory in order to fix the issue described in the PR description in a way that is general and consistent with the codebase.
31
-
32
- IMPORTANT: This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command.
33
-
34
- For each response:
35
- 1. Include a THOUGHT section explaining your reasoning and what you're trying to accomplish
36
- 2. Provide exactly ONE bash command to execute
37
-
38
- ## Important Boundaries
39
- - MODIFY: Regular source code files in /testbed (this is the working directory for all your subsequent commands)
40
- - DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
41
-
42
- ## Recommended Workflow
43
- 1. Analyze the codebase by finding and reading relevant files
44
- 2. Create a script to reproduce the issue
45
- 3. Edit the source code to resolve the issue
46
- 4. Verify your fix works by running your script again
47
- 5. Test edge cases to ensure your fix is robust
48
-
49
- ## Command Execution Rules
50
- You are operating in an environment where
51
- 1. You write a single command
52
- 2. The system executes that command in a subshell
53
- 3. You see the result
54
- 4. You write your next command
55
-
56
- Each response should include:
57
- 1. A **THOUGHT** section where you explain your reasoning and plan
58
- 2. A single bash code block with your command
59
-
60
- Format your responses like this:
61
-
62
- <format_example>
63
- THOUGHT: Here I explain my reasoning process, analysis of the current situation,
64
- and what I'm trying to accomplish with the command below.
65
-
66
- ```bash
67
- your_command_here
68
- ```
69
- </format_example>
70
-
71
- Commands must be specified in a single bash code block:
72
-
73
- ```bash
74
- your_command_here
75
- ```
76
-
77
- **CRITICAL REQUIREMENTS:**
78
- - Your response SHOULD include a THOUGHT section explaining your reasoning
79
- - Your response MUST include EXACTLY ONE bash code block
80
- - This bash block MUST contain EXACTLY ONE command (or a set of commands connected with && or ||)
81
- - If you include zero or multiple bash blocks, or no command at all, YOUR RESPONSE WILL FAIL
82
- - Do NOT try to run multiple independent commands in separate blocks in one response
83
- - Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
84
- - However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
85
-
86
- Example of a CORRECT response:
87
- <example_response>
88
- THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
89
-
90
- ```bash
91
- ls -la
92
- ```
93
- </example_response>
94
-
95
- Example of an INCORRECT response:
96
- <example_response>
97
- THOUGHT: I need to examine the codebase and then look at a specific file. I'll run multiple commands to do this.
98
-
99
- ```bash
100
- ls -la
101
- ```
102
-
103
- Now I'll read the file:
104
-
105
- ```bash
106
- cat file.txt
107
- ```
108
- </example_response>
109
-
110
- If you need to run multiple commands, either:
111
- 1. Combine them in one block using && or ||
112
- ```bash
113
- command1 && command2 || echo "Error occurred"
114
- ```
115
-
116
- 2. Wait for the first command to complete, see its output, then issue the next command in your following response.
117
-
118
- ## Environment Details
119
- - You have a full Linux shell environment
120
- - Always use non-interactive flags (-y, -f) for commands
121
- - Avoid interactive tools like vi, nano, or any that require user input
122
- - If a command isn't available, you can install it
123
-
124
- ## Useful Command Examples
125
-
126
- ### Create a new file:
127
- ```bash
128
- cat <<'EOF' > newfile.py
129
- import numpy as np
130
- hello = "world"
131
- print(hello)
132
- EOF
133
- ```
134
-
135
- ### Edit files with sed:
136
- ```bash
137
- # Replace all occurrences
138
- sed -i 's/old_string/new_string/g' filename.py
139
-
140
- # Replace only first occurrence
141
- sed -i 's/old_string/new_string/' filename.py
142
-
143
- # Replace first occurrence on line 1
144
- sed -i '1s/old_string/new_string/' filename.py
145
-
146
- # Replace all occurrences in lines 1-10
147
- sed -i '1,10s/old_string/new_string/g' filename.py
148
- ```
149
-
150
- ### View file content:
151
- ```bash
152
- # View specific lines with numbers
153
- nl -ba filename.py | sed -n '10,20p'
154
- ```
155
-
156
- ### Any other command you want to run
157
- ```bash
158
- anything
159
- ```
160
-
161
- ## Submission
162
- When you've completed your work (reading, editing, testing), and cannot make further progress
163
- issue exactly the following command:
164
-
165
- ```bash
166
- echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached
167
- ```
168
-
169
- This command will submit your work.
170
- You cannot continue working (reading, editing, testing) in any way on this task after submitting.
171
- </instructions>
172
- action_observation_template: |
173
- <returncode>{{output.returncode}}</returncode>
174
- {% if output.output | length < 10000 -%}
175
- <output>
176
- {{ output.output -}}
177
- </output>
178
- {%- else -%}
179
- <warning>
180
- The output of your last command was too long.
181
- Please try a different command that produces less output.
182
- If you're looking at a file you can try using head, tail or sed to view a smaller number of lines selectively.
183
- If you're using grep or find and it produced too much output, you can use a more selective search pattern.
184
- If you really need to see something from the full command's output, you can redirect output to a file and then search in that file.
185
- </warning>
186
- {%- set elided_chars = output.output | length - 10000 -%}
187
- <output_head>
188
- {{ output.output[:5000] }}
189
- </output_head>
190
- <elided_chars>
191
- {{ elided_chars }} characters elided
192
- </elided_chars>
193
- <output_tail>
194
- {{ output.output[-5000:] }}
195
- </output_tail>
196
- {%- endif -%}
197
- format_error_template: |
198
- Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
199
-
200
- Please format your action in triple backticks as shown in <response_example>.
201
-
202
- <response_example>
203
- Here are some thoughts about why you want to perform the action.
204
-
205
- ```bash
206
- <action>
207
- ```
208
- </response_example>
209
-
210
- If you have completed your assignment, please consult the first message about how to
211
- submit your solution (you will not be able to continue working on this task after that).
212
- step_limit: 250
213
- cost_limit: 3.
214
-
215
# Settings for the environment in which the agent's commands are executed.
environment:
  # Same directory the instance prompt names as the working directory for all commands.
  cwd: "/testbed"
  # NOTE(review): presumably a per-command timeout in seconds — confirm against the environment class.
  timeout: 60
  # Environment variables exported to each command: pagers are replaced by `cat`
  # and pip/tqdm progress output is switched off.
  env:
    PAGER: cat
    MANPAGER: cat
    LESS: -R
    PIP_PROGRESS_BAR: 'off'
    TQDM_DISABLE: '1'
  # Selects the environment implementation by name.
  environment_class: docker
225
-
226
# Model selection: an explicitly named model class plus a list of per-model settings.
model:
  model_name: "roulette"
  # Fully qualified class path, so the class is chosen explicitly rather than inferred from the name.
  model_class: "minisweagent.models.extra.roulette.RouletteModel"
  # One entry per underlying model.
  # NOTE(review): presumably RouletteModel picks among these entries — confirm in roulette.py.
  model_kwargs:
    - model_name: "anthropic/claude-sonnet-4-5-20250929"
      model_kwargs:
        temperature: 0.
    - model_name: "gpt-5"
@@ -1,86 +0,0 @@
1
/* Stylesheet for the Textual-based interactive UI. */

/* Screen: single-column grid with three rows (auto, 1fr, auto). */
Screen {
    layout: grid;
    grid-size: 1;
    grid-rows: auto 1fr auto;
}

/* Main area: full height, stacked vertically. */
#main {
    height: 100%;
    padding: 1;
    layout: vertical;
}

/* Footer is docked to the bottom with centered content. */
Footer {
    dock: bottom;
    content-align: center middle;
}

/* Content area sizes itself to its children. */
#content {
    height: auto;
    min-height: 0;
}

/* Wrapper around the input widgets; no extra margin or padding. */
.smart-input-container {
    height: auto;
    margin-top: 0;
    padding: 0;
    min-height: 0;
}

/* Multi-line input: grows with its content between 3 and 20 rows. */
.multi-input {
    height: auto;
    max-height: 20;
    min-height: 3;
}

/* Bold prompt line shown with a one-row gap below it. */
.prompt-display {
    margin-bottom: 1;
    padding: 0 1;
    text-style: bold;
}

/* Hint text in white with a one-row gap above and below. */
.hint-text{
    margin-top: 1;
    margin-bottom: 1;
    padding: 0 1;
    color: white;
}

/* One message: full-width box on the $surface background. */
.message-container {
    margin: 1;
    padding: 1;
    background: $surface;
    height: auto;
    width: 100%;
}

/* Message header: bold, left-aligned, $primary color. */
.message-header {
    text-align: left;
    color: $primary;
    padding: 0 1;
    text-style: bold;
}

/* Headers of input requests use the $warning color instead. */
.input-request-header {
    color: $warning;
}

/* Message body, separated from its header by one row. */
.message-content {
    margin-top: 1;
    padding: 0 1;
}

/* A Header carrying the `running` class gets the $error background. */
Header.running {
    background: $error;
}

/* Row of buttons, centered horizontally. */
.button-container {
    layout: horizontal;
    align-horizontal: center;
    margin-top: 1;
}

/* Buttons inside the button row: horizontal spacing and a minimum width. */
.button-container Button {
    margin: 0 1;
    min-width: 10;
}
@@ -1,35 +0,0 @@
1
- import os
2
- import warnings
3
- from typing import Literal
4
-
5
- from minisweagent.models.litellm_model import LitellmModel, LitellmModelConfig
6
- from minisweagent.models.utils.cache_control import set_cache_control
7
- from minisweagent.models.utils.key_per_thread import get_key_per_thread
8
-
9
-
10
class AnthropicModelConfig(LitellmModelConfig):
    """Configuration for `AnthropicModel`: `LitellmModelConfig` with cache control on by default."""

    # Defaults to "default_end"; the annotation also permits None.
    # NOTE(review): AnthropicModel.query passes mode="default_end" literally instead of
    # reading this field — confirm whether the parent class consumes it.
    set_cache_control: Literal["default_end"] | None = "default_end"
    """Set explicit cache control markers, for example for Anthropic models"""
13
-
14
-
15
class AnthropicModel(LitellmModel):
    """Thin backwards-compatibility wrapper around `LitellmModel`.

    It is not picked by `get_model` / `get_model_class` unless it is requested
    explicitly.
    """

    def __init__(self, *, config_class: type = AnthropicModelConfig, **kwargs):
        """Build the underlying `LitellmModel`, defaulting to `AnthropicModelConfig`."""
        super().__init__(config_class=config_class, **kwargs)

    def query(self, messages: list[dict], **kwargs) -> dict:
        """Add cache-control markers to `messages` and delegate to `LitellmModel.query`.

        When the legacy ``ANTHROPIC_API_KEYS`` variable is set, a deprecation
        warning is emitted and one of its ``::``-separated keys is chosen for the
        current thread; otherwise ``api_key=None`` is forwarded.
        """
        # Legacy only
        legacy_keys = os.getenv("ANTHROPIC_API_KEYS")
        if legacy_keys:
            warnings.warn(
                "ANTHROPIC_API_KEYS is deprecated and will be removed in the future. "
                "Simply use the ANTHROPIC_API_KEY environment variable instead. "
                "Key rotation is no longer required."
            )
            selected_key = get_key_per_thread(legacy_keys.split("::"))
        else:
            selected_key = None
        marked_messages = set_cache_control(messages, mode="default_end")
        return super().query(marked_messages, api_key=selected_key, **kwargs)
@@ -1,82 +0,0 @@
1
- import logging
2
- from collections.abc import Callable
3
- from dataclasses import dataclass
4
-
5
- import litellm
6
- from tenacity import (
7
- before_sleep_log,
8
- retry,
9
- retry_if_not_exception_type,
10
- stop_after_attempt,
11
- wait_exponential,
12
- )
13
-
14
- from minisweagent.models.litellm_model import LitellmModel, LitellmModelConfig
15
- from minisweagent.models.utils.openai_utils import coerce_responses_text
16
-
17
# Module logger; also handed to tenacity's before_sleep_log so retries are logged as warnings.
logger = logging.getLogger("litellm_response_api_model")
18
-
19
-
20
@dataclass
class LitellmResponseAPIModelConfig(LitellmModelConfig):
    """Config for `LitellmResponseAPIModel`; declares no fields beyond `LitellmModelConfig`."""

    pass
23
-
24
-
25
class LitellmResponseAPIModel(LitellmModel):
    """`LitellmModel` variant that queries through `litellm.responses`.

    The id of the most recent response is remembered and passed back as
    ``previous_response_id``; once it is set, only the last message is sent.
    """

    def __init__(self, *, config_class: Callable = LitellmResponseAPIModelConfig, **kwargs):
        """Initialise the parent model and start without a previous response id."""
        super().__init__(config_class=config_class, **kwargs)
        self._previous_response_id: str | None = None

    @retry(
        reraise=True,
        stop=stop_after_attempt(10),
        wait=wait_exponential(multiplier=1, min=4, max=60),
        before_sleep=before_sleep_log(logger, logging.WARNING),
        retry=retry_if_not_exception_type(
            (
                litellm.exceptions.UnsupportedParamsError,
                litellm.exceptions.NotFoundError,
                litellm.exceptions.PermissionDeniedError,
                litellm.exceptions.ContextWindowExceededError,
                litellm.exceptions.APIError,
                litellm.exceptions.AuthenticationError,
                KeyboardInterrupt,
            )
        ),
    )
    def _query(self, messages: list[dict[str, str]], **kwargs):
        """Send one request to the Responses API and record the returned response id.

        Retried by tenacity (up to 10 attempts, exponential backoff) unless one of
        the exception types listed in the decorator is raised.
        """
        try:
            # Remove 'timestamp' field added by agent - not supported by OpenAI responses API
            payload = [{"role": m["role"], "content": m["content"]} for m in messages]
            if self._previous_response_id is not None:
                # The earlier turns are referenced through previous_response_id.
                payload = payload[-1:]
            call_kwargs = self.config.model_kwargs | kwargs
            resp = litellm.responses(
                model=self.config.model_name,
                input=payload,
                previous_response_id=self._previous_response_id,
                **call_kwargs,
            )
            self._previous_response_id = getattr(resp, "id", None)
            return resp
        except litellm.exceptions.AuthenticationError as e:
            e.message += " You can permanently set your API key with `mini-extra config set KEY VALUE`."
            raise e

    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        """Query the model, account for the cost, and return ``{"content": <text>}``.

        Raises:
            Exception: whatever `completion_cost` raised, re-raised after a critical log entry.
        """
        resp = self._query(messages, **kwargs)
        content = coerce_responses_text(resp)
        try:
            call_cost = litellm.cost_calculator.completion_cost(resp, model=self.config.model_name)
        except Exception as e:
            logger.critical(
                f"Error calculating cost for model {self.config.model_name}: {e}. "
                "Please check the 'Updating the model registry' section in the documentation. "
                "http://bit.ly/4p31bi4 Still stuck? Please open a github issue for help!"
            )
            raise
        self.n_calls += 1
        self.cost += call_cost
        # Imported here rather than at module level, as in the original.
        from minisweagent.models import GLOBAL_MODEL_STATS

        GLOBAL_MODEL_STATS.add(call_cost)
        return {"content": content}
@@ -1,75 +0,0 @@
1
- import logging
2
- import os
3
- from dataclasses import dataclass
4
-
5
- import litellm
6
- from tenacity import (
7
- before_sleep_log,
8
- retry,
9
- retry_if_not_exception_type,
10
- stop_after_attempt,
11
- wait_exponential,
12
- )
13
-
14
- from minisweagent.models import GLOBAL_MODEL_STATS
15
- from minisweagent.models.portkey_model import PortkeyModel, PortkeyModelConfig
16
- from minisweagent.models.utils.cache_control import set_cache_control
17
- from minisweagent.models.utils.openai_utils import coerce_responses_text
18
-
19
# Module logger; also handed to tenacity's before_sleep_log so retries are logged as warnings.
logger = logging.getLogger("portkey_response_api_model")
20
-
21
-
22
@dataclass
class PortkeyResponseAPIModelConfig(PortkeyModelConfig):
    """Config for `PortkeyResponseAPIModel`; declares no fields beyond `PortkeyModelConfig`."""

    pass
25
-
26
-
27
class PortkeyResponseAPIModel(PortkeyModel):
    """`PortkeyModel` variant that queries through the client's Responses API.

    The id of the most recent response is remembered and passed back as
    ``previous_response_id``; once it is set, only the last message is sent.
    """

    def __init__(self, *, config_class: type = PortkeyResponseAPIModelConfig, **kwargs):
        """Initialise the parent model and start without a previous response id."""
        super().__init__(config_class=config_class, **kwargs)
        self._previous_response_id: str | None = None

    @retry(
        reraise=True,
        stop=stop_after_attempt(int(os.getenv("MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT", "10"))),
        wait=wait_exponential(multiplier=1, min=4, max=60),
        before_sleep=before_sleep_log(logger, logging.WARNING),
        retry=retry_if_not_exception_type((KeyboardInterrupt, TypeError, ValueError)),
    )
    def _query(self, messages: list[dict[str, str]], **kwargs):
        """Send one request via ``self.client.responses.create`` and record the response id.

        Retried by tenacity with exponential backoff; the attempt limit is read
        from ``MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT`` (default 10) when the class
        is defined. KeyboardInterrupt, TypeError and ValueError are not retried.
        """
        # After the first turn the earlier messages are referenced through previous_response_id.
        input_messages = messages if self._previous_response_id is None else messages[-1:]
        resp = self.client.responses.create(
            model=self.config.model_name,
            input=input_messages,
            previous_response_id=self._previous_response_id,
            **(self.config.model_kwargs | kwargs),
        )
        self._previous_response_id = getattr(resp, "id", None)
        return resp

    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        """Query the model, account for the cost, and return the text plus raw response.

        Returns:
            ``{"content": <text>, "extra": {"response": <dump or {}>, "cost": <float>}}``.

        Raises:
            RuntimeError: if the cost cannot be computed or is not positive, unless
                ``config.cost_tracking`` is ``"ignore_errors"`` (then the cost is 0.0).
        """
        if self.config.set_cache_control:
            messages = set_cache_control(messages, mode=self.config.set_cache_control)
        response = self._query(messages, **kwargs)
        text = coerce_responses_text(response)
        try:
            cost = litellm.cost_calculator.completion_cost(response, model=self.config.model_name)
            # Explicit check rather than `assert`: assertions are stripped under
            # `python -O`, which would let a zero cost slip through unnoticed.
            # `not cost > 0.0` (instead of `cost <= 0.0`) also rejects NaN, like the assert did.
            if not cost > 0.0:
                raise ValueError(f"Cost is not positive: {cost}")
        except Exception as e:
            if self.config.cost_tracking != "ignore_errors":
                raise RuntimeError(
                    f"Error calculating cost for model {self.config.model_name}: {e}. "
                    "You can ignore this issue from your config file with cost_tracking: 'ignore_errors' or "
                    "globally with export MSWEA_COST_TRACKING='ignore_errors' to ignore this error. "
                ) from e
            cost = 0.0
        self.n_calls += 1
        self.cost += cost
        GLOBAL_MODEL_STATS.add(cost)
        return {
            "content": text,
            "extra": {
                # Not every response object exposes model_dump(); fall back to an empty dict.
                "response": response.model_dump() if hasattr(response, "model_dump") else {},
                "cost": cost,
            },
        }
@@ -1,20 +0,0 @@
1
- """Utility for anthropic where we need different keys for different parallel
2
- agents to not mess up prompt caching.
3
- """
4
-
5
- import threading
6
- import warnings
7
- from typing import Any
8
-
9
# Thread names in order of first use; a name's position is that thread's index.
_THREADS_THAT_USED_API_KEYS: list[Any] = []


def get_key_per_thread(api_keys: list[Any]) -> Any:
    """Choose a key based on the current thread's name.

    Each distinct thread name is registered on first use, and its position in
    the registry, modulo the number of keys, selects the key.

    Args:
        api_keys: Candidate keys; may be empty.

    Returns:
        The selected key, or None if `api_keys` is empty or the selected key is falsy.
    """
    warnings.warn("get_key_per_thread is deprecated and will be removed in the future")
    thread_name = threading.current_thread().name
    if thread_name not in _THREADS_THAT_USED_API_KEYS:
        _THREADS_THAT_USED_API_KEYS.append(thread_name)
    if not api_keys:
        # Without this guard the modulo below raises ZeroDivisionError,
        # contradicting the documented "None if no keys are available".
        return None
    thread_idx = _THREADS_THAT_USED_API_KEYS.index(thread_name)
    key_idx = thread_idx % len(api_keys)
    return api_keys[key_idx] or None
@@ -1,41 +0,0 @@
1
- import logging
2
- from typing import Any
3
-
4
- from openai.types.responses.response_output_message import ResponseOutputMessage
5
-
6
# Module logger; used to warn when no text can be extracted from a response.
logger = logging.getLogger("openai_utils")
7
-
8
-
9
def coerce_responses_text(resp: Any) -> str:
    """Flatten an OpenAI Responses API result into plain text.

    Works with both OpenAI client responses and LiteLLM/Portkey responses.

    A non-empty string ``output_text`` attribute is returned as-is. Otherwise
    the truthy ``text`` values found in the content of each output item are
    joined with blank lines. If the structure cannot be walked, a warning is
    logged and an empty string is returned.
    """
    shortcut = getattr(resp, "output_text", None)
    if isinstance(shortcut, str) and shortcut:
        return shortcut

    def _iter_text(items):
        """Yield every truthy text value from dict or message output items."""
        for entry in items:
            if isinstance(entry, dict):
                parts = entry.get("content", [])
            elif isinstance(entry, ResponseOutputMessage):
                parts = entry.content
            else:
                # Other output item types carry no message content.
                continue

            for part in parts:
                if isinstance(part, dict):
                    value = part.get("text")
                elif hasattr(part, "text"):
                    value = part.text
                else:
                    continue

                if value:
                    yield value

    try:
        # The generator is consumed inside the try block, so errors raised while
        # walking the response are handled below.
        return "\n\n".join(_iter_text(resp.output))
    except (AttributeError, IndexError, TypeError):
        logger.warning(f"Could not extract text from response: {resp}")
        return ""
@@ -1,79 +0,0 @@
1
- """Run on a single SWE-Bench instance."""
2
-
3
- import traceback
4
- from pathlib import Path
5
-
6
- import typer
7
- import yaml
8
- from datasets import load_dataset
9
-
10
- from minisweagent import global_config_dir
11
- from minisweagent.agents.interactive import InteractiveAgent
12
- from minisweagent.config import builtin_config_dir, get_config_path
13
- from minisweagent.models import get_model
14
- from minisweagent.run.extra.swebench import (
15
- DATASET_MAPPING,
16
- get_sb_environment,
17
- )
18
- from minisweagent.run.utils.save import save_traj
19
- from minisweagent.utils.log import logger
20
-
21
# Typer application; `main` is registered on it through @app.command().
app = typer.Typer(add_completion=False)

# Default trajectory output file, placed in the global config directory.
DEFAULT_OUTPUT = global_config_dir / "last_swebench_single_run.traj.json"
24
-
25
-
26
# fmt: off
@app.command()
def main(
    subset: str = typer.Option("lite", "--subset", help="SWEBench subset to use or path to a dataset", rich_help_panel="Data selection"),
    split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
    # Default is the string "0" (not int 0) so it matches the `str` annotation and `.isnumeric()` below.
    instance_spec: str = typer.Option("0", "-i", "--instance", help="SWE-Bench instance ID or index", rich_help_panel="Data selection"),
    model_name: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
    # No short flag here: "-c" is declared by --config below, and one short flag cannot serve two options.
    model_class: str | None = typer.Option(None, "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
    config_path: Path = typer.Option( builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
    environment_class: str | None = typer.Option(None, "--environment-class", rich_help_panel="Advanced"),
    exit_immediately: bool = typer.Option( False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting.", rich_help_panel="Basic"),
    output: Path = typer.Option(DEFAULT_OUTPUT, "-o", "--output", help="Output trajectory file", rich_help_panel="Basic"),
) -> None:
    # fmt: on
    """Run on a single SWE-Bench instance.

    Loads the dataset, picks one instance (by ID, or by index into the sorted
    instance IDs when `instance_spec` is numeric), applies the command-line
    overrides to the loaded config, runs an `InteractiveAgent` on the instance's
    problem statement and always saves the trajectory to `output`.
    """
    dataset_path = DATASET_MAPPING.get(subset, subset)
    logger.info(f"Loading dataset from {dataset_path}, split {split}...")
    instances = {
        inst["instance_id"]: inst  # type: ignore
        for inst in load_dataset(dataset_path, split=split)
    }
    if instance_spec.isnumeric():
        # A numeric spec is an index into the sorted instance IDs.
        instance_spec = sorted(instances.keys())[int(instance_spec)]
    instance: dict = instances[instance_spec]  # type: ignore

    config_path = get_config_path(config_path)
    logger.info(f"Loading agent config from '{config_path}'")
    # safe_load returns None for an empty file; fall back to an empty mapping
    # so the setdefault/get calls below keep working.
    config = yaml.safe_load(config_path.read_text()) or {}
    if environment_class is not None:
        config.setdefault("environment", {})["environment_class"] = environment_class
    if model_class is not None:
        config.setdefault("model", {})["model_class"] = model_class
    if exit_immediately:
        config.setdefault("agent", {})["confirm_exit"] = False
    env = get_sb_environment(config, instance)
    agent = InteractiveAgent(
        get_model(model_name, config.get("model", {})),
        env,
        # "yolo" is only the default mode; an explicit mode in the agent config wins.
        **({"mode": "yolo"} | config.get("agent", {})),
    )

    exit_status, result, extra_info = None, None, None
    try:
        exit_status, result = agent.run(instance["problem_statement"])  # type: ignore[arg-type]
    except Exception as e:
        # Deliberate best-effort: record the failure in the trajectory instead of crashing.
        logger.error(f"Error processing instance {instance_spec}: {e}", exc_info=True)
        exit_status, result = type(e).__name__, str(e)
        extra_info = {"traceback": traceback.format_exc()}
    finally:
        save_traj(agent, output, exit_status=exit_status, result=result, extra_info=extra_info)  # type: ignore[arg-type]
76
-
77
-
78
# Run the Typer app when this module is executed as a script.
if __name__ == "__main__":
    app()