mini-swe-agent 1.9.0__tar.gz → 1.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mini_swe_agent-1.9.0/src/mini_swe_agent.egg-info → mini_swe_agent-1.10.0}/PKG-INFO +3 -2
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/README.md +1 -1
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/pyproject.toml +1 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0/src/mini_swe_agent.egg-info}/PKG-INFO +3 -2
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/mini_swe_agent.egg-info/SOURCES.txt +4 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/mini_swe_agent.egg-info/requires.txt +1 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/__init__.py +14 -2
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/agents/interactive.py +2 -2
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/agents/interactive_textual.py +8 -4
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/extra/swebench.yaml +1 -1
- mini_swe_agent-1.10.0/src/minisweagent/config/extra/swebench_roulette.yaml +233 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/environments/__init__.py +1 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/environments/docker.py +7 -7
- mini_swe_agent-1.10.0/src/minisweagent/environments/extra/bubblewrap.py +112 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/environments/singularity.py +31 -13
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/models/__init__.py +36 -10
- mini_swe_agent-1.10.0/src/minisweagent/models/extra/roulette.py +62 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/models/litellm_model.py +9 -1
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/extra/swebench.py +6 -3
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/extra/swebench_single.py +10 -2
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/github_issue.py +1 -1
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/mini.py +14 -17
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/utils/save.py +22 -0
- mini_swe_agent-1.10.0/src/minisweagent/utils/__init__.py +0 -0
- mini_swe_agent-1.10.0/src/minisweagent/utils/log.py +36 -0
- mini_swe_agent-1.9.0/src/minisweagent/utils/log.py +0 -32
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/LICENSE.md +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/setup.cfg +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/mini_swe_agent.egg-info/dependency_links.txt +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/mini_swe_agent.egg-info/entry_points.txt +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/mini_swe_agent.egg-info/top_level.txt +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/__main__.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/agents/__init__.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/agents/default.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/README.md +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/__init__.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/default.yaml +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/extra/__init__.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/github_issue.yaml +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/mini.tcss +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/mini.yaml +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/mini_no_temp.yaml +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/environments/extra/__init__.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/environments/extra/swerex_docker.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/environments/local.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/models/anthropic.py +0 -0
- {mini_swe_agent-1.9.0/src/minisweagent/models/utils → mini_swe_agent-1.10.0/src/minisweagent/models/extra}/__init__.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/models/test_models.py +0 -0
- {mini_swe_agent-1.9.0/src/minisweagent/run/extra → mini_swe_agent-1.10.0/src/minisweagent/models/utils}/__init__.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/models/utils/cache_control.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/models/utils/key_per_thread.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/py.typed +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/__init__.py +0 -0
- {mini_swe_agent-1.9.0/src/minisweagent/run/extra/utils → mini_swe_agent-1.10.0/src/minisweagent/run/extra}/__init__.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/extra/config.py +0 -0
- {mini_swe_agent-1.9.0/src/minisweagent/run → mini_swe_agent-1.10.0/src/minisweagent/run/extra}/utils/__init__.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/extra/utils/batch_progress.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/hello_world.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/inspector.py +0 -0
- {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/mini_extra.py +0 -0
- {mini_swe_agent-1.9.0/src/minisweagent → mini_swe_agent-1.10.0/src/minisweagent/run}/utils/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mini-swe-agent
|
|
3
|
-
Version: 1.9.0
|
|
3
|
+
Version: 1.10.0
|
|
4
4
|
Summary: Nano SWE Agent - A simple AI software engineering agent
|
|
5
5
|
Author-email: Kilian Lieret <kilian.lieret@posteo.de>, "Carlos E. Jimenez" <carlosej@princeton.edu>
|
|
6
6
|
License: MIT License
|
|
@@ -48,6 +48,7 @@ Requires-Dist: typer
|
|
|
48
48
|
Requires-Dist: platformdirs
|
|
49
49
|
Requires-Dist: textual
|
|
50
50
|
Requires-Dist: prompt_toolkit
|
|
51
|
+
Requires-Dist: openai!=1.100.0,!=1.100.1
|
|
51
52
|
Provides-Extra: full
|
|
52
53
|
Requires-Dist: mini-swe-agent[dev]; extra == "full"
|
|
53
54
|
Requires-Dist: swe-rex>=1.4.0; extra == "full"
|
|
@@ -72,7 +73,7 @@ Dynamic: license-file
|
|
|
72
73
|
|
|
73
74
|
# The 100 line AI agent that solves GitHub issues & more
|
|
74
75
|
|
|
75
|
-
📣 [
|
|
76
|
+
📣 [New blogpost: Randomly switching between GPT-5 and Sonnet 4 boosts performance](https://www.swebench.com/SWE-bench/blog/2025/08/19/mini-roulette/)
|
|
76
77
|
|
|
77
78
|
[](https://mini-swe-agent.com/latest/)
|
|
78
79
|
[](https://join.slack.com/t/swe-bench/shared_invite/zt-36pj9bu5s-o3_yXPZbaH2wVnxnss1EkQ)
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
# The 100 line AI agent that solves GitHub issues & more
|
|
6
6
|
|
|
7
|
-
📣 [
|
|
7
|
+
📣 [New blogpost: Randomly switching between GPT-5 and Sonnet 4 boosts performance](https://www.swebench.com/SWE-bench/blog/2025/08/19/mini-roulette/)
|
|
8
8
|
|
|
9
9
|
[](https://mini-swe-agent.com/latest/)
|
|
10
10
|
[](https://join.slack.com/t/swe-bench/shared_invite/zt-36pj9bu5s-o3_yXPZbaH2wVnxnss1EkQ)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mini-swe-agent
|
|
3
|
-
Version: 1.9.0
|
|
3
|
+
Version: 1.10.0
|
|
4
4
|
Summary: Nano SWE Agent - A simple AI software engineering agent
|
|
5
5
|
Author-email: Kilian Lieret <kilian.lieret@posteo.de>, "Carlos E. Jimenez" <carlosej@princeton.edu>
|
|
6
6
|
License: MIT License
|
|
@@ -48,6 +48,7 @@ Requires-Dist: typer
|
|
|
48
48
|
Requires-Dist: platformdirs
|
|
49
49
|
Requires-Dist: textual
|
|
50
50
|
Requires-Dist: prompt_toolkit
|
|
51
|
+
Requires-Dist: openai!=1.100.0,!=1.100.1
|
|
51
52
|
Provides-Extra: full
|
|
52
53
|
Requires-Dist: mini-swe-agent[dev]; extra == "full"
|
|
53
54
|
Requires-Dist: swe-rex>=1.4.0; extra == "full"
|
|
@@ -72,7 +73,7 @@ Dynamic: license-file
|
|
|
72
73
|
|
|
73
74
|
# The 100 line AI agent that solves GitHub issues & more
|
|
74
75
|
|
|
75
|
-
📣 [
|
|
76
|
+
📣 [New blogpost: Randomly switching between GPT-5 and Sonnet 4 boosts performance](https://www.swebench.com/SWE-bench/blog/2025/08/19/mini-roulette/)
|
|
76
77
|
|
|
77
78
|
[](https://mini-swe-agent.com/latest/)
|
|
78
79
|
[](https://join.slack.com/t/swe-bench/shared_invite/zt-36pj9bu5s-o3_yXPZbaH2wVnxnss1EkQ)
|
|
@@ -23,16 +23,20 @@ src/minisweagent/config/mini.yaml
|
|
|
23
23
|
src/minisweagent/config/mini_no_temp.yaml
|
|
24
24
|
src/minisweagent/config/extra/__init__.py
|
|
25
25
|
src/minisweagent/config/extra/swebench.yaml
|
|
26
|
+
src/minisweagent/config/extra/swebench_roulette.yaml
|
|
26
27
|
src/minisweagent/environments/__init__.py
|
|
27
28
|
src/minisweagent/environments/docker.py
|
|
28
29
|
src/minisweagent/environments/local.py
|
|
29
30
|
src/minisweagent/environments/singularity.py
|
|
30
31
|
src/minisweagent/environments/extra/__init__.py
|
|
32
|
+
src/minisweagent/environments/extra/bubblewrap.py
|
|
31
33
|
src/minisweagent/environments/extra/swerex_docker.py
|
|
32
34
|
src/minisweagent/models/__init__.py
|
|
33
35
|
src/minisweagent/models/anthropic.py
|
|
34
36
|
src/minisweagent/models/litellm_model.py
|
|
35
37
|
src/minisweagent/models/test_models.py
|
|
38
|
+
src/minisweagent/models/extra/__init__.py
|
|
39
|
+
src/minisweagent/models/extra/roulette.py
|
|
36
40
|
src/minisweagent/models/utils/__init__.py
|
|
37
41
|
src/minisweagent/models/utils/cache_control.py
|
|
38
42
|
src/minisweagent/models/utils/key_per_thread.py
|
|
@@ -8,7 +8,7 @@ This file provides:
|
|
|
8
8
|
unless you want the static type checking.
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
|
-
__version__ = "1.9.0"
|
|
11
|
+
__version__ = "1.10.0"
|
|
12
12
|
|
|
13
13
|
import os
|
|
14
14
|
from pathlib import Path
|
|
@@ -18,6 +18,8 @@ import dotenv
|
|
|
18
18
|
from platformdirs import user_config_dir
|
|
19
19
|
from rich.console import Console
|
|
20
20
|
|
|
21
|
+
from minisweagent.utils.log import logger
|
|
22
|
+
|
|
21
23
|
package_dir = Path(__file__).resolve().parent
|
|
22
24
|
|
|
23
25
|
global_config_dir = Path(os.getenv("MSWEA_GLOBAL_CONFIG_DIR") or user_config_dir("mini-swe-agent"))
|
|
@@ -64,8 +66,18 @@ class Agent(Protocol):
|
|
|
64
66
|
model: Model
|
|
65
67
|
env: Environment
|
|
66
68
|
messages: list[dict[str, str]]
|
|
69
|
+
config: Any
|
|
67
70
|
|
|
68
71
|
def run(self, task: str, **kwargs) -> tuple[str, str]: ...
|
|
69
72
|
|
|
70
73
|
|
|
71
|
-
__all__ = [
|
|
74
|
+
__all__ = [
|
|
75
|
+
"Agent",
|
|
76
|
+
"Model",
|
|
77
|
+
"Environment",
|
|
78
|
+
"package_dir",
|
|
79
|
+
"__version__",
|
|
80
|
+
"global_config_file",
|
|
81
|
+
"global_config_dir",
|
|
82
|
+
"logger",
|
|
83
|
+
]
|
|
@@ -39,9 +39,9 @@ class InteractiveAgent(DefaultAgent):
|
|
|
39
39
|
super().__init__(*args, config_class=config_class, **kwargs)
|
|
40
40
|
self.cost_last_confirmed = 0.0
|
|
41
41
|
|
|
42
|
-
def add_message(self, role: str, content: str):
|
|
42
|
+
def add_message(self, role: str, content: str, **kwargs):
|
|
43
43
|
# Extend supermethod to print messages
|
|
44
|
-
super().add_message(role, content)
|
|
44
|
+
super().add_message(role, content, **kwargs)
|
|
45
45
|
if role == "assistant":
|
|
46
46
|
console.print(
|
|
47
47
|
f"\n[red][bold]mini-swe-agent[/bold] (step [bold]{self.model.n_calls}[/bold], [bold]${self.model.cost:.2f}[/bold]):[/red]\n",
|
{mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/agents/interactive_textual.py
RENAMED
|
@@ -44,8 +44,8 @@ class _TextualAgent(DefaultAgent):
|
|
|
44
44
|
super().__init__(*args, config_class=TextualAgentConfig, **kwargs)
|
|
45
45
|
self._current_action_from_human = False
|
|
46
46
|
|
|
47
|
-
def add_message(self, role: str, content: str):
|
|
48
|
-
super().add_message(role, content)
|
|
47
|
+
def add_message(self, role: str, content: str, **kwargs):
|
|
48
|
+
super().add_message(role, content, **kwargs)
|
|
49
49
|
if self.app.agent_state != "UNINITIALIZED":
|
|
50
50
|
self.app.call_from_thread(self.app.on_message_added)
|
|
51
51
|
|
|
@@ -276,13 +276,17 @@ class TextualAgent(App):
|
|
|
276
276
|
|
|
277
277
|
self._vscroll = VerticalScroll()
|
|
278
278
|
|
|
279
|
-
def run(self, task: str) -> tuple[str, str]:
|
|
280
|
-
threading.Thread(target=lambda: self.agent.run(task), daemon=True).start()
|
|
279
|
+
def run(self, task: str, **kwargs) -> tuple[str, str]:
|
|
280
|
+
threading.Thread(target=lambda: self.agent.run(task, **kwargs), daemon=True).start()
|
|
281
281
|
super().run()
|
|
282
282
|
return self.exit_status, self.result
|
|
283
283
|
|
|
284
284
|
# --- Basics ---
|
|
285
285
|
|
|
286
|
+
@property
|
|
287
|
+
def config(self):
|
|
288
|
+
return self.agent.config
|
|
289
|
+
|
|
286
290
|
@property
|
|
287
291
|
def i_step(self) -> int:
|
|
288
292
|
"""Current step index."""
|
|
@@ -36,7 +36,7 @@ agent:
|
|
|
36
36
|
2. Provide exactly ONE bash command to execute
|
|
37
37
|
|
|
38
38
|
## Important Boundaries
|
|
39
|
-
- MODIFY: Regular source code files in
|
|
39
|
+
- MODIFY: Regular source code files in /testbed (this is the working directory for all your subsequent commands)
|
|
40
40
|
- DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
|
|
41
41
|
|
|
42
42
|
## Recommended Workflow
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
agent:
|
|
2
|
+
system_template: |
|
|
3
|
+
You are a helpful assistant that can interact multiple times with a computer shell to solve programming tasks.
|
|
4
|
+
Your response must contain exactly ONE bash code block with ONE command (or commands connected with && or ||).
|
|
5
|
+
|
|
6
|
+
Include a THOUGHT section before your command where you explain your reasoning process.
|
|
7
|
+
Format your response as shown in <format_example>.
|
|
8
|
+
|
|
9
|
+
<format_example>
|
|
10
|
+
THOUGHT: Your reasoning and analysis here
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
your_command_here
|
|
14
|
+
```
|
|
15
|
+
</format_example>
|
|
16
|
+
|
|
17
|
+
Failure to follow these rules will cause your response to be rejected.
|
|
18
|
+
instance_template: |
|
|
19
|
+
<pr_description>
|
|
20
|
+
Consider the following PR description:
|
|
21
|
+
{{task}}
|
|
22
|
+
</pr_description>
|
|
23
|
+
|
|
24
|
+
<instructions>
|
|
25
|
+
# Task Instructions
|
|
26
|
+
|
|
27
|
+
## Overview
|
|
28
|
+
You're a software engineer interacting continuously with a computer by submitting commands.
|
|
29
|
+
You'll be helping implement necessary changes to meet requirements in the PR description.
|
|
30
|
+
Your task is specifically to make changes to non-test files in the current directory in order to fix the issue described in the PR description in a way that is general and consistent with the codebase.
|
|
31
|
+
|
|
32
|
+
IMPORTANT: This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command.
|
|
33
|
+
|
|
34
|
+
For each response:
|
|
35
|
+
1. Include a THOUGHT section explaining your reasoning and what you're trying to accomplish
|
|
36
|
+
2. Provide exactly ONE bash command to execute
|
|
37
|
+
|
|
38
|
+
## Important Boundaries
|
|
39
|
+
- MODIFY: Regular source code files in {{working_dir}}
|
|
40
|
+
- DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
|
|
41
|
+
|
|
42
|
+
## Recommended Workflow
|
|
43
|
+
1. Analyze the codebase by finding and reading relevant files
|
|
44
|
+
2. Create a script to reproduce the issue
|
|
45
|
+
3. Edit the source code to resolve the issue
|
|
46
|
+
4. Verify your fix works by running your script again
|
|
47
|
+
5. Test edge cases to ensure your fix is robust
|
|
48
|
+
|
|
49
|
+
## Command Execution Rules
|
|
50
|
+
You are operating in an environment where
|
|
51
|
+
1. You write a single command
|
|
52
|
+
2. The system executes that command in a subshell
|
|
53
|
+
3. You see the result
|
|
54
|
+
4. You write your next command
|
|
55
|
+
|
|
56
|
+
Each response should include:
|
|
57
|
+
1. A **THOUGHT** section where you explain your reasoning and plan
|
|
58
|
+
2. A single bash code block with your command
|
|
59
|
+
|
|
60
|
+
Format your responses like this:
|
|
61
|
+
|
|
62
|
+
<format_example>
|
|
63
|
+
THOUGHT: Here I explain my reasoning process, analysis of the current situation,
|
|
64
|
+
and what I'm trying to accomplish with the command below.
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
your_command_here
|
|
68
|
+
```
|
|
69
|
+
</format_example>
|
|
70
|
+
|
|
71
|
+
Commands must be specified in a single bash code block:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
your_command_here
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
**CRITICAL REQUIREMENTS:**
|
|
78
|
+
- Your response SHOULD include a THOUGHT section explaining your reasoning
|
|
79
|
+
- Your response MUST include EXACTLY ONE bash code block
|
|
80
|
+
- This bash block MUST contain EXACTLY ONE command (or a set of commands connected with && or ||)
|
|
81
|
+
- If you include zero or multiple bash blocks, or no command at all, YOUR RESPONSE WILL FAIL
|
|
82
|
+
- Do NOT try to run multiple independent commands in separate blocks in one response
|
|
83
|
+
- Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
|
|
84
|
+
- However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
|
|
85
|
+
|
|
86
|
+
Example of a CORRECT response:
|
|
87
|
+
<example_response>
|
|
88
|
+
THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
ls -la
|
|
92
|
+
```
|
|
93
|
+
</example_response>
|
|
94
|
+
|
|
95
|
+
Example of an INCORRECT response:
|
|
96
|
+
<example_response>
|
|
97
|
+
THOUGHT: I need to examine the codebase and then look at a specific file. I'll run multiple commands to do this.
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
ls -la
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
Now I'll read the file:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
cat file.txt
|
|
107
|
+
```
|
|
108
|
+
</example_response>
|
|
109
|
+
|
|
110
|
+
If you need to run multiple commands, either:
|
|
111
|
+
1. Combine them in one block using && or ||
|
|
112
|
+
```bash
|
|
113
|
+
command1 && command2 || echo "Error occurred"
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
2. Wait for the first command to complete, see its output, then issue the next command in your following response.
|
|
117
|
+
|
|
118
|
+
## Environment Details
|
|
119
|
+
- You have a full Linux shell environment
|
|
120
|
+
- Always use non-interactive flags (-y, -f) for commands
|
|
121
|
+
- Avoid interactive tools like vi, nano, or any that require user input
|
|
122
|
+
- If a command isn't available, you can install it
|
|
123
|
+
|
|
124
|
+
## Useful Command Examples
|
|
125
|
+
|
|
126
|
+
### Create a new file:
|
|
127
|
+
```bash
|
|
128
|
+
cat <<'EOF' > newfile.py
|
|
129
|
+
import numpy as np
|
|
130
|
+
hello = "world"
|
|
131
|
+
print(hello)
|
|
132
|
+
EOF
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Edit files with sed:
|
|
136
|
+
```bash
|
|
137
|
+
# Replace all occurrences
|
|
138
|
+
sed -i 's/old_string/new_string/g' filename.py
|
|
139
|
+
|
|
140
|
+
# Replace only first occurrence
|
|
141
|
+
sed -i 's/old_string/new_string/' filename.py
|
|
142
|
+
|
|
143
|
+
# Replace first occurrence on line 1
|
|
144
|
+
sed -i '1s/old_string/new_string/' filename.py
|
|
145
|
+
|
|
146
|
+
# Replace all occurrences in lines 1-10
|
|
147
|
+
sed -i '1,10s/old_string/new_string/g' filename.py
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### View file content:
|
|
151
|
+
```bash
|
|
152
|
+
# View specific lines with numbers
|
|
153
|
+
nl -ba filename.py | sed -n '10,20p'
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Any other command you want to run
|
|
157
|
+
```bash
|
|
158
|
+
anything
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
## Submission
|
|
162
|
+
When you've completed your work (reading, editing, testing), and cannot make further progress
|
|
163
|
+
issue exactly the following command:
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
This command will submit your work.
|
|
170
|
+
You cannot continue working (reading, editing, testing) in any way on this task after submitting.
|
|
171
|
+
</instructions>
|
|
172
|
+
action_observation_template: |
|
|
173
|
+
<returncode>{{output.returncode}}</returncode>
|
|
174
|
+
{% if output.output | length < 10000 -%}
|
|
175
|
+
<output>
|
|
176
|
+
{{ output.output -}}
|
|
177
|
+
</output>
|
|
178
|
+
{%- else -%}
|
|
179
|
+
<warning>
|
|
180
|
+
The output of your last command was too long.
|
|
181
|
+
Please try a different command that produces less output.
|
|
182
|
+
If you're looking at a file you can try use head, tail or sed to view a smaller number of lines selectively.
|
|
183
|
+
If you're using grep or find and it produced too much output, you can use a more selective search pattern.
|
|
184
|
+
If you really need to see something from the full command's output, you can redirect output to a file and then search in that file.
|
|
185
|
+
</warning>
|
|
186
|
+
{%- set elided_chars = output.output | length - 10000 -%}
|
|
187
|
+
<output_head>
|
|
188
|
+
{{ output.output[:5000] }}
|
|
189
|
+
</output_head>
|
|
190
|
+
<elided_chars>
|
|
191
|
+
{{ elided_chars }} characters elided
|
|
192
|
+
</elided_chars>
|
|
193
|
+
<output_tail>
|
|
194
|
+
{{ output.output[-5000:] }}
|
|
195
|
+
</output_tail>
|
|
196
|
+
{%- endif -%}
|
|
197
|
+
format_error_template: |
|
|
198
|
+
Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
|
|
199
|
+
|
|
200
|
+
Please format your action in triple backticks as shown in <response_example>.
|
|
201
|
+
|
|
202
|
+
<response_example>
|
|
203
|
+
Here are some thoughts about why you want to perform the action.
|
|
204
|
+
|
|
205
|
+
```bash
|
|
206
|
+
<action>
|
|
207
|
+
```
|
|
208
|
+
</response_example>
|
|
209
|
+
|
|
210
|
+
If you have completed your assignment, please consult the first message about how to
|
|
211
|
+
submit your solution (you will not be able to continue working on this task after that).
|
|
212
|
+
step_limit: 250
|
|
213
|
+
cost_limit: 3.
|
|
214
|
+
|
|
215
|
+
environment:
|
|
216
|
+
cwd: "/testbed"
|
|
217
|
+
timeout: 60
|
|
218
|
+
env:
|
|
219
|
+
PAGER: cat
|
|
220
|
+
MANPAGER: cat
|
|
221
|
+
LESS: -R
|
|
222
|
+
PIP_PROGRESS_BAR: 'off'
|
|
223
|
+
TQDM_DISABLE: '1'
|
|
224
|
+
environment_class: docker
|
|
225
|
+
|
|
226
|
+
model:
|
|
227
|
+
model_name: "roulette"
|
|
228
|
+
model_class: "minisweagent.models.extra.roulette.RouletteModel"
|
|
229
|
+
model_kwargs:
|
|
230
|
+
- model_name: "claude-sonnet-4-20250514"
|
|
231
|
+
model_kwargs:
|
|
232
|
+
temperature: 0.
|
|
233
|
+
- model_name: "gpt-5"
|
|
@@ -10,6 +10,7 @@ _ENVIRONMENT_MAPPING = {
|
|
|
10
10
|
"singularity": "minisweagent.environments.singularity.SingularityEnvironment",
|
|
11
11
|
"local": "minisweagent.environments.local.LocalEnvironment",
|
|
12
12
|
"swerex_docker": "minisweagent.environments.extra.swerex_docker.SwerexDockerEnvironment",
|
|
13
|
+
"bubblewrap": "minisweagent.environments.extra.bubblewrap.BubblewrapEnvironment",
|
|
13
14
|
}
|
|
14
15
|
|
|
15
16
|
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
import os
|
|
2
3
|
import shlex
|
|
3
4
|
import subprocess
|
|
@@ -5,8 +6,6 @@ import uuid
|
|
|
5
6
|
from dataclasses import asdict, dataclass, field
|
|
6
7
|
from typing import Any
|
|
7
8
|
|
|
8
|
-
from minisweagent.utils.log import get_logger
|
|
9
|
-
|
|
10
9
|
|
|
11
10
|
@dataclass
|
|
12
11
|
class DockerEnvironmentConfig:
|
|
@@ -24,18 +23,20 @@ class DockerEnvironmentConfig:
|
|
|
24
23
|
"""Timeout for executing commands in the container."""
|
|
25
24
|
executable: str = os.getenv("MSWEA_DOCKER_EXECUTABLE", "docker")
|
|
26
25
|
"""Path to the docker/container executable."""
|
|
27
|
-
run_args: list[str] = field(default_factory=list)
|
|
28
|
-
"""Additional arguments to pass to the docker/container executable."""
|
|
26
|
+
run_args: list[str] = field(default_factory=lambda: ["--rm"])
|
|
27
|
+
"""Additional arguments to pass to the docker/container executable.
|
|
28
|
+
Default is ["--rm"], which removes the container after it exits.
|
|
29
|
+
"""
|
|
29
30
|
container_timeout: str = "2h"
|
|
30
31
|
"""Max duration to keep container running. Uses the same format as the sleep command."""
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
class DockerEnvironment:
|
|
34
|
-
def __init__(self, *, config_class: type = DockerEnvironmentConfig, **kwargs):
|
|
35
|
+
def __init__(self, *, config_class: type = DockerEnvironmentConfig, logger: logging.Logger | None = None, **kwargs):
|
|
35
36
|
"""This class executes bash commands in a Docker container using direct docker commands.
|
|
36
37
|
See `DockerEnvironmentConfig` for keyword arguments.
|
|
37
38
|
"""
|
|
38
|
-
self.logger =
|
|
39
|
+
self.logger = logger or logging.getLogger("minisweagent.environment")
|
|
39
40
|
self.container_id: str | None = None
|
|
40
41
|
self.config = config_class(**kwargs)
|
|
41
42
|
self._start_container()
|
|
@@ -97,7 +98,6 @@ class DockerEnvironment:
|
|
|
97
98
|
def cleanup(self):
|
|
98
99
|
"""Stop and remove the Docker container."""
|
|
99
100
|
if getattr(self, "container_id", None) is not None: # if init fails early, container_id might not be set
|
|
100
|
-
self.logger.info(f"Stopping container {self.container_id}")
|
|
101
101
|
cmd = f"(timeout 60 {self.config.executable} stop {self.container_id} || {self.config.executable} rm -f {self.container_id}) >/dev/null 2>&1 &"
|
|
102
102
|
subprocess.Popen(cmd, shell=True)
|
|
103
103
|
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""
|
|
2
|
+
[Bubblewrap](https://github.com/containers/bubblewrap) is a low-level, unprivileged sandboxing tool for Linux that enables running applications
|
|
3
|
+
in isolated environments with restricted access to the operating system and user data.
|
|
4
|
+
This environment uses bubblewrap to execute commands in a sandboxed environment.
|
|
5
|
+
|
|
6
|
+
!!! warning
|
|
7
|
+
This environment is experimental.
|
|
8
|
+
|
|
9
|
+
!!! warning
|
|
10
|
+
This environment is not supported on Windows.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import os
|
|
15
|
+
import platform
|
|
16
|
+
import shutil
|
|
17
|
+
import subprocess
|
|
18
|
+
import tempfile
|
|
19
|
+
import uuid
|
|
20
|
+
from dataclasses import asdict, dataclass, field
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
|
|
26
|
+
class BubblewrapEnvironmentConfig:
|
|
27
|
+
cwd: str = ""
|
|
28
|
+
"""Working directory for the sandbox."""
|
|
29
|
+
env: dict[str, str] = field(default_factory=dict)
|
|
30
|
+
"""Dictionary of environment variables to set in the sandbox."""
|
|
31
|
+
timeout: int = 30
|
|
32
|
+
"""Timeout for the command in seconds."""
|
|
33
|
+
executable: str = os.getenv("MSWEA_BUBBLEWRAP_EXECUTABLE", "bwrap")
|
|
34
|
+
"""Path to the bubblewrap executable."""
|
|
35
|
+
wrapper_args: list[str] = field(
|
|
36
|
+
default_factory=lambda: [
|
|
37
|
+
"--unshare-user-try",
|
|
38
|
+
"--ro-bind",
|
|
39
|
+
"/usr",
|
|
40
|
+
"/usr",
|
|
41
|
+
"--ro-bind",
|
|
42
|
+
"/bin",
|
|
43
|
+
"/bin",
|
|
44
|
+
"--ro-bind",
|
|
45
|
+
"/lib",
|
|
46
|
+
"/lib",
|
|
47
|
+
"--ro-bind",
|
|
48
|
+
"/lib64",
|
|
49
|
+
"/lib64",
|
|
50
|
+
"--ro-bind",
|
|
51
|
+
"/etc",
|
|
52
|
+
"/etc",
|
|
53
|
+
"--tmpfs",
|
|
54
|
+
"/tmp",
|
|
55
|
+
"--proc",
|
|
56
|
+
"/proc",
|
|
57
|
+
"--dev",
|
|
58
|
+
"/dev",
|
|
59
|
+
"--new-session",
|
|
60
|
+
"--setenv",
|
|
61
|
+
"PATH",
|
|
62
|
+
"/usr/local/bin:/usr/sbin:/usr/bin:/bin",
|
|
63
|
+
]
|
|
64
|
+
)
|
|
65
|
+
"""Arguments to pass to the bubblewrap executable."""
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class BubblewrapEnvironment:
|
|
69
|
+
def __init__(
|
|
70
|
+
self, *, config_class: type = BubblewrapEnvironmentConfig, logger: logging.Logger | None = None, **kwargs
|
|
71
|
+
):
|
|
72
|
+
"""This class executes bash commands in a bubblewrap environment and a separate working
|
|
73
|
+
directory for each environment. See `BubblewrapEnvironmentConfig` for kwargs.
|
|
74
|
+
"""
|
|
75
|
+
self.logger = logger or logging.getLogger("minisweagent.environment")
|
|
76
|
+
self.config = config_class(**kwargs)
|
|
77
|
+
self.working_dir = Path(tempfile.gettempdir()) / f"minisweagent-{uuid.uuid4().hex[:8]}"
|
|
78
|
+
self.working_dir.mkdir(parents=True)
|
|
79
|
+
|
|
80
|
+
def execute(self, command: str, cwd: str = "") -> dict[str, Any]:
|
|
81
|
+
"""Execute a command in the bubblewrap environment and return the result as a dict."""
|
|
82
|
+
cwd = cwd or self.config.cwd or str(self.working_dir)
|
|
83
|
+
|
|
84
|
+
cmd = [self.config.executable] + self.config.wrapper_args + ["--bind", cwd, cwd, "--chdir", cwd]
|
|
85
|
+
|
|
86
|
+
# Add environment variables
|
|
87
|
+
for key, value in self.config.env.items():
|
|
88
|
+
cmd.extend(["--setenv", key, value])
|
|
89
|
+
|
|
90
|
+
cmd.extend(["bash", "-c", command])
|
|
91
|
+
|
|
92
|
+
result = subprocess.run(
|
|
93
|
+
cmd,
|
|
94
|
+
text=True,
|
|
95
|
+
timeout=self.config.timeout,
|
|
96
|
+
encoding="utf-8",
|
|
97
|
+
errors="replace",
|
|
98
|
+
stdout=subprocess.PIPE,
|
|
99
|
+
stderr=subprocess.STDOUT,
|
|
100
|
+
)
|
|
101
|
+
return {"output": result.stdout, "returncode": result.returncode}
|
|
102
|
+
|
|
103
|
+
def cleanup(self):
|
|
104
|
+
if self.working_dir.exists():
|
|
105
|
+
shutil.rmtree(self.working_dir)
|
|
106
|
+
|
|
107
|
+
def __del__(self):
|
|
108
|
+
"""Cleanup working_dir when object is destroyed."""
|
|
109
|
+
self.cleanup()
|
|
110
|
+
|
|
111
|
+
def get_template_vars(self) -> dict[str, Any]:
|
|
112
|
+
return asdict(self.config) | platform.uname()._asdict()
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
|
|
3
|
+
import logging
|
|
3
4
|
import os
|
|
4
5
|
import shutil
|
|
5
6
|
import subprocess
|
|
@@ -9,8 +10,6 @@ from dataclasses import asdict, dataclass, field
|
|
|
9
10
|
from pathlib import Path
|
|
10
11
|
from typing import Any
|
|
11
12
|
|
|
12
|
-
from minisweagent.utils.log import get_logger
|
|
13
|
-
|
|
14
13
|
|
|
15
14
|
@dataclass
|
|
16
15
|
class SingularityEnvironmentConfig:
|
|
@@ -24,18 +23,39 @@ class SingularityEnvironmentConfig:
|
|
|
24
23
|
"""Timeout for executing commands in the container."""
|
|
25
24
|
executable: str = os.getenv("MSWEA_SINGULARITY_EXECUTABLE", "singularity")
|
|
26
25
|
"""Path to the singularity executable."""
|
|
26
|
+
sandbox_build_retries: int = 3
|
|
27
|
+
"""Number of retries for building the sandbox if an error occurs."""
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
class SingularityEnvironment:
|
|
30
|
-
def __init__(
|
|
31
|
+
def __init__(
|
|
32
|
+
self, *, config_class: type = SingularityEnvironmentConfig, logger: logging.Logger | None = None, **kwargs
|
|
33
|
+
):
|
|
31
34
|
"""Singularity environment. See `SingularityEnvironmentConfig` for kwargs."""
|
|
32
|
-
self.logger =
|
|
33
|
-
self.config =
|
|
34
|
-
self.sandbox_dir =
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
35
|
+
self.logger = logger or logging.getLogger("minisweagent.environment")
|
|
36
|
+
self.config = config_class(**kwargs)
|
|
37
|
+
self.sandbox_dir = self._build_sandbox()
|
|
38
|
+
|
|
39
|
+
def _build_sandbox(self) -> Path:
|
|
40
|
+
# Building the sandbox can fail (very rarely), so we retry it
|
|
41
|
+
max_retries = self.config.sandbox_build_retries
|
|
42
|
+
for attempt in range(max_retries):
|
|
43
|
+
sandbox_dir = Path(tempfile.gettempdir()) / f"minisweagent-{uuid.uuid4().hex[:8]}"
|
|
44
|
+
try:
|
|
45
|
+
subprocess.run(
|
|
46
|
+
[self.config.executable, "build", "--sandbox", sandbox_dir, self.config.image],
|
|
47
|
+
check=True,
|
|
48
|
+
capture_output=True,
|
|
49
|
+
)
|
|
50
|
+
break
|
|
51
|
+
except subprocess.CalledProcessError as e:
|
|
52
|
+
shutil.rmtree(sandbox_dir, ignore_errors=True)
|
|
53
|
+
self.logger.error(
|
|
54
|
+
f"Error building image {self.config.image}, stdout: {e.stdout}, stderr: {e.stderr} (attempt {attempt + 1}/{max_retries})"
|
|
55
|
+
)
|
|
56
|
+
if attempt == max_retries - 1:
|
|
57
|
+
raise
|
|
58
|
+
return sandbox_dir
|
|
39
59
|
|
|
40
60
|
def get_template_vars(self) -> dict[str, Any]:
|
|
41
61
|
return asdict(self.config)
|
|
@@ -70,9 +90,7 @@ class SingularityEnvironment:
|
|
|
70
90
|
return {"output": result.stdout, "returncode": result.returncode}
|
|
71
91
|
|
|
72
92
|
def cleanup(self):
|
|
73
|
-
|
|
74
|
-
self.logger.info(f"Removing sandbox {self.sandbox_dir}")
|
|
75
|
-
shutil.rmtree(self.sandbox_dir)
|
|
93
|
+
shutil.rmtree(self.sandbox_dir, ignore_errors=True)
|
|
76
94
|
|
|
77
95
|
def __del__(self):
|
|
78
96
|
"""Cleanup sandbox when object is destroyed."""
|
|
@@ -3,6 +3,7 @@ You can ignore this file completely if you explicitly set your model in your run
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import copy
|
|
6
|
+
import importlib
|
|
6
7
|
import os
|
|
7
8
|
import threading
|
|
8
9
|
|
|
@@ -49,12 +50,12 @@ def get_model(input_model_name: str | None = None, config: dict | None = None) -
|
|
|
49
50
|
config = copy.deepcopy(config)
|
|
50
51
|
config["model_name"] = resolved_model_name
|
|
51
52
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
return
|
|
53
|
+
model_class = get_model_class(resolved_model_name, config.pop("model_class", ""))
|
|
54
|
+
|
|
55
|
+
if (from_env := os.getenv("MSWEA_MODEL_API_KEY")) and not str(type(model_class)).endswith("DeterministicModel"):
|
|
56
|
+
config.setdefault("model_kwargs", {})["api_key"] = from_env
|
|
57
|
+
|
|
58
|
+
return model_class(**config)
|
|
58
59
|
|
|
59
60
|
|
|
60
61
|
def get_model_name(input_model_name: str | None = None, config: dict | None = None) -> str:
|
|
@@ -63,19 +64,44 @@ def get_model_name(input_model_name: str | None = None, config: dict | None = No
|
|
|
63
64
|
config = {}
|
|
64
65
|
if input_model_name:
|
|
65
66
|
return input_model_name
|
|
66
|
-
if from_env := os.getenv("MSWEA_MODEL_NAME"):
|
|
67
|
-
return from_env
|
|
68
67
|
if from_config := config.get("model_name"):
|
|
69
68
|
return from_config
|
|
69
|
+
if from_env := os.getenv("MSWEA_MODEL_NAME"):
|
|
70
|
+
return from_env
|
|
70
71
|
raise ValueError("No default model set. Please run `mini-extra config setup` to set one.")
|
|
71
72
|
|
|
72
73
|
|
|
73
|
-
|
|
74
|
-
"""
|
|
74
|
+
_MODEL_CLASS_MAPPING = {
|
|
75
|
+
"anthropic": "minisweagent.models.anthropic.AnthropicModel",
|
|
76
|
+
"litellm": "minisweagent.models.litellm_model.LitellmModel",
|
|
77
|
+
"deterministic": "minisweagent.models.test_models.DeterministicModel",
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def get_model_class(model_name: str, model_class: str = "") -> type:
|
|
82
|
+
"""Select the best model class.
|
|
83
|
+
|
|
84
|
+
If a model_class is provided (as shortcut name, or as full import path,
|
|
85
|
+
e.g., "anthropic" or "minisweagent.models.anthropic.AnthropicModel"),
|
|
86
|
+
it takes precedence over the `model_name`.
|
|
87
|
+
Otherwise, the model_name is used to select the best model class.
|
|
88
|
+
"""
|
|
89
|
+
if model_class:
|
|
90
|
+
full_path = _MODEL_CLASS_MAPPING.get(model_class, model_class)
|
|
91
|
+
try:
|
|
92
|
+
module_name, class_name = full_path.rsplit(".", 1)
|
|
93
|
+
module = importlib.import_module(module_name)
|
|
94
|
+
return getattr(module, class_name)
|
|
95
|
+
except (ValueError, ImportError, AttributeError):
|
|
96
|
+
msg = f"Unknown model class: {model_class} (resolved to {full_path}, available: {_MODEL_CLASS_MAPPING})"
|
|
97
|
+
raise ValueError(msg)
|
|
98
|
+
|
|
75
99
|
if any(s in model_name.lower() for s in ["anthropic", "sonnet", "opus", "claude"]):
|
|
76
100
|
from minisweagent.models.anthropic import AnthropicModel
|
|
77
101
|
|
|
78
102
|
return AnthropicModel
|
|
103
|
+
|
|
104
|
+
# Default to LitellmModel
|
|
79
105
|
from minisweagent.models.litellm_model import LitellmModel
|
|
80
106
|
|
|
81
107
|
return LitellmModel
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import random
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
from dataclasses import asdict, dataclass
|
|
4
|
+
|
|
5
|
+
from minisweagent import Model
|
|
6
|
+
from minisweagent.models import get_model
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
|
|
10
|
+
class RouletteModelConfig:
|
|
11
|
+
model_kwargs: list[dict]
|
|
12
|
+
"""The models to choose from"""
|
|
13
|
+
model_name: str = "roulette"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RouletteModel:
|
|
17
|
+
def __init__(self, *, config_class: Callable = RouletteModelConfig, **kwargs):
|
|
18
|
+
"""This "meta"-model randomly selects one of the models at every call"""
|
|
19
|
+
self.config = config_class(**kwargs)
|
|
20
|
+
self.models = [get_model(config=config) for config in self.config.model_kwargs]
|
|
21
|
+
|
|
22
|
+
@property
|
|
23
|
+
def cost(self) -> float:
|
|
24
|
+
return sum(model.cost for model in self.models)
|
|
25
|
+
|
|
26
|
+
@property
|
|
27
|
+
def n_calls(self) -> int:
|
|
28
|
+
return sum(model.n_calls for model in self.models)
|
|
29
|
+
|
|
30
|
+
def get_template_vars(self) -> dict:
|
|
31
|
+
return asdict(self.config) | {"n_model_calls": self.n_calls, "model_cost": self.cost}
|
|
32
|
+
|
|
33
|
+
def select_model(self) -> Model:
|
|
34
|
+
return random.choice(self.models)
|
|
35
|
+
|
|
36
|
+
def query(self, *args, **kwargs) -> dict:
|
|
37
|
+
model = self.select_model()
|
|
38
|
+
response = model.query(*args, **kwargs)
|
|
39
|
+
response["model_name"] = model.config.model_name
|
|
40
|
+
return response
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
|
|
44
|
+
class InterleavingModelConfig:
|
|
45
|
+
model_kwargs: list[dict]
|
|
46
|
+
sequence: list[int] | None = None
|
|
47
|
+
"""If set to 0, 0, 1, we will return the first model 2 times, then the second model 1 time,
|
|
48
|
+
then the first model again, etc."""
|
|
49
|
+
model_name: str = "interleaving"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class InterleavingModel(RouletteModel):
|
|
53
|
+
def __init__(self, *, config_class: Callable = InterleavingModelConfig, **kwargs):
|
|
54
|
+
"""This "meta"-model alternates between the models in the sequence for every call"""
|
|
55
|
+
super().__init__(config_class=config_class, **kwargs)
|
|
56
|
+
|
|
57
|
+
def select_model(self) -> Model:
|
|
58
|
+
if self.config.sequence is None:
|
|
59
|
+
i_model = self.n_calls % len(self.models)
|
|
60
|
+
else:
|
|
61
|
+
i_model = self.config.sequence[self.n_calls % len(self.config.sequence)]
|
|
62
|
+
return self.models[i_model]
|
|
@@ -61,7 +61,15 @@ class LitellmModel:
|
|
|
61
61
|
|
|
62
62
|
def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
|
|
63
63
|
response = self._query(messages, **kwargs)
|
|
64
|
-
|
|
64
|
+
try:
|
|
65
|
+
cost = litellm.cost_calculator.completion_cost(response)
|
|
66
|
+
except Exception as e:
|
|
67
|
+
logger.critical(
|
|
68
|
+
f"Error calculating cost for model {self.config.model_name}: {e}. "
|
|
69
|
+
"Please check the 'Updating the model registry' section in the documentation. "
|
|
70
|
+
"http://bit.ly/4p31bi4 Still stuck? Please open a github issue for help!"
|
|
71
|
+
)
|
|
72
|
+
raise
|
|
65
73
|
self.n_calls += 1
|
|
66
74
|
self.cost += cost
|
|
67
75
|
GLOBAL_MODEL_STATS.add(cost)
|
|
@@ -24,7 +24,7 @@ from minisweagent.environments import get_environment
|
|
|
24
24
|
from minisweagent.models import get_model
|
|
25
25
|
from minisweagent.run.extra.utils.batch_progress import RunBatchProgressManager
|
|
26
26
|
from minisweagent.run.utils.save import save_traj
|
|
27
|
-
from minisweagent.utils.log import
|
|
27
|
+
from minisweagent.utils.log import add_file_handler, logger
|
|
28
28
|
|
|
29
29
|
_HELP_TEXT = """Run mini-SWE-agent on SWEBench instances.
|
|
30
30
|
|
|
@@ -78,7 +78,7 @@ def get_swebench_docker_image_name(instance: dict) -> str:
|
|
|
78
78
|
|
|
79
79
|
def get_sb_environment(config: dict, instance: dict) -> Environment:
|
|
80
80
|
image_name = get_swebench_docker_image_name(instance)
|
|
81
|
-
env_config = config.
|
|
81
|
+
env_config = config.setdefault("environment", {})
|
|
82
82
|
if env_config.get("environment_class") == "singularity":
|
|
83
83
|
image_name = "docker://" + image_name
|
|
84
84
|
env_config["image"] = image_name
|
|
@@ -190,6 +190,7 @@ def main(
|
|
|
190
190
|
output: str = typer.Option("", "-o", "--output", help="Output directory", rich_help_panel="Basic"),
|
|
191
191
|
workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing", rich_help_panel="Basic"),
|
|
192
192
|
model: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
|
|
193
|
+
model_class: str | None = typer.Option(None, "-c", "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
|
|
193
194
|
redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances", rich_help_panel="Data selection"),
|
|
194
195
|
config_spec: Path = typer.Option( builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
|
|
195
196
|
environment_class: str | None = typer.Option( None, "--environment-class", help="Environment type to use. Recommended are docker or singularity", rich_help_panel="Advanced"),
|
|
@@ -198,7 +199,7 @@ def main(
|
|
|
198
199
|
output_path = Path(output)
|
|
199
200
|
output_path.mkdir(parents=True, exist_ok=True)
|
|
200
201
|
logger.info(f"Results will be saved to {output_path}")
|
|
201
|
-
|
|
202
|
+
add_file_handler(output_path / "minisweagent.log")
|
|
202
203
|
|
|
203
204
|
dataset_path = DATASET_MAPPING.get(subset, subset)
|
|
204
205
|
logger.info(f"Loading dataset {dataset_path}, split {split}...")
|
|
@@ -217,6 +218,8 @@ def main(
|
|
|
217
218
|
config.setdefault("environment", {})["environment_class"] = environment_class
|
|
218
219
|
if model is not None:
|
|
219
220
|
config.setdefault("model", {})["model_name"] = model
|
|
221
|
+
if model_class is not None:
|
|
222
|
+
config.setdefault("model", {})["model_class"] = model_class
|
|
220
223
|
|
|
221
224
|
progress_manager = RunBatchProgressManager(len(instances), output_path / f"exit_statuses_{time.time()}.yaml")
|
|
222
225
|
|
{mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/extra/swebench_single.py
RENAMED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""Run on a single SWE-Bench instance."""
|
|
2
2
|
|
|
3
|
+
import traceback
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
|
|
5
6
|
import typer
|
|
@@ -29,6 +30,7 @@ def main(
|
|
|
29
30
|
split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
|
|
30
31
|
instance_spec: str = typer.Option(0, "-i", "--instance", help="SWE-Bench instance ID or index", rich_help_panel="Data selection"),
|
|
31
32
|
model_name: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
|
|
33
|
+
model_class: str | None = typer.Option(None, "-c", "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
|
|
32
34
|
config_path: Path = typer.Option( builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
|
|
33
35
|
environment_class: str | None = typer.Option(None, "--environment-class", rich_help_panel="Advanced"),
|
|
34
36
|
exit_immediately: bool = typer.Option( False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting.", rich_help_panel="Basic"),
|
|
@@ -49,6 +51,8 @@ def main(
|
|
|
49
51
|
config = yaml.safe_load(get_config_path(config_path).read_text())
|
|
50
52
|
if environment_class is not None:
|
|
51
53
|
config.setdefault("environment", {})["environment_class"] = environment_class
|
|
54
|
+
if model_class is not None:
|
|
55
|
+
config.setdefault("model", {})["model_class"] = model_class
|
|
52
56
|
if exit_immediately:
|
|
53
57
|
config.setdefault("agent", {})["confirm_exit"] = False
|
|
54
58
|
env = get_sb_environment(config, instance)
|
|
@@ -58,11 +62,15 @@ def main(
|
|
|
58
62
|
**({"mode": "yolo"} | config.get("agent", {})),
|
|
59
63
|
)
|
|
60
64
|
|
|
61
|
-
exit_status, result = None, None
|
|
65
|
+
exit_status, result, extra_info = None, None, None
|
|
62
66
|
try:
|
|
63
67
|
exit_status, result = agent.run(instance["problem_statement"]) # type: ignore[arg-type]
|
|
68
|
+
except Exception as e:
|
|
69
|
+
logger.error(f"Error processing instance {instance_spec}: {e}", exc_info=True)
|
|
70
|
+
exit_status, result = type(e).__name__, str(e)
|
|
71
|
+
extra_info = {"traceback": traceback.format_exc()}
|
|
64
72
|
finally:
|
|
65
|
-
save_traj(agent, output, exit_status=exit_status, result=result) # type: ignore[arg-type]
|
|
73
|
+
save_traj(agent, output, exit_status=exit_status, result=result, extra_info=extra_info) # type: ignore[arg-type]
|
|
66
74
|
|
|
67
75
|
|
|
68
76
|
if __name__ == "__main__":
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
# Read this first: https://mini-swe-agent.com/latest/usage/mini/ (usage)
|
|
5
5
|
|
|
6
6
|
import os
|
|
7
|
+
import traceback
|
|
7
8
|
from pathlib import Path
|
|
8
9
|
from typing import Any
|
|
9
10
|
|
|
@@ -22,6 +23,7 @@ from minisweagent.environments.local import LocalEnvironment
|
|
|
22
23
|
from minisweagent.models import get_model
|
|
23
24
|
from minisweagent.run.extra.config import configure_if_first_time
|
|
24
25
|
from minisweagent.run.utils.save import save_traj
|
|
26
|
+
from minisweagent.utils.log import logger
|
|
25
27
|
|
|
26
28
|
DEFAULT_CONFIG = Path(os.getenv("MSWEA_MINI_CONFIG_PATH", builtin_config_dir / "mini.yaml"))
|
|
27
29
|
DEFAULT_OUTPUT = global_config_dir / "last_mini_run.traj.json"
|
|
@@ -41,29 +43,19 @@ More information about the usage: [bold green]https://mini-swe-agent.com/latest/
|
|
|
41
43
|
"""
|
|
42
44
|
|
|
43
45
|
|
|
46
|
+
# fmt: off
|
|
44
47
|
@app.command(help=_HELP_TEXT)
|
|
45
48
|
def main(
|
|
46
|
-
visual: bool = typer.Option(
|
|
47
|
-
|
|
48
|
-
"-v",
|
|
49
|
-
"--visual",
|
|
50
|
-
help="Toggle (pager-style) UI (Textual) depending on the MSWEA_VISUAL_MODE_DEFAULT environment setting",
|
|
51
|
-
),
|
|
52
|
-
model_name: str | None = typer.Option(
|
|
53
|
-
None,
|
|
54
|
-
"-m",
|
|
55
|
-
"--model",
|
|
56
|
-
help="Model to use",
|
|
57
|
-
),
|
|
49
|
+
visual: bool = typer.Option(False, "-v", "--visual", help="Toggle (pager-style) UI (Textual) depending on the MSWEA_VISUAL_MODE_DEFAULT environment setting",),
|
|
50
|
+
model_name: str | None = typer.Option( None, "-m", "--model", help="Model to use",),
|
|
58
51
|
task: str | None = typer.Option(None, "-t", "--task", help="Task/problem statement", show_default=False),
|
|
59
52
|
yolo: bool = typer.Option(False, "-y", "--yolo", help="Run without confirmation"),
|
|
60
53
|
cost_limit: float | None = typer.Option(None, "-l", "--cost-limit", help="Cost limit. Set to 0 to disable."),
|
|
61
54
|
config_spec: Path = typer.Option(DEFAULT_CONFIG, "-c", "--config", help="Path to config file"),
|
|
62
55
|
output: Path | None = typer.Option(DEFAULT_OUTPUT, "-o", "--output", help="Output trajectory file"),
|
|
63
|
-
exit_immediately: bool = typer.Option(
|
|
64
|
-
False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting."
|
|
65
|
-
),
|
|
56
|
+
exit_immediately: bool = typer.Option( False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting."),
|
|
66
57
|
) -> Any:
|
|
58
|
+
# fmt: on
|
|
67
59
|
configure_if_first_time()
|
|
68
60
|
config = yaml.safe_load(get_config_path(config_spec).read_text())
|
|
69
61
|
|
|
@@ -92,13 +84,18 @@ def main(
|
|
|
92
84
|
agent_class = InteractiveAgent
|
|
93
85
|
if visual == (os.getenv("MSWEA_VISUAL_MODE_DEFAULT", "false") == "false"):
|
|
94
86
|
agent_class = TextualAgent
|
|
95
|
-
|
|
87
|
+
|
|
96
88
|
agent = agent_class(model, env, **config.get("agent", {}))
|
|
89
|
+
exit_status, result, extra_info = None, None, None
|
|
97
90
|
try:
|
|
98
91
|
exit_status, result = agent.run(task) # type: ignore[arg-type]
|
|
92
|
+
except Exception as e:
|
|
93
|
+
logger.error(f"Error running agent: {e}", exc_info=True)
|
|
94
|
+
exit_status, result = type(e).__name__, str(e)
|
|
95
|
+
extra_info = {"traceback": traceback.format_exc()}
|
|
99
96
|
finally:
|
|
100
97
|
if output:
|
|
101
|
-
save_traj(agent, output, exit_status=exit_status, result=result) # type: ignore[arg-type]
|
|
98
|
+
save_traj(agent, output, exit_status=exit_status, result=result, extra_info=extra_info) # type: ignore[arg-type]
|
|
102
99
|
return agent
|
|
103
100
|
|
|
104
101
|
|
|
@@ -1,10 +1,24 @@
|
|
|
1
|
+
import dataclasses
|
|
1
2
|
import json
|
|
2
3
|
from collections.abc import Callable
|
|
3
4
|
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
4
6
|
|
|
5
7
|
from minisweagent import Agent, __version__
|
|
6
8
|
|
|
7
9
|
|
|
10
|
+
def _get_class_name_with_module(obj: Any) -> str:
|
|
11
|
+
"""Get the full class name with module path."""
|
|
12
|
+
return f"{obj.__class__.__module__}.{obj.__class__.__name__}"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _asdict(obj: Any) -> dict:
|
|
16
|
+
"""Convert config objects to dicts."""
|
|
17
|
+
if dataclasses.is_dataclass(obj):
|
|
18
|
+
return dataclasses.asdict(obj) # type: ignore[arg-type]
|
|
19
|
+
return obj # let's try our luck
|
|
20
|
+
|
|
21
|
+
|
|
8
22
|
def save_traj(
|
|
9
23
|
agent: Agent | None,
|
|
10
24
|
path: Path,
|
|
@@ -45,6 +59,14 @@ def save_traj(
|
|
|
45
59
|
data["info"]["model_stats"]["instance_cost"] = agent.model.cost
|
|
46
60
|
data["info"]["model_stats"]["api_calls"] = agent.model.n_calls
|
|
47
61
|
data["messages"] = agent.messages
|
|
62
|
+
data["info"]["config"] = {
|
|
63
|
+
"agent": _asdict(agent.config),
|
|
64
|
+
"model": _asdict(agent.model.config),
|
|
65
|
+
"environment": _asdict(agent.env.config),
|
|
66
|
+
"agent_type": _get_class_name_with_module(agent),
|
|
67
|
+
"model_type": _get_class_name_with_module(agent.model),
|
|
68
|
+
"environment_type": _get_class_name_with_module(agent.env),
|
|
69
|
+
}
|
|
48
70
|
if extra_info:
|
|
49
71
|
data["info"].update(extra_info)
|
|
50
72
|
|
|
File without changes
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from rich.logging import RichHandler
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _setup_root_logger() -> None:
|
|
8
|
+
logger = logging.getLogger("minisweagent")
|
|
9
|
+
logger.setLevel(logging.DEBUG)
|
|
10
|
+
_handler = RichHandler(
|
|
11
|
+
show_path=False,
|
|
12
|
+
show_time=False,
|
|
13
|
+
show_level=False,
|
|
14
|
+
markup=True,
|
|
15
|
+
)
|
|
16
|
+
_formatter = logging.Formatter("%(name)s: %(levelname)s: %(message)s")
|
|
17
|
+
_handler.setFormatter(_formatter)
|
|
18
|
+
logger.addHandler(_handler)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def add_file_handler(path: Path | str, level: int = logging.DEBUG, *, print_path: bool = True) -> None:
|
|
22
|
+
logger = logging.getLogger("minisweagent")
|
|
23
|
+
handler = logging.FileHandler(path)
|
|
24
|
+
handler.setLevel(level)
|
|
25
|
+
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
|
26
|
+
handler.setFormatter(formatter)
|
|
27
|
+
logger.addHandler(handler)
|
|
28
|
+
if print_path:
|
|
29
|
+
print(f"Logging to '{path}'")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
_setup_root_logger()
|
|
33
|
+
logger = logging.getLogger("minisweagent")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
__all__ = ["logger"]
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
|
|
4
|
-
MINI_LOGGERS = {}
|
|
5
|
-
_EXTRA_HANDLERS = []
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def get_logger(name: str) -> logging.Logger:
|
|
9
|
-
if name in MINI_LOGGERS:
|
|
10
|
-
return MINI_LOGGERS[name]
|
|
11
|
-
logger = logging.getLogger(name)
|
|
12
|
-
logger.setLevel(logging.DEBUG)
|
|
13
|
-
handler = logging.StreamHandler()
|
|
14
|
-
formatter = logging.Formatter("%(name)s: %(levelname)s: %(message)s")
|
|
15
|
-
handler.setFormatter(formatter)
|
|
16
|
-
logger.addHandler(handler)
|
|
17
|
-
for handler in _EXTRA_HANDLERS:
|
|
18
|
-
logger.addHandler(handler)
|
|
19
|
-
MINI_LOGGERS[name] = logger
|
|
20
|
-
return logger
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def add_file_handlers(path: Path):
|
|
24
|
-
handler = logging.FileHandler(path)
|
|
25
|
-
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
|
26
|
-
handler.setFormatter(formatter)
|
|
27
|
-
_EXTRA_HANDLERS.append(handler)
|
|
28
|
-
for logger in MINI_LOGGERS.values():
|
|
29
|
-
logger.addHandler(handler)
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
logger = get_logger("minisweagent")
|
|
File without changes
|
|
File without changes
|
{mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/mini_swe_agent.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/environments/extra/__init__.py
RENAMED
|
File without changes
|
{mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/environments/extra/swerex_docker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/models/utils/cache_control.py
RENAMED
|
File without changes
|
{mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/models/utils/key_per_thread.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/extra/utils/batch_progress.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|