mini-swe-agent 1.17.5__py3-none-any.whl → 2.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/METADATA +36 -52
- mini_swe_agent-2.0.0a1.dist-info/RECORD +70 -0
- mini_swe_agent-2.0.0a1.dist-info/entry_points.txt +5 -0
- minisweagent/__init__.py +19 -26
- minisweagent/agents/default.py +128 -113
- minisweagent/agents/interactive.py +119 -58
- minisweagent/config/README.md +3 -4
- minisweagent/config/__init__.py +36 -1
- minisweagent/config/benchmarks/swebench.yaml +156 -0
- minisweagent/config/{extra/swebench.yaml → benchmarks/swebench_backticks.yaml} +69 -64
- minisweagent/config/benchmarks/swebench_modal.yaml +47 -0
- minisweagent/config/{extra → benchmarks}/swebench_xml.yaml +73 -70
- minisweagent/config/default.yaml +24 -21
- minisweagent/config/inspector.tcss +42 -0
- minisweagent/config/mini.yaml +53 -71
- minisweagent/config/{github_issue.yaml → mini_textbased.yaml} +43 -29
- minisweagent/environments/__init__.py +1 -0
- minisweagent/environments/docker.py +67 -20
- minisweagent/environments/extra/bubblewrap.py +86 -47
- minisweagent/environments/extra/swerex_docker.py +53 -20
- minisweagent/environments/extra/swerex_modal.py +90 -0
- minisweagent/environments/local.py +62 -21
- minisweagent/environments/singularity.py +59 -18
- minisweagent/exceptions.py +22 -0
- minisweagent/models/__init__.py +6 -7
- minisweagent/models/extra/roulette.py +20 -17
- minisweagent/models/litellm_model.py +90 -44
- minisweagent/models/litellm_response_model.py +80 -0
- minisweagent/models/litellm_textbased_model.py +45 -0
- minisweagent/models/openrouter_model.py +87 -45
- minisweagent/models/openrouter_response_model.py +123 -0
- minisweagent/models/openrouter_textbased_model.py +76 -0
- minisweagent/models/portkey_model.py +84 -42
- minisweagent/models/portkey_response_model.py +163 -0
- minisweagent/models/requesty_model.py +91 -41
- minisweagent/models/test_models.py +246 -19
- minisweagent/models/utils/actions_text.py +60 -0
- minisweagent/models/utils/actions_toolcall.py +102 -0
- minisweagent/models/utils/actions_toolcall_response.py +110 -0
- minisweagent/models/utils/anthropic_utils.py +28 -0
- minisweagent/models/utils/cache_control.py +15 -2
- minisweagent/models/utils/content_string.py +74 -0
- minisweagent/models/utils/openai_multimodal.py +50 -0
- minisweagent/models/utils/retry.py +25 -0
- minisweagent/run/benchmarks/__init__.py +1 -0
- minisweagent/run/{extra → benchmarks}/swebench.py +56 -35
- minisweagent/run/{extra → benchmarks}/swebench_single.py +36 -26
- minisweagent/run/{extra → benchmarks}/utils/batch_progress.py +1 -1
- minisweagent/run/hello_world.py +6 -0
- minisweagent/run/mini.py +54 -63
- minisweagent/run/utilities/__init__.py +1 -0
- minisweagent/run/{extra → utilities}/config.py +2 -0
- minisweagent/run/{inspector.py → utilities/inspector.py} +90 -11
- minisweagent/run/{mini_extra.py → utilities/mini_extra.py} +9 -5
- minisweagent/utils/serialize.py +26 -0
- mini_swe_agent-1.17.5.dist-info/RECORD +0 -61
- mini_swe_agent-1.17.5.dist-info/entry_points.txt +0 -5
- minisweagent/agents/interactive_textual.py +0 -450
- minisweagent/config/extra/swebench_roulette.yaml +0 -233
- minisweagent/config/mini.tcss +0 -86
- minisweagent/models/anthropic.py +0 -35
- minisweagent/models/litellm_response_api_model.py +0 -82
- minisweagent/models/portkey_response_api_model.py +0 -75
- minisweagent/models/utils/key_per_thread.py +0 -20
- minisweagent/models/utils/openai_utils.py +0 -41
- minisweagent/run/github_issue.py +0 -87
- minisweagent/run/utils/__init__.py +0 -0
- minisweagent/run/utils/save.py +0 -78
- {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/WHEEL +0 -0
- {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/licenses/LICENSE.md +0 -0
- {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/top_level.txt +0 -0
- /minisweagent/config/{extra → benchmarks}/__init__.py +0 -0
- /minisweagent/run/{extra → benchmarks}/utils/__init__.py +0 -0
|
@@ -9,25 +9,28 @@ agent:
|
|
|
9
9
|
<format_example>
|
|
10
10
|
Your reasoning and analysis here. Explain why you want to perform the action.
|
|
11
11
|
|
|
12
|
-
```
|
|
12
|
+
```mswea_bash_command
|
|
13
13
|
your_command_here
|
|
14
14
|
```
|
|
15
15
|
</format_example>
|
|
16
16
|
|
|
17
17
|
Failure to follow these rules will cause your response to be rejected.
|
|
18
|
-
To finish, issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
|
|
19
|
-
without any other command.
|
|
20
18
|
instance_template: |
|
|
21
19
|
Please solve this issue: {{task}}
|
|
22
20
|
|
|
23
21
|
You can execute bash commands and edit files to implement the necessary changes.
|
|
24
22
|
|
|
25
23
|
## Recommended Workflow
|
|
24
|
+
|
|
25
|
+
This workflow should be done step-by-step so that you can iterate on your changes and any possible problems.
|
|
26
|
+
|
|
26
27
|
1. Analyze the codebase by finding and reading relevant files
|
|
27
28
|
2. Create a script to reproduce the issue
|
|
28
29
|
3. Edit the source code to resolve the issue
|
|
29
30
|
4. Verify your fix works by running your script again
|
|
30
31
|
5. Test edge cases to ensure your fix is robust
|
|
32
|
+
6. Submit your changes and finish your work by issuing the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`.
|
|
33
|
+
Do not combine it with any other command. <important>After this command, you cannot continue working on this task.</important>
|
|
31
34
|
|
|
32
35
|
## Important Rules
|
|
33
36
|
|
|
@@ -35,8 +38,10 @@ agent:
|
|
|
35
38
|
2. The action must be enclosed in triple backticks
|
|
36
39
|
3. Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
|
|
37
40
|
However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
|
|
38
|
-
|
|
39
|
-
|
|
41
|
+
|
|
42
|
+
<system_information>
|
|
43
|
+
{{system}} {{release}} {{version}} {{machine}}
|
|
44
|
+
</system_information>
|
|
40
45
|
|
|
41
46
|
## Formatting your response
|
|
42
47
|
|
|
@@ -45,7 +50,7 @@ agent:
|
|
|
45
50
|
<example_response>
|
|
46
51
|
THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
|
|
47
52
|
|
|
48
|
-
```
|
|
53
|
+
```mswea_bash_command
|
|
49
54
|
ls -la
|
|
50
55
|
```
|
|
51
56
|
</example_response>
|
|
@@ -54,7 +59,7 @@ agent:
|
|
|
54
59
|
|
|
55
60
|
### Create a new file:
|
|
56
61
|
|
|
57
|
-
```
|
|
62
|
+
```mswea_bash_command
|
|
58
63
|
cat <<'EOF' > newfile.py
|
|
59
64
|
import numpy as np
|
|
60
65
|
hello = "world"
|
|
@@ -64,7 +69,13 @@ agent:
|
|
|
64
69
|
|
|
65
70
|
### Edit files with sed:
|
|
66
71
|
|
|
67
|
-
|
|
72
|
+
{%- if system == "Darwin" -%}
|
|
73
|
+
<important>
|
|
74
|
+
You are on MacOS. For all the below examples, you need to use `sed -i ''` instead of `sed -i`.
|
|
75
|
+
</important>
|
|
76
|
+
{%- endif -%}
|
|
77
|
+
|
|
78
|
+
```mswea_bash_command
|
|
68
79
|
# Replace all occurrences
|
|
69
80
|
sed -i 's/old_string/new_string/g' filename.py
|
|
70
81
|
|
|
@@ -80,17 +91,31 @@ agent:
|
|
|
80
91
|
|
|
81
92
|
### View file content:
|
|
82
93
|
|
|
83
|
-
```
|
|
94
|
+
```mswea_bash_command
|
|
84
95
|
# View specific lines with numbers
|
|
85
96
|
nl -ba filename.py | sed -n '10,20p'
|
|
86
97
|
```
|
|
87
98
|
|
|
88
99
|
### Any other command you want to run
|
|
89
100
|
|
|
90
|
-
```
|
|
101
|
+
```mswea_bash_command
|
|
91
102
|
anything
|
|
92
103
|
```
|
|
93
|
-
|
|
104
|
+
step_limit: 0
|
|
105
|
+
cost_limit: 3.
|
|
106
|
+
mode: confirm
|
|
107
|
+
environment:
|
|
108
|
+
env:
|
|
109
|
+
PAGER: cat
|
|
110
|
+
MANPAGER: cat
|
|
111
|
+
LESS: -R
|
|
112
|
+
PIP_PROGRESS_BAR: 'off'
|
|
113
|
+
TQDM_DISABLE: '1'
|
|
114
|
+
model:
|
|
115
|
+
observation_template: |
|
|
116
|
+
{% if output.exception_info -%}
|
|
117
|
+
<exception>{{output.exception_info}}</exception>
|
|
118
|
+
{% endif -%}
|
|
94
119
|
<returncode>{{output.returncode}}</returncode>
|
|
95
120
|
{% if output.output | length < 10000 -%}
|
|
96
121
|
<output>
|
|
@@ -115,31 +140,20 @@ agent:
|
|
|
115
140
|
{{ output.output[-5000:] }}
|
|
116
141
|
</output_tail>
|
|
117
142
|
{%- endif -%}
|
|
143
|
+
model_kwargs:
|
|
144
|
+
drop_params: true
|
|
118
145
|
format_error_template: |
|
|
119
146
|
Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
Else, please format your response exactly as follows:
|
|
147
|
+
|
|
148
|
+
Please format your action in triple backticks as shown in <response_example>.
|
|
123
149
|
|
|
124
150
|
<response_example>
|
|
125
151
|
Here are some thoughts about why you want to perform the action.
|
|
126
152
|
|
|
127
|
-
```
|
|
153
|
+
```mswea_bash_command
|
|
128
154
|
<action>
|
|
129
155
|
```
|
|
130
156
|
</response_example>
|
|
131
|
-
step_limit: 0.
|
|
132
|
-
cost_limit: 0.
|
|
133
157
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
cwd: "/testbed"
|
|
137
|
-
env:
|
|
138
|
-
PAGER: cat
|
|
139
|
-
MANPAGER: cat
|
|
140
|
-
LESS: -R
|
|
141
|
-
PIP_PROGRESS_BAR: 'off'
|
|
142
|
-
TQDM_DISABLE: '1'
|
|
143
|
-
model:
|
|
144
|
-
model_kwargs:
|
|
145
|
-
drop_params: true
|
|
158
|
+
If you have completed your assignment, please consult the first message about how to
|
|
159
|
+
submit your solution (you will not be able to continue working on this task after that).
|
|
@@ -10,6 +10,7 @@ _ENVIRONMENT_MAPPING = {
|
|
|
10
10
|
"singularity": "minisweagent.environments.singularity.SingularityEnvironment",
|
|
11
11
|
"local": "minisweagent.environments.local.LocalEnvironment",
|
|
12
12
|
"swerex_docker": "minisweagent.environments.extra.swerex_docker.SwerexDockerEnvironment",
|
|
13
|
+
"swerex_modal": "minisweagent.environments.extra.swerex_modal.SwerexModalEnvironment",
|
|
13
14
|
"bubblewrap": "minisweagent.environments.extra.bubblewrap.BubblewrapEnvironment",
|
|
14
15
|
}
|
|
15
16
|
|
|
@@ -1,20 +1,24 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import os
|
|
3
|
+
import platform
|
|
3
4
|
import shlex
|
|
4
5
|
import subprocess
|
|
5
6
|
import uuid
|
|
6
|
-
from dataclasses import asdict, dataclass, field
|
|
7
7
|
from typing import Any
|
|
8
8
|
|
|
9
|
+
from pydantic import BaseModel
|
|
9
10
|
|
|
10
|
-
|
|
11
|
-
|
|
11
|
+
from minisweagent.exceptions import Submitted
|
|
12
|
+
from minisweagent.utils.serialize import recursive_merge
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DockerEnvironmentConfig(BaseModel):
|
|
12
16
|
image: str
|
|
13
17
|
cwd: str = "/"
|
|
14
18
|
"""Working directory in which to execute commands."""
|
|
15
|
-
env: dict[str, str] =
|
|
19
|
+
env: dict[str, str] = {}
|
|
16
20
|
"""Environment variables to set in the container."""
|
|
17
|
-
forward_env: list[str] =
|
|
21
|
+
forward_env: list[str] = []
|
|
18
22
|
"""Environment variables to forward to the container.
|
|
19
23
|
Variables are only forwarded if they are set in the host environment.
|
|
20
24
|
In case of conflict with `env`, the `env` variables take precedence.
|
|
@@ -23,7 +27,7 @@ class DockerEnvironmentConfig:
|
|
|
23
27
|
"""Timeout for executing commands in the container."""
|
|
24
28
|
executable: str = os.getenv("MSWEA_DOCKER_EXECUTABLE", "docker")
|
|
25
29
|
"""Path to the docker/container executable."""
|
|
26
|
-
run_args: list[str] =
|
|
30
|
+
run_args: list[str] = ["--rm"]
|
|
27
31
|
"""Additional arguments to pass to the docker/container executable.
|
|
28
32
|
Default is ["--rm"], which removes the container after it exits.
|
|
29
33
|
"""
|
|
@@ -31,6 +35,11 @@ class DockerEnvironmentConfig:
|
|
|
31
35
|
"""Max duration to keep container running. Uses the same format as the sleep command."""
|
|
32
36
|
pull_timeout: int = 120
|
|
33
37
|
"""Timeout in seconds for pulling images."""
|
|
38
|
+
interpreter: list[str] = ["bash", "-lc"]
|
|
39
|
+
"""Interpreter to use to execute commands. Default is ["bash", "-lc"].
|
|
40
|
+
The actual command will be appended as argument to this. Override this to e.g., modify shell flags
|
|
41
|
+
(e.g., to remove the `-l` flag to disable login shell) or to use python instead of bash to interpret commands.
|
|
42
|
+
"""
|
|
34
43
|
|
|
35
44
|
|
|
36
45
|
class DockerEnvironment:
|
|
@@ -49,8 +58,18 @@ class DockerEnvironment:
|
|
|
49
58
|
self.config = config_class(**kwargs)
|
|
50
59
|
self._start_container()
|
|
51
60
|
|
|
52
|
-
def get_template_vars(self) -> dict[str, Any]:
|
|
53
|
-
return
|
|
61
|
+
def get_template_vars(self, **kwargs) -> dict[str, Any]:
|
|
62
|
+
return recursive_merge(self.config.model_dump(), platform.uname()._asdict(), kwargs)
|
|
63
|
+
|
|
64
|
+
def serialize(self) -> dict:
|
|
65
|
+
return {
|
|
66
|
+
"info": {
|
|
67
|
+
"config": {
|
|
68
|
+
"environment": self.config.model_dump(mode="json"),
|
|
69
|
+
"environment_type": f"{self.__class__.__module__}.{self.__class__.__name__}",
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
}
|
|
54
73
|
|
|
55
74
|
def _start_container(self):
|
|
56
75
|
"""Start the Docker container and return the container ID."""
|
|
@@ -79,8 +98,9 @@ class DockerEnvironment:
|
|
|
79
98
|
self.logger.info(f"Started container {container_name} with ID {result.stdout.strip()}")
|
|
80
99
|
self.container_id = result.stdout.strip()
|
|
81
100
|
|
|
82
|
-
def execute(self,
|
|
101
|
+
def execute(self, action: dict, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
|
|
83
102
|
"""Execute a command in the Docker container and return the result as a dict."""
|
|
103
|
+
command = action.get("command", "")
|
|
84
104
|
cwd = cwd or self.config.cwd
|
|
85
105
|
assert self.container_id, "Container not started"
|
|
86
106
|
|
|
@@ -90,18 +110,45 @@ class DockerEnvironment:
|
|
|
90
110
|
cmd.extend(["-e", f"{key}={value}"])
|
|
91
111
|
for key, value in self.config.env.items():
|
|
92
112
|
cmd.extend(["-e", f"{key}={value}"])
|
|
93
|
-
cmd.extend([self.container_id,
|
|
113
|
+
cmd.extend([self.container_id, *self.config.interpreter, command])
|
|
94
114
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
115
|
+
try:
|
|
116
|
+
result = subprocess.run(
|
|
117
|
+
cmd,
|
|
118
|
+
text=True,
|
|
119
|
+
timeout=timeout or self.config.timeout,
|
|
120
|
+
encoding="utf-8",
|
|
121
|
+
errors="replace",
|
|
122
|
+
stdout=subprocess.PIPE,
|
|
123
|
+
stderr=subprocess.STDOUT,
|
|
124
|
+
)
|
|
125
|
+
output = {"output": result.stdout, "returncode": result.returncode, "exception_info": ""}
|
|
126
|
+
except Exception as e:
|
|
127
|
+
raw_output = getattr(e, "output", None)
|
|
128
|
+
raw_output = (
|
|
129
|
+
raw_output.decode("utf-8", errors="replace") if isinstance(raw_output, bytes) else (raw_output or "")
|
|
130
|
+
)
|
|
131
|
+
output = {
|
|
132
|
+
"output": raw_output,
|
|
133
|
+
"returncode": -1,
|
|
134
|
+
"exception_info": f"An error occurred while executing the command: {e}",
|
|
135
|
+
"extra": {"exception_type": type(e).__name__, "exception": str(e)},
|
|
136
|
+
}
|
|
137
|
+
self._check_finished(output)
|
|
138
|
+
return output
|
|
139
|
+
|
|
140
|
+
def _check_finished(self, output: dict):
|
|
141
|
+
"""Raises Submitted if the output indicates task completion."""
|
|
142
|
+
lines = output.get("output", "").lstrip().splitlines(keepends=True)
|
|
143
|
+
if lines and lines[0].strip() == "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT" and output["returncode"] == 0:
|
|
144
|
+
submission = "".join(lines[1:])
|
|
145
|
+
raise Submitted(
|
|
146
|
+
{
|
|
147
|
+
"role": "exit",
|
|
148
|
+
"content": submission,
|
|
149
|
+
"extra": {"exit_status": "Submitted", "submission": submission},
|
|
150
|
+
}
|
|
151
|
+
)
|
|
105
152
|
|
|
106
153
|
def cleanup(self):
|
|
107
154
|
"""Stop and remove the Docker container."""
|
|
@@ -17,51 +17,52 @@ import shutil
|
|
|
17
17
|
import subprocess
|
|
18
18
|
import tempfile
|
|
19
19
|
import uuid
|
|
20
|
-
from dataclasses import asdict, dataclass, field
|
|
21
20
|
from pathlib import Path
|
|
22
21
|
from typing import Any
|
|
23
22
|
|
|
23
|
+
from pydantic import BaseModel
|
|
24
24
|
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
from minisweagent.exceptions import Submitted
|
|
26
|
+
from minisweagent.utils.serialize import recursive_merge
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class BubblewrapEnvironmentConfig(BaseModel):
|
|
27
30
|
cwd: str = ""
|
|
28
31
|
"""Working directory for the sandbox."""
|
|
29
|
-
env: dict[str, str] =
|
|
32
|
+
env: dict[str, str] = {}
|
|
30
33
|
"""Dictionary of environment variables to set in the sandbox."""
|
|
31
34
|
timeout: int = 30
|
|
32
35
|
"""Timeout for the command in seconds."""
|
|
33
36
|
executable: str = os.getenv("MSWEA_BUBBLEWRAP_EXECUTABLE", "bwrap")
|
|
34
37
|
"""Path to the bubblewrap executable."""
|
|
35
|
-
wrapper_args: list[str] =
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
]
|
|
64
|
-
)
|
|
38
|
+
wrapper_args: list[str] = [
|
|
39
|
+
"--unshare-user-try",
|
|
40
|
+
"--ro-bind",
|
|
41
|
+
"/usr",
|
|
42
|
+
"/usr",
|
|
43
|
+
"--ro-bind",
|
|
44
|
+
"/bin",
|
|
45
|
+
"/bin",
|
|
46
|
+
"--ro-bind",
|
|
47
|
+
"/lib",
|
|
48
|
+
"/lib",
|
|
49
|
+
"--ro-bind",
|
|
50
|
+
"/lib64",
|
|
51
|
+
"/lib64",
|
|
52
|
+
"--ro-bind",
|
|
53
|
+
"/etc",
|
|
54
|
+
"/etc",
|
|
55
|
+
"--tmpfs",
|
|
56
|
+
"/tmp",
|
|
57
|
+
"--proc",
|
|
58
|
+
"/proc",
|
|
59
|
+
"--dev",
|
|
60
|
+
"/dev",
|
|
61
|
+
"--new-session",
|
|
62
|
+
"--setenv",
|
|
63
|
+
"PATH",
|
|
64
|
+
"/usr/local/bin:/usr/sbin:/usr/bin:/bin",
|
|
65
|
+
]
|
|
65
66
|
"""Arguments to pass to the bubblewrap executable."""
|
|
66
67
|
|
|
67
68
|
|
|
@@ -77,8 +78,9 @@ class BubblewrapEnvironment:
|
|
|
77
78
|
self.working_dir = Path(tempfile.gettempdir()) / f"minisweagent-{uuid.uuid4().hex[:8]}"
|
|
78
79
|
self.working_dir.mkdir(parents=True)
|
|
79
80
|
|
|
80
|
-
def execute(self,
|
|
81
|
+
def execute(self, action: dict, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
|
|
81
82
|
"""Execute a command in the bubblewrap environment and return the result as a dict."""
|
|
83
|
+
command = action.get("command", "")
|
|
82
84
|
cwd = cwd or self.config.cwd or str(self.working_dir)
|
|
83
85
|
|
|
84
86
|
cmd = [self.config.executable] + self.config.wrapper_args + ["--bind", cwd, cwd, "--chdir", cwd]
|
|
@@ -89,16 +91,43 @@ class BubblewrapEnvironment:
|
|
|
89
91
|
|
|
90
92
|
cmd.extend(["bash", "-c", command])
|
|
91
93
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
94
|
+
try:
|
|
95
|
+
result = subprocess.run(
|
|
96
|
+
cmd,
|
|
97
|
+
text=True,
|
|
98
|
+
timeout=timeout or self.config.timeout,
|
|
99
|
+
encoding="utf-8",
|
|
100
|
+
errors="replace",
|
|
101
|
+
stdout=subprocess.PIPE,
|
|
102
|
+
stderr=subprocess.STDOUT,
|
|
103
|
+
)
|
|
104
|
+
output = {"output": result.stdout, "returncode": result.returncode, "exception_info": ""}
|
|
105
|
+
except Exception as e:
|
|
106
|
+
raw_output = getattr(e, "output", None)
|
|
107
|
+
raw_output = (
|
|
108
|
+
raw_output.decode("utf-8", errors="replace") if isinstance(raw_output, bytes) else (raw_output or "")
|
|
109
|
+
)
|
|
110
|
+
output = {
|
|
111
|
+
"output": raw_output,
|
|
112
|
+
"returncode": -1,
|
|
113
|
+
"exception_info": f"An error occurred while executing the command: {e}",
|
|
114
|
+
"extra": {"exception_type": type(e).__name__, "exception": str(e)},
|
|
115
|
+
}
|
|
116
|
+
self._check_finished(output)
|
|
117
|
+
return output
|
|
118
|
+
|
|
119
|
+
def _check_finished(self, output: dict):
|
|
120
|
+
"""Raises Submitted if the output indicates task completion."""
|
|
121
|
+
lines = output.get("output", "").lstrip().splitlines(keepends=True)
|
|
122
|
+
if lines and lines[0].strip() == "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT" and output["returncode"] == 0:
|
|
123
|
+
submission = "".join(lines[1:])
|
|
124
|
+
raise Submitted(
|
|
125
|
+
{
|
|
126
|
+
"role": "exit",
|
|
127
|
+
"content": submission,
|
|
128
|
+
"extra": {"exit_status": "Submitted", "submission": submission},
|
|
129
|
+
}
|
|
130
|
+
)
|
|
102
131
|
|
|
103
132
|
def cleanup(self):
|
|
104
133
|
if self.working_dir.exists():
|
|
@@ -108,5 +137,15 @@ class BubblewrapEnvironment:
|
|
|
108
137
|
"""Cleanup working_dir when object is destroyed."""
|
|
109
138
|
self.cleanup()
|
|
110
139
|
|
|
111
|
-
def get_template_vars(self) -> dict[str, Any]:
|
|
112
|
-
return
|
|
140
|
+
def get_template_vars(self, **kwargs) -> dict[str, Any]:
|
|
141
|
+
return recursive_merge(self.config.model_dump(), platform.uname()._asdict(), kwargs)
|
|
142
|
+
|
|
143
|
+
def serialize(self) -> dict:
|
|
144
|
+
return {
|
|
145
|
+
"info": {
|
|
146
|
+
"config": {
|
|
147
|
+
"environment": self.config.model_dump(mode="json"),
|
|
148
|
+
"environment_type": f"{self.__class__.__module__}.{self.__class__.__name__}",
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}
|
|
@@ -1,19 +1,21 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from dataclasses import asdict, dataclass, field
|
|
3
2
|
from typing import Any
|
|
4
3
|
|
|
4
|
+
from pydantic import BaseModel
|
|
5
5
|
from swerex.deployment.docker import DockerDeployment
|
|
6
6
|
from swerex.runtime.abstract import Command as RexCommand
|
|
7
7
|
|
|
8
|
+
from minisweagent.exceptions import Submitted
|
|
9
|
+
from minisweagent.utils.serialize import recursive_merge
|
|
8
10
|
|
|
9
|
-
|
|
10
|
-
class SwerexDockerEnvironmentConfig:
|
|
11
|
+
|
|
12
|
+
class SwerexDockerEnvironmentConfig(BaseModel):
|
|
11
13
|
image: str
|
|
12
14
|
cwd: str = "/"
|
|
13
15
|
"""Working directory in which to execute commands."""
|
|
14
16
|
timeout: int = 30
|
|
15
17
|
"""Timeout for executing commands in the container."""
|
|
16
|
-
deployment_extra_kwargs: dict[str, Any] =
|
|
18
|
+
deployment_extra_kwargs: dict[str, Any] = {}
|
|
17
19
|
"""Extra kwargs to pass to DockerDeployment."""
|
|
18
20
|
|
|
19
21
|
|
|
@@ -24,24 +26,55 @@ class SwerexDockerEnvironment:
|
|
|
24
26
|
self.deployment = DockerDeployment(image=self.config.image, **self.config.deployment_extra_kwargs)
|
|
25
27
|
asyncio.run(self.deployment.start())
|
|
26
28
|
|
|
27
|
-
def execute(self,
|
|
29
|
+
def execute(self, action: dict, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
|
|
28
30
|
"""Execute a command in the environment and return the raw output."""
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
31
|
+
command = action.get("command", "")
|
|
32
|
+
try:
|
|
33
|
+
result = asyncio.run(
|
|
34
|
+
self.deployment.runtime.execute(
|
|
35
|
+
RexCommand(
|
|
36
|
+
command=command,
|
|
37
|
+
shell=True,
|
|
38
|
+
check=False,
|
|
39
|
+
cwd=cwd or self.config.cwd,
|
|
40
|
+
timeout=timeout or self.config.timeout,
|
|
41
|
+
merge_output_streams=True,
|
|
42
|
+
)
|
|
38
43
|
)
|
|
39
44
|
)
|
|
40
|
-
|
|
45
|
+
output = {"output": result.stdout, "returncode": result.exit_code, "exception_info": ""}
|
|
46
|
+
except Exception as e:
|
|
47
|
+
output = {
|
|
48
|
+
"output": str(e) if str(e) else "",
|
|
49
|
+
"returncode": -1,
|
|
50
|
+
"exception_info": f"An error occurred while executing the command: {e}",
|
|
51
|
+
"extra": {"exception_type": type(e).__name__, "exception": str(e)},
|
|
52
|
+
}
|
|
53
|
+
self._check_finished(output)
|
|
54
|
+
return output
|
|
55
|
+
|
|
56
|
+
def _check_finished(self, output: dict):
|
|
57
|
+
"""Raises Submitted if the output indicates task completion."""
|
|
58
|
+
lines = output.get("output", "").lstrip().splitlines(keepends=True)
|
|
59
|
+
if lines and lines[0].strip() == "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT" and output["returncode"] == 0:
|
|
60
|
+
submission = "".join(lines[1:])
|
|
61
|
+
raise Submitted(
|
|
62
|
+
{
|
|
63
|
+
"role": "exit",
|
|
64
|
+
"content": submission,
|
|
65
|
+
"extra": {"exit_status": "Submitted", "submission": submission},
|
|
66
|
+
}
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
def get_template_vars(self, **kwargs) -> dict[str, Any]:
|
|
70
|
+
return recursive_merge(self.config.model_dump(), kwargs)
|
|
71
|
+
|
|
72
|
+
def serialize(self) -> dict:
|
|
41
73
|
return {
|
|
42
|
-
"
|
|
43
|
-
|
|
74
|
+
"info": {
|
|
75
|
+
"config": {
|
|
76
|
+
"environment": self.config.model_dump(mode="json"),
|
|
77
|
+
"environment_type": f"{self.__class__.__module__}.{self.__class__.__name__}",
|
|
78
|
+
}
|
|
79
|
+
}
|
|
44
80
|
}
|
|
45
|
-
|
|
46
|
-
def get_template_vars(self) -> dict[str, Any]:
|
|
47
|
-
return asdict(self.config)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
from swerex.deployment.modal import ModalDeployment
|
|
6
|
+
from swerex.runtime.abstract import Command as RexCommand
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Settings for SwerexModalEnvironment. Validated by pydantic; only `image` is
# required. The string literal following each field is that field's
# documentation (attribute-docstring convention used throughout this package).
class SwerexModalEnvironmentConfig(BaseModel):
    image: str
    """Image to use for the deployment. Can be:
    - Dockerhub image name (e.g. `python:3.11-slim`)
    - ECR image name (e.g. `123456789012.dkr.ecr.us-east-1.amazonaws.com/my-image:tag`)
    - Path to a Dockerfile
    """
    # Used by `execute` whenever the caller does not pass an explicit `cwd`.
    cwd: str = "/"
    """Working directory in which to execute commands."""
    # Used by `execute` whenever the caller does not pass an explicit `timeout`.
    timeout: int = 30
    """Timeout for executing commands in the container."""
    # Passed to every command as `env`; an empty dict is sent as `None`.
    env: dict[str, str] = {}
    """Environment variables to set when executing commands."""
    # The next four fields are forwarded unchanged to `ModalDeployment(...)`.
    startup_timeout: float = 60.0
    """The time to wait for the runtime to start."""
    runtime_timeout: float = 3600.0
    """The runtime timeout (how long the Modal sandbox can stay alive)."""
    deployment_timeout: float = 3600.0
    """The deployment timeout."""
    install_pipx: bool = True
    """Whether to install pipx in the container (required for swe-rex runtime)."""
    # Forwarded to `ModalDeployment` as `modal_sandbox_kwargs`.
    modal_sandbox_kwargs: dict[str, Any] = {}
    """Additional arguments to pass to `modal.Sandbox.create`."""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class SwerexModalEnvironment:
    def __init__(self, **kwargs):
        """This class executes bash commands in a Modal sandbox using SWE-ReX for remote execution.

        Modal (https://modal.com) provides serverless cloud compute that can be used to run
        sandboxed environments. This environment class uses SWE-ReX's ModalDeployment to
        create and manage Modal sandboxes for command execution.

        This is useful for:
        - Training coding agents at scale with remote execution
        - Running evaluations in isolated cloud environments
        - Parallel execution across many instances

        See `SwerexModalEnvironmentConfig` for keyword arguments.
        """
        self.config = SwerexModalEnvironmentConfig(**kwargs)
        self.deployment = ModalDeployment(
            image=self.config.image,
            startup_timeout=self.config.startup_timeout,
            runtime_timeout=self.config.runtime_timeout,
            deployment_timeout=self.config.deployment_timeout,
            install_pipx=self.config.install_pipx,
            modal_sandbox_kwargs=self.config.modal_sandbox_kwargs,
        )
        # Blocks until the sandbox is up; construction therefore performs remote I/O.
        asyncio.run(self.deployment.start())

    def execute(self, command: str | dict, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
        """Execute a command in the environment and return the result as a dict.

        Args:
            command: Either the shell command string, or an action dict with a
                "command" key (the form the other environment classes receive).
            cwd: Working directory for this command; falls back to `config.cwd`.
            timeout: Per-command timeout in seconds; falls back to `config.timeout`.

        Returns:
            A dict with "output" (merged stdout/stderr), "returncode" and
            "exception_info" ("" on success). If the remote call itself raised,
            "returncode" is -1 and an "extra" entry describes the exception.

        Raises:
            Submitted: if the command succeeded and its first output line is the
                submission sentinel (see `_check_finished`).
        """
        # Accept the action-dict calling convention used by the sibling environments
        # while still supporting the plain-string form.
        if isinstance(command, dict):
            command = command.get("command", "")
        try:
            result = asyncio.run(
                self.deployment.runtime.execute(
                    RexCommand(
                        command=command,
                        shell=True,
                        check=False,
                        cwd=cwd or self.config.cwd,
                        timeout=timeout or self.config.timeout,
                        merge_output_streams=True,
                        env=self.config.env if self.config.env else None,
                    )
                )
            )
            output = {"output": result.stdout, "returncode": result.exit_code, "exception_info": ""}
        except Exception as e:
            # Report failures (timeouts, transport errors, ...) in the result dict,
            # matching SwerexDockerEnvironment, instead of propagating them.
            output = {
                "output": str(e) if str(e) else "",
                "returncode": -1,
                "exception_info": f"An error occurred while executing the command: {e}",
                "extra": {"exception_type": type(e).__name__, "exception": str(e)},
            }
        self._check_finished(output)
        return output

    def _check_finished(self, output: dict):
        """Raises Submitted if the output indicates task completion."""
        lines = output.get("output", "").lstrip().splitlines(keepends=True)
        if lines and lines[0].strip() == "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT" and output["returncode"] == 0:
            # Imported locally so this method does not depend on the module's import block.
            from minisweagent.exceptions import Submitted

            # Everything after the sentinel line is the submission payload.
            submission = "".join(lines[1:])
            raise Submitted(
                {
                    "role": "exit",
                    "content": submission,
                    "extra": {"exit_status": "Submitted", "submission": submission},
                }
            )

    def get_template_vars(self, **kwargs) -> dict[str, Any]:
        """Return the config fields merged with any extra `kwargs`, for template rendering."""
        from minisweagent.utils.serialize import recursive_merge

        return recursive_merge(self.config.model_dump(), kwargs)

    def serialize(self) -> dict:
        """Return the JSON-compatible config and the fully qualified class name of this environment."""
        return {
            "info": {
                "config": {
                    "environment": self.config.model_dump(mode="json"),
                    "environment_type": f"{self.__class__.__module__}.{self.__class__.__name__}",
                }
            }
        }

    def stop(self):
        """Stop the deployment, waiting at most 10 seconds; failures are ignored."""

        async def _stop():
            await asyncio.wait_for(self.deployment.stop(), timeout=10)

        try:
            asyncio.run(_stop())
        except Exception:
            # Deliberate best-effort teardown: a failed or timed-out stop must not
            # mask the result of the run that is being cleaned up.
            pass
|