mini-swe-agent 1.17.4__py3-none-any.whl → 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/METADATA +36 -52
  2. mini_swe_agent-2.0.0a1.dist-info/RECORD +70 -0
  3. {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/WHEEL +1 -1
  4. mini_swe_agent-2.0.0a1.dist-info/entry_points.txt +5 -0
  5. minisweagent/__init__.py +19 -26
  6. minisweagent/agents/default.py +128 -113
  7. minisweagent/agents/interactive.py +119 -58
  8. minisweagent/config/README.md +3 -4
  9. minisweagent/config/__init__.py +36 -1
  10. minisweagent/config/benchmarks/swebench.yaml +156 -0
  11. minisweagent/config/{extra/swebench.yaml → benchmarks/swebench_backticks.yaml} +69 -64
  12. minisweagent/config/benchmarks/swebench_modal.yaml +47 -0
  13. minisweagent/config/{extra → benchmarks}/swebench_xml.yaml +73 -70
  14. minisweagent/config/default.yaml +24 -21
  15. minisweagent/config/inspector.tcss +42 -0
  16. minisweagent/config/mini.yaml +53 -71
  17. minisweagent/config/{github_issue.yaml → mini_textbased.yaml} +43 -29
  18. minisweagent/environments/__init__.py +1 -0
  19. minisweagent/environments/docker.py +67 -20
  20. minisweagent/environments/extra/bubblewrap.py +86 -47
  21. minisweagent/environments/extra/swerex_docker.py +53 -20
  22. minisweagent/environments/extra/swerex_modal.py +90 -0
  23. minisweagent/environments/local.py +62 -21
  24. minisweagent/environments/singularity.py +59 -18
  25. minisweagent/exceptions.py +22 -0
  26. minisweagent/models/__init__.py +6 -7
  27. minisweagent/models/extra/roulette.py +20 -17
  28. minisweagent/models/litellm_model.py +90 -44
  29. minisweagent/models/litellm_response_model.py +80 -0
  30. minisweagent/models/litellm_textbased_model.py +45 -0
  31. minisweagent/models/openrouter_model.py +87 -45
  32. minisweagent/models/openrouter_response_model.py +123 -0
  33. minisweagent/models/openrouter_textbased_model.py +76 -0
  34. minisweagent/models/portkey_model.py +84 -42
  35. minisweagent/models/portkey_response_model.py +163 -0
  36. minisweagent/models/requesty_model.py +91 -41
  37. minisweagent/models/test_models.py +246 -19
  38. minisweagent/models/utils/actions_text.py +60 -0
  39. minisweagent/models/utils/actions_toolcall.py +102 -0
  40. minisweagent/models/utils/actions_toolcall_response.py +110 -0
  41. minisweagent/models/utils/anthropic_utils.py +28 -0
  42. minisweagent/models/utils/cache_control.py +15 -2
  43. minisweagent/models/utils/content_string.py +74 -0
  44. minisweagent/models/utils/openai_multimodal.py +50 -0
  45. minisweagent/models/utils/retry.py +25 -0
  46. minisweagent/run/benchmarks/__init__.py +1 -0
  47. minisweagent/run/{extra → benchmarks}/swebench.py +57 -36
  48. minisweagent/run/benchmarks/swebench_single.py +89 -0
  49. minisweagent/run/{extra → benchmarks}/utils/batch_progress.py +1 -1
  50. minisweagent/run/hello_world.py +6 -0
  51. minisweagent/run/mini.py +54 -63
  52. minisweagent/run/utilities/__init__.py +1 -0
  53. minisweagent/run/{extra → utilities}/config.py +2 -0
  54. minisweagent/run/{inspector.py → utilities/inspector.py} +90 -11
  55. minisweagent/run/{mini_extra.py → utilities/mini_extra.py} +9 -5
  56. minisweagent/utils/serialize.py +26 -0
  57. mini_swe_agent-1.17.4.dist-info/RECORD +0 -61
  58. mini_swe_agent-1.17.4.dist-info/entry_points.txt +0 -5
  59. minisweagent/agents/interactive_textual.py +0 -450
  60. minisweagent/config/extra/swebench_roulette.yaml +0 -233
  61. minisweagent/config/mini.tcss +0 -86
  62. minisweagent/models/anthropic.py +0 -35
  63. minisweagent/models/litellm_response_api_model.py +0 -82
  64. minisweagent/models/portkey_response_api_model.py +0 -75
  65. minisweagent/models/utils/key_per_thread.py +0 -20
  66. minisweagent/models/utils/openai_utils.py +0 -41
  67. minisweagent/run/extra/swebench_single.py +0 -79
  68. minisweagent/run/github_issue.py +0 -87
  69. minisweagent/run/utils/__init__.py +0 -0
  70. minisweagent/run/utils/save.py +0 -78
  71. {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/licenses/LICENSE.md +0 -0
  72. {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/top_level.txt +0 -0
  73. /minisweagent/config/{extra → benchmarks}/__init__.py +0 -0
  74. /minisweagent/run/{extra → benchmarks}/utils/__init__.py +0 -0
@@ -9,25 +9,28 @@ agent:
9
9
  <format_example>
10
10
  Your reasoning and analysis here. Explain why you want to perform the action.
11
11
 
12
- ```bash
12
+ ```mswea_bash_command
13
13
  your_command_here
14
14
  ```
15
15
  </format_example>
16
16
 
17
17
  Failure to follow these rules will cause your response to be rejected.
18
- To finish, issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
19
- without any other command.
20
18
  instance_template: |
21
19
  Please solve this issue: {{task}}
22
20
 
23
21
  You can execute bash commands and edit files to implement the necessary changes.
24
22
 
25
23
  ## Recommended Workflow
24
+
25
+ This workflow should be done step-by-step so that you can iterate on your changes and any possible problems.
26
+
26
27
  1. Analyze the codebase by finding and reading relevant files
27
28
  2. Create a script to reproduce the issue
28
29
  3. Edit the source code to resolve the issue
29
30
  4. Verify your fix works by running your script again
30
31
  5. Test edge cases to ensure your fix is robust
32
+ 6. Submit your changes and finish your work by issuing the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`.
33
+ Do not combine it with any other command. <important>After this command, you cannot continue working on this task.</important>
31
34
 
32
35
  ## Important Rules
33
36
 
@@ -35,8 +38,10 @@ agent:
35
38
  2. The action must be enclosed in triple backticks
36
39
  3. Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
37
40
  However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
38
- 4. To finish, issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`.
39
- Do not combine it with any other command.
41
+
42
+ <system_information>
43
+ {{system}} {{release}} {{version}} {{machine}}
44
+ </system_information>
40
45
 
41
46
  ## Formatting your response
42
47
 
@@ -45,7 +50,7 @@ agent:
45
50
  <example_response>
46
51
  THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
47
52
 
48
- ```bash
53
+ ```mswea_bash_command
49
54
  ls -la
50
55
  ```
51
56
  </example_response>
@@ -54,7 +59,7 @@ agent:
54
59
 
55
60
  ### Create a new file:
56
61
 
57
- ```bash
62
+ ```mswea_bash_command
58
63
  cat <<'EOF' > newfile.py
59
64
  import numpy as np
60
65
  hello = "world"
@@ -64,7 +69,13 @@ agent:
64
69
 
65
70
  ### Edit files with sed:
66
71
 
67
- ```bash
72
+ {%- if system == "Darwin" -%}
73
+ <important>
74
+ You are on MacOS. For all the below examples, you need to use `sed -i ''` instead of `sed -i`.
75
+ </important>
76
+ {%- endif -%}
77
+
78
+ ```mswea_bash_command
68
79
  # Replace all occurrences
69
80
  sed -i 's/old_string/new_string/g' filename.py
70
81
 
@@ -80,17 +91,31 @@ agent:
80
91
 
81
92
  ### View file content:
82
93
 
83
- ```bash
94
+ ```mswea_bash_command
84
95
  # View specific lines with numbers
85
96
  nl -ba filename.py | sed -n '10,20p'
86
97
  ```
87
98
 
88
99
  ### Any other command you want to run
89
100
 
90
- ```bash
101
+ ```mswea_bash_command
91
102
  anything
92
103
  ```
93
- action_observation_template: |
104
+ step_limit: 0
105
+ cost_limit: 3.
106
+ mode: confirm
107
+ environment:
108
+ env:
109
+ PAGER: cat
110
+ MANPAGER: cat
111
+ LESS: -R
112
+ PIP_PROGRESS_BAR: 'off'
113
+ TQDM_DISABLE: '1'
114
+ model:
115
+ observation_template: |
116
+ {% if output.exception_info -%}
117
+ <exception>{{output.exception_info}}</exception>
118
+ {% endif -%}
94
119
  <returncode>{{output.returncode}}</returncode>
95
120
  {% if output.output | length < 10000 -%}
96
121
  <output>
@@ -115,31 +140,20 @@ agent:
115
140
  {{ output.output[-5000:] }}
116
141
  </output_tail>
117
142
  {%- endif -%}
143
+ model_kwargs:
144
+ drop_params: true
118
145
  format_error_template: |
119
146
  Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
120
- If you want to end the task, please issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
121
- without any other command.
122
- Else, please format your response exactly as follows:
147
+
148
+ Please format your action in triple backticks as shown in <response_example>.
123
149
 
124
150
  <response_example>
125
151
  Here are some thoughts about why you want to perform the action.
126
152
 
127
- ```bash
153
+ ```mswea_bash_command
128
154
  <action>
129
155
  ```
130
156
  </response_example>
131
- step_limit: 0.
132
- cost_limit: 0.
133
157
 
134
- environment:
135
- image: "python:3.11"
136
- cwd: "/testbed"
137
- env:
138
- PAGER: cat
139
- MANPAGER: cat
140
- LESS: -R
141
- PIP_PROGRESS_BAR: 'off'
142
- TQDM_DISABLE: '1'
143
- model:
144
- model_kwargs:
145
- drop_params: true
158
+ If you have completed your assignment, please consult the first message about how to
159
+ submit your solution (you will not be able to continue working on this task after that).
@@ -10,6 +10,7 @@ _ENVIRONMENT_MAPPING = {
10
10
  "singularity": "minisweagent.environments.singularity.SingularityEnvironment",
11
11
  "local": "minisweagent.environments.local.LocalEnvironment",
12
12
  "swerex_docker": "minisweagent.environments.extra.swerex_docker.SwerexDockerEnvironment",
13
+ "swerex_modal": "minisweagent.environments.extra.swerex_modal.SwerexModalEnvironment",
13
14
  "bubblewrap": "minisweagent.environments.extra.bubblewrap.BubblewrapEnvironment",
14
15
  }
15
16
 
@@ -1,20 +1,24 @@
1
1
  import logging
2
2
  import os
3
+ import platform
3
4
  import shlex
4
5
  import subprocess
5
6
  import uuid
6
- from dataclasses import asdict, dataclass, field
7
7
  from typing import Any
8
8
 
9
+ from pydantic import BaseModel
9
10
 
10
- @dataclass
11
- class DockerEnvironmentConfig:
11
+ from minisweagent.exceptions import Submitted
12
+ from minisweagent.utils.serialize import recursive_merge
13
+
14
+
15
+ class DockerEnvironmentConfig(BaseModel):
12
16
  image: str
13
17
  cwd: str = "/"
14
18
  """Working directory in which to execute commands."""
15
- env: dict[str, str] = field(default_factory=dict)
19
+ env: dict[str, str] = {}
16
20
  """Environment variables to set in the container."""
17
- forward_env: list[str] = field(default_factory=list)
21
+ forward_env: list[str] = []
18
22
  """Environment variables to forward to the container.
19
23
  Variables are only forwarded if they are set in the host environment.
20
24
  In case of conflict with `env`, the `env` variables take precedence.
@@ -23,7 +27,7 @@ class DockerEnvironmentConfig:
23
27
  """Timeout for executing commands in the container."""
24
28
  executable: str = os.getenv("MSWEA_DOCKER_EXECUTABLE", "docker")
25
29
  """Path to the docker/container executable."""
26
- run_args: list[str] = field(default_factory=lambda: ["--rm"])
30
+ run_args: list[str] = ["--rm"]
27
31
  """Additional arguments to pass to the docker/container executable.
28
32
  Default is ["--rm"], which removes the container after it exits.
29
33
  """
@@ -31,6 +35,11 @@ class DockerEnvironmentConfig:
31
35
  """Max duration to keep container running. Uses the same format as the sleep command."""
32
36
  pull_timeout: int = 120
33
37
  """Timeout in seconds for pulling images."""
38
+ interpreter: list[str] = ["bash", "-lc"]
39
+ """Interpreter to use to execute commands. Default is ["bash", "-lc"].
40
+ The actual command will be appended as argument to this. Override this to e.g., modify shell flags
41
+ (e.g., to remove the `-l` flag to disable login shell) or to use python instead of bash to interpret commands.
42
+ """
34
43
 
35
44
 
36
45
  class DockerEnvironment:
@@ -49,8 +58,18 @@ class DockerEnvironment:
49
58
  self.config = config_class(**kwargs)
50
59
  self._start_container()
51
60
 
52
- def get_template_vars(self) -> dict[str, Any]:
53
- return asdict(self.config)
61
+ def get_template_vars(self, **kwargs) -> dict[str, Any]:
62
+ return recursive_merge(self.config.model_dump(), platform.uname()._asdict(), kwargs)
63
+
64
+ def serialize(self) -> dict:
65
+ return {
66
+ "info": {
67
+ "config": {
68
+ "environment": self.config.model_dump(mode="json"),
69
+ "environment_type": f"{self.__class__.__module__}.{self.__class__.__name__}",
70
+ }
71
+ }
72
+ }
54
73
 
55
74
  def _start_container(self):
56
75
  """Start the Docker container and return the container ID."""
@@ -79,8 +98,9 @@ class DockerEnvironment:
79
98
  self.logger.info(f"Started container {container_name} with ID {result.stdout.strip()}")
80
99
  self.container_id = result.stdout.strip()
81
100
 
82
- def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
101
+ def execute(self, action: dict, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
83
102
  """Execute a command in the Docker container and return the result as a dict."""
103
+ command = action.get("command", "")
84
104
  cwd = cwd or self.config.cwd
85
105
  assert self.container_id, "Container not started"
86
106
 
@@ -90,18 +110,45 @@ class DockerEnvironment:
90
110
  cmd.extend(["-e", f"{key}={value}"])
91
111
  for key, value in self.config.env.items():
92
112
  cmd.extend(["-e", f"{key}={value}"])
93
- cmd.extend([self.container_id, "bash", "-lc", command])
113
+ cmd.extend([self.container_id, *self.config.interpreter, command])
94
114
 
95
- result = subprocess.run(
96
- cmd,
97
- text=True,
98
- timeout=timeout or self.config.timeout,
99
- encoding="utf-8",
100
- errors="replace",
101
- stdout=subprocess.PIPE,
102
- stderr=subprocess.STDOUT,
103
- )
104
- return {"output": result.stdout, "returncode": result.returncode}
115
+ try:
116
+ result = subprocess.run(
117
+ cmd,
118
+ text=True,
119
+ timeout=timeout or self.config.timeout,
120
+ encoding="utf-8",
121
+ errors="replace",
122
+ stdout=subprocess.PIPE,
123
+ stderr=subprocess.STDOUT,
124
+ )
125
+ output = {"output": result.stdout, "returncode": result.returncode, "exception_info": ""}
126
+ except Exception as e:
127
+ raw_output = getattr(e, "output", None)
128
+ raw_output = (
129
+ raw_output.decode("utf-8", errors="replace") if isinstance(raw_output, bytes) else (raw_output or "")
130
+ )
131
+ output = {
132
+ "output": raw_output,
133
+ "returncode": -1,
134
+ "exception_info": f"An error occurred while executing the command: {e}",
135
+ "extra": {"exception_type": type(e).__name__, "exception": str(e)},
136
+ }
137
+ self._check_finished(output)
138
+ return output
139
+
140
+ def _check_finished(self, output: dict):
141
+ """Raises Submitted if the output indicates task completion."""
142
+ lines = output.get("output", "").lstrip().splitlines(keepends=True)
143
+ if lines and lines[0].strip() == "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT" and output["returncode"] == 0:
144
+ submission = "".join(lines[1:])
145
+ raise Submitted(
146
+ {
147
+ "role": "exit",
148
+ "content": submission,
149
+ "extra": {"exit_status": "Submitted", "submission": submission},
150
+ }
151
+ )
105
152
 
106
153
  def cleanup(self):
107
154
  """Stop and remove the Docker container."""
@@ -17,51 +17,52 @@ import shutil
17
17
  import subprocess
18
18
  import tempfile
19
19
  import uuid
20
- from dataclasses import asdict, dataclass, field
21
20
  from pathlib import Path
22
21
  from typing import Any
23
22
 
23
+ from pydantic import BaseModel
24
24
 
25
- @dataclass
26
- class BubblewrapEnvironmentConfig:
25
+ from minisweagent.exceptions import Submitted
26
+ from minisweagent.utils.serialize import recursive_merge
27
+
28
+
29
+ class BubblewrapEnvironmentConfig(BaseModel):
27
30
  cwd: str = ""
28
31
  """Working directory for the sandbox."""
29
- env: dict[str, str] = field(default_factory=dict)
32
+ env: dict[str, str] = {}
30
33
  """Dictionary of environment variables to set in the sandbox."""
31
34
  timeout: int = 30
32
35
  """Timeout for the command in seconds."""
33
36
  executable: str = os.getenv("MSWEA_BUBBLEWRAP_EXECUTABLE", "bwrap")
34
37
  """Path to the bubblewrap executable."""
35
- wrapper_args: list[str] = field(
36
- default_factory=lambda: [
37
- "--unshare-user-try",
38
- "--ro-bind",
39
- "/usr",
40
- "/usr",
41
- "--ro-bind",
42
- "/bin",
43
- "/bin",
44
- "--ro-bind",
45
- "/lib",
46
- "/lib",
47
- "--ro-bind",
48
- "/lib64",
49
- "/lib64",
50
- "--ro-bind",
51
- "/etc",
52
- "/etc",
53
- "--tmpfs",
54
- "/tmp",
55
- "--proc",
56
- "/proc",
57
- "--dev",
58
- "/dev",
59
- "--new-session",
60
- "--setenv",
61
- "PATH",
62
- "/usr/local/bin:/usr/sbin:/usr/bin:/bin",
63
- ]
64
- )
38
+ wrapper_args: list[str] = [
39
+ "--unshare-user-try",
40
+ "--ro-bind",
41
+ "/usr",
42
+ "/usr",
43
+ "--ro-bind",
44
+ "/bin",
45
+ "/bin",
46
+ "--ro-bind",
47
+ "/lib",
48
+ "/lib",
49
+ "--ro-bind",
50
+ "/lib64",
51
+ "/lib64",
52
+ "--ro-bind",
53
+ "/etc",
54
+ "/etc",
55
+ "--tmpfs",
56
+ "/tmp",
57
+ "--proc",
58
+ "/proc",
59
+ "--dev",
60
+ "/dev",
61
+ "--new-session",
62
+ "--setenv",
63
+ "PATH",
64
+ "/usr/local/bin:/usr/sbin:/usr/bin:/bin",
65
+ ]
65
66
  """Arguments to pass to the bubblewrap executable."""
66
67
 
67
68
 
@@ -77,8 +78,9 @@ class BubblewrapEnvironment:
77
78
  self.working_dir = Path(tempfile.gettempdir()) / f"minisweagent-{uuid.uuid4().hex[:8]}"
78
79
  self.working_dir.mkdir(parents=True)
79
80
 
80
- def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
81
+ def execute(self, action: dict, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
81
82
  """Execute a command in the bubblewrap environment and return the result as a dict."""
83
+ command = action.get("command", "")
82
84
  cwd = cwd or self.config.cwd or str(self.working_dir)
83
85
 
84
86
  cmd = [self.config.executable] + self.config.wrapper_args + ["--bind", cwd, cwd, "--chdir", cwd]
@@ -89,16 +91,43 @@ class BubblewrapEnvironment:
89
91
 
90
92
  cmd.extend(["bash", "-c", command])
91
93
 
92
- result = subprocess.run(
93
- cmd,
94
- text=True,
95
- timeout=timeout or self.config.timeout,
96
- encoding="utf-8",
97
- errors="replace",
98
- stdout=subprocess.PIPE,
99
- stderr=subprocess.STDOUT,
100
- )
101
- return {"output": result.stdout, "returncode": result.returncode}
94
+ try:
95
+ result = subprocess.run(
96
+ cmd,
97
+ text=True,
98
+ timeout=timeout or self.config.timeout,
99
+ encoding="utf-8",
100
+ errors="replace",
101
+ stdout=subprocess.PIPE,
102
+ stderr=subprocess.STDOUT,
103
+ )
104
+ output = {"output": result.stdout, "returncode": result.returncode, "exception_info": ""}
105
+ except Exception as e:
106
+ raw_output = getattr(e, "output", None)
107
+ raw_output = (
108
+ raw_output.decode("utf-8", errors="replace") if isinstance(raw_output, bytes) else (raw_output or "")
109
+ )
110
+ output = {
111
+ "output": raw_output,
112
+ "returncode": -1,
113
+ "exception_info": f"An error occurred while executing the command: {e}",
114
+ "extra": {"exception_type": type(e).__name__, "exception": str(e)},
115
+ }
116
+ self._check_finished(output)
117
+ return output
118
+
119
+ def _check_finished(self, output: dict):
120
+ """Raises Submitted if the output indicates task completion."""
121
+ lines = output.get("output", "").lstrip().splitlines(keepends=True)
122
+ if lines and lines[0].strip() == "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT" and output["returncode"] == 0:
123
+ submission = "".join(lines[1:])
124
+ raise Submitted(
125
+ {
126
+ "role": "exit",
127
+ "content": submission,
128
+ "extra": {"exit_status": "Submitted", "submission": submission},
129
+ }
130
+ )
102
131
 
103
132
  def cleanup(self):
104
133
  if self.working_dir.exists():
@@ -108,5 +137,15 @@ class BubblewrapEnvironment:
108
137
  """Cleanup working_dir when object is destroyed."""
109
138
  self.cleanup()
110
139
 
111
- def get_template_vars(self) -> dict[str, Any]:
112
- return asdict(self.config) | platform.uname()._asdict()
140
+ def get_template_vars(self, **kwargs) -> dict[str, Any]:
141
+ return recursive_merge(self.config.model_dump(), platform.uname()._asdict(), kwargs)
142
+
143
+ def serialize(self) -> dict:
144
+ return {
145
+ "info": {
146
+ "config": {
147
+ "environment": self.config.model_dump(mode="json"),
148
+ "environment_type": f"{self.__class__.__module__}.{self.__class__.__name__}",
149
+ }
150
+ }
151
+ }
@@ -1,19 +1,21 @@
1
1
  import asyncio
2
- from dataclasses import asdict, dataclass, field
3
2
  from typing import Any
4
3
 
4
+ from pydantic import BaseModel
5
5
  from swerex.deployment.docker import DockerDeployment
6
6
  from swerex.runtime.abstract import Command as RexCommand
7
7
 
8
+ from minisweagent.exceptions import Submitted
9
+ from minisweagent.utils.serialize import recursive_merge
8
10
 
9
- @dataclass
10
- class SwerexDockerEnvironmentConfig:
11
+
12
+ class SwerexDockerEnvironmentConfig(BaseModel):
11
13
  image: str
12
14
  cwd: str = "/"
13
15
  """Working directory in which to execute commands."""
14
16
  timeout: int = 30
15
17
  """Timeout for executing commands in the container."""
16
- deployment_extra_kwargs: dict[str, Any] = field(default_factory=dict)
18
+ deployment_extra_kwargs: dict[str, Any] = {}
17
19
  """Extra kwargs to pass to DockerDeployment."""
18
20
 
19
21
 
@@ -24,24 +26,55 @@ class SwerexDockerEnvironment:
24
26
  self.deployment = DockerDeployment(image=self.config.image, **self.config.deployment_extra_kwargs)
25
27
  asyncio.run(self.deployment.start())
26
28
 
27
- def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
29
+ def execute(self, action: dict, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
28
30
  """Execute a command in the environment and return the raw output."""
29
- output = asyncio.run(
30
- self.deployment.runtime.execute(
31
- RexCommand(
32
- command=command,
33
- shell=True,
34
- check=False,
35
- cwd=cwd or self.config.cwd,
36
- timeout=timeout or self.config.timeout,
37
- merge_output_streams=True,
31
+ command = action.get("command", "")
32
+ try:
33
+ result = asyncio.run(
34
+ self.deployment.runtime.execute(
35
+ RexCommand(
36
+ command=command,
37
+ shell=True,
38
+ check=False,
39
+ cwd=cwd or self.config.cwd,
40
+ timeout=timeout or self.config.timeout,
41
+ merge_output_streams=True,
42
+ )
38
43
  )
39
44
  )
40
- )
45
+ output = {"output": result.stdout, "returncode": result.exit_code, "exception_info": ""}
46
+ except Exception as e:
47
+ output = {
48
+ "output": str(e) if str(e) else "",
49
+ "returncode": -1,
50
+ "exception_info": f"An error occurred while executing the command: {e}",
51
+ "extra": {"exception_type": type(e).__name__, "exception": str(e)},
52
+ }
53
+ self._check_finished(output)
54
+ return output
55
+
56
+ def _check_finished(self, output: dict):
57
+ """Raises Submitted if the output indicates task completion."""
58
+ lines = output.get("output", "").lstrip().splitlines(keepends=True)
59
+ if lines and lines[0].strip() == "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT" and output["returncode"] == 0:
60
+ submission = "".join(lines[1:])
61
+ raise Submitted(
62
+ {
63
+ "role": "exit",
64
+ "content": submission,
65
+ "extra": {"exit_status": "Submitted", "submission": submission},
66
+ }
67
+ )
68
+
69
+ def get_template_vars(self, **kwargs) -> dict[str, Any]:
70
+ return recursive_merge(self.config.model_dump(), kwargs)
71
+
72
+ def serialize(self) -> dict:
41
73
  return {
42
- "output": output.stdout,
43
- "returncode": output.exit_code,
74
+ "info": {
75
+ "config": {
76
+ "environment": self.config.model_dump(mode="json"),
77
+ "environment_type": f"{self.__class__.__module__}.{self.__class__.__name__}",
78
+ }
79
+ }
44
80
  }
45
-
46
- def get_template_vars(self) -> dict[str, Any]:
47
- return asdict(self.config)
@@ -0,0 +1,90 @@
1
+ import asyncio
2
+ from typing import Any
3
+
4
+ from pydantic import BaseModel
5
+ from swerex.deployment.modal import ModalDeployment
6
+ from swerex.runtime.abstract import Command as RexCommand
7
+
8
+
9
+ class SwerexModalEnvironmentConfig(BaseModel):
10
+ image: str
11
+ """Image to use for the deployment. Can be:
12
+ - Dockerhub image name (e.g. `python:3.11-slim`)
13
+ - ECR image name (e.g. `123456789012.dkr.ecr.us-east-1.amazonaws.com/my-image:tag`)
14
+ - Path to a Dockerfile
15
+ """
16
+ cwd: str = "/"
17
+ """Working directory in which to execute commands."""
18
+ timeout: int = 30
19
+ """Timeout for executing commands in the container."""
20
+ env: dict[str, str] = {}
21
+ """Environment variables to set when executing commands."""
22
+ startup_timeout: float = 60.0
23
+ """The time to wait for the runtime to start."""
24
+ runtime_timeout: float = 3600.0
25
+ """The runtime timeout (how long the Modal sandbox can stay alive)."""
26
+ deployment_timeout: float = 3600.0
27
+ """The deployment timeout."""
28
+ install_pipx: bool = True
29
+ """Whether to install pipx in the container (required for swe-rex runtime)."""
30
+ modal_sandbox_kwargs: dict[str, Any] = {}
31
+ """Additional arguments to pass to `modal.Sandbox.create`."""
32
+
33
+
34
+ class SwerexModalEnvironment:
35
+ def __init__(self, **kwargs):
36
+ """This class executes bash commands in a Modal sandbox using SWE-ReX for remote execution.
37
+
38
+ Modal (https://modal.com) provides serverless cloud compute that can be used to run
39
+ sandboxed environments. This environment class uses SWE-ReX's ModalDeployment to
40
+ create and manage Modal sandboxes for command execution.
41
+
42
+ This is useful for:
43
+ - Training coding agents at scale with remote execution
44
+ - Running evaluations in isolated cloud environments
45
+ - Parallel execution across many instances
46
+
47
+ See `SwerexModalEnvironmentConfig` for keyword arguments.
48
+ """
49
+ self.config = SwerexModalEnvironmentConfig(**kwargs)
50
+ self.deployment = ModalDeployment(
51
+ image=self.config.image,
52
+ startup_timeout=self.config.startup_timeout,
53
+ runtime_timeout=self.config.runtime_timeout,
54
+ deployment_timeout=self.config.deployment_timeout,
55
+ install_pipx=self.config.install_pipx,
56
+ modal_sandbox_kwargs=self.config.modal_sandbox_kwargs,
57
+ )
58
+ asyncio.run(self.deployment.start())
59
+
60
+ def execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
61
+ """Execute a command in the environment and return the raw output."""
62
+ output = asyncio.run(
63
+ self.deployment.runtime.execute(
64
+ RexCommand(
65
+ command=command,
66
+ shell=True,
67
+ check=False,
68
+ cwd=cwd or self.config.cwd,
69
+ timeout=timeout or self.config.timeout,
70
+ merge_output_streams=True,
71
+ env=self.config.env if self.config.env else None,
72
+ )
73
+ )
74
+ )
75
+ return {
76
+ "output": output.stdout,
77
+ "returncode": output.exit_code,
78
+ }
79
+
80
+ def get_template_vars(self) -> dict[str, Any]:
81
+ return self.config.model_dump()
82
+
83
+ def stop(self):
84
+ async def _stop():
85
+ await asyncio.wait_for(self.deployment.stop(), timeout=10)
86
+
87
+ try:
88
+ asyncio.run(_stop())
89
+ except Exception:
90
+ pass