mini-swe-agent 1.9.0__tar.gz → 1.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. {mini_swe_agent-1.9.0/src/mini_swe_agent.egg-info → mini_swe_agent-1.10.0}/PKG-INFO +3 -2
  2. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/README.md +1 -1
  3. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/pyproject.toml +1 -0
  4. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0/src/mini_swe_agent.egg-info}/PKG-INFO +3 -2
  5. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/mini_swe_agent.egg-info/SOURCES.txt +4 -0
  6. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/mini_swe_agent.egg-info/requires.txt +1 -0
  7. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/__init__.py +14 -2
  8. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/agents/interactive.py +2 -2
  9. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/agents/interactive_textual.py +8 -4
  10. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/extra/swebench.yaml +1 -1
  11. mini_swe_agent-1.10.0/src/minisweagent/config/extra/swebench_roulette.yaml +233 -0
  12. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/environments/__init__.py +1 -0
  13. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/environments/docker.py +7 -7
  14. mini_swe_agent-1.10.0/src/minisweagent/environments/extra/bubblewrap.py +112 -0
  15. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/environments/singularity.py +31 -13
  16. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/models/__init__.py +36 -10
  17. mini_swe_agent-1.10.0/src/minisweagent/models/extra/roulette.py +62 -0
  18. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/models/litellm_model.py +9 -1
  19. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/extra/swebench.py +6 -3
  20. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/extra/swebench_single.py +10 -2
  21. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/github_issue.py +1 -1
  22. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/mini.py +14 -17
  23. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/utils/save.py +22 -0
  24. mini_swe_agent-1.10.0/src/minisweagent/utils/__init__.py +0 -0
  25. mini_swe_agent-1.10.0/src/minisweagent/utils/log.py +36 -0
  26. mini_swe_agent-1.9.0/src/minisweagent/utils/log.py +0 -32
  27. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/LICENSE.md +0 -0
  28. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/setup.cfg +0 -0
  29. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/mini_swe_agent.egg-info/dependency_links.txt +0 -0
  30. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/mini_swe_agent.egg-info/entry_points.txt +0 -0
  31. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/mini_swe_agent.egg-info/top_level.txt +0 -0
  32. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/__main__.py +0 -0
  33. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/agents/__init__.py +0 -0
  34. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/agents/default.py +0 -0
  35. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/README.md +0 -0
  36. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/__init__.py +0 -0
  37. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/default.yaml +0 -0
  38. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/extra/__init__.py +0 -0
  39. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/github_issue.yaml +0 -0
  40. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/mini.tcss +0 -0
  41. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/mini.yaml +0 -0
  42. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/config/mini_no_temp.yaml +0 -0
  43. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/environments/extra/__init__.py +0 -0
  44. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/environments/extra/swerex_docker.py +0 -0
  45. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/environments/local.py +0 -0
  46. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/models/anthropic.py +0 -0
  47. {mini_swe_agent-1.9.0/src/minisweagent/models/utils → mini_swe_agent-1.10.0/src/minisweagent/models/extra}/__init__.py +0 -0
  48. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/models/test_models.py +0 -0
  49. {mini_swe_agent-1.9.0/src/minisweagent/run/extra → mini_swe_agent-1.10.0/src/minisweagent/models/utils}/__init__.py +0 -0
  50. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/models/utils/cache_control.py +0 -0
  51. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/models/utils/key_per_thread.py +0 -0
  52. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/py.typed +0 -0
  53. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/__init__.py +0 -0
  54. {mini_swe_agent-1.9.0/src/minisweagent/run/extra/utils → mini_swe_agent-1.10.0/src/minisweagent/run/extra}/__init__.py +0 -0
  55. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/extra/config.py +0 -0
  56. {mini_swe_agent-1.9.0/src/minisweagent/run → mini_swe_agent-1.10.0/src/minisweagent/run/extra}/utils/__init__.py +0 -0
  57. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/extra/utils/batch_progress.py +0 -0
  58. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/hello_world.py +0 -0
  59. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/inspector.py +0 -0
  60. {mini_swe_agent-1.9.0 → mini_swe_agent-1.10.0}/src/minisweagent/run/mini_extra.py +0 -0
  61. {mini_swe_agent-1.9.0/src/minisweagent → mini_swe_agent-1.10.0/src/minisweagent/run}/utils/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mini-swe-agent
3
- Version: 1.9.0
3
+ Version: 1.10.0
4
4
  Summary: Nano SWE Agent - A simple AI software engineering agent
5
5
  Author-email: Kilian Lieret <kilian.lieret@posteo.de>, "Carlos E. Jimenez" <carlosej@princeton.edu>
6
6
  License: MIT License
@@ -48,6 +48,7 @@ Requires-Dist: typer
48
48
  Requires-Dist: platformdirs
49
49
  Requires-Dist: textual
50
50
  Requires-Dist: prompt_toolkit
51
+ Requires-Dist: openai!=1.100.0,!=1.100.1
51
52
  Provides-Extra: full
52
53
  Requires-Dist: mini-swe-agent[dev]; extra == "full"
53
54
  Requires-Dist: swe-rex>=1.4.0; extra == "full"
@@ -72,7 +73,7 @@ Dynamic: license-file
72
73
 
73
74
  # The 100 line AI agent that solves GitHub issues & more
74
75
 
75
- 📣 [Run `mini` with GPT-5 and friends: Read here](https://mini-swe-agent.com/latest/quickstart/#gpt-5)
76
+ 📣 [New blogpost: Randomly switching between GPT-5 and Sonnet 4 boosts performance](https://www.swebench.com/SWE-bench/blog/2025/08/19/mini-roulette/)
76
77
 
77
78
  [![Docs](https://img.shields.io/badge/Docs-green?style=for-the-badge&logo=materialformkdocs&logoColor=white)](https://mini-swe-agent.com/latest/)
78
79
  [![Slack](https://img.shields.io/badge/Slack-4A154B?style=for-the-badge&logo=slack&logoColor=white)](https://join.slack.com/t/swe-bench/shared_invite/zt-36pj9bu5s-o3_yXPZbaH2wVnxnss1EkQ)
@@ -4,7 +4,7 @@
4
4
 
5
5
  # The 100 line AI agent that solves GitHub issues & more
6
6
 
7
- 📣 [Run `mini` with GPT-5 and friends: Read here](https://mini-swe-agent.com/latest/quickstart/#gpt-5)
7
+ 📣 [New blogpost: Randomly switching between GPT-5 and Sonnet 4 boosts performance](https://www.swebench.com/SWE-bench/blog/2025/08/19/mini-roulette/)
8
8
 
9
9
  [![Docs](https://img.shields.io/badge/Docs-green?style=for-the-badge&logo=materialformkdocs&logoColor=white)](https://mini-swe-agent.com/latest/)
10
10
  [![Slack](https://img.shields.io/badge/Slack-4A154B?style=for-the-badge&logo=slack&logoColor=white)](https://join.slack.com/t/swe-bench/shared_invite/zt-36pj9bu5s-o3_yXPZbaH2wVnxnss1EkQ)
@@ -42,6 +42,7 @@ dependencies = [
42
42
  "platformdirs",
43
43
  "textual",
44
44
  "prompt_toolkit",
45
+ "openai != 1.100.0,!=1.100.1", # https://github.com/SWE-agent/mini-swe-agent/issues/446
45
46
  ]
46
47
 
47
48
  [project.optional-dependencies]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mini-swe-agent
3
- Version: 1.9.0
3
+ Version: 1.10.0
4
4
  Summary: Nano SWE Agent - A simple AI software engineering agent
5
5
  Author-email: Kilian Lieret <kilian.lieret@posteo.de>, "Carlos E. Jimenez" <carlosej@princeton.edu>
6
6
  License: MIT License
@@ -48,6 +48,7 @@ Requires-Dist: typer
48
48
  Requires-Dist: platformdirs
49
49
  Requires-Dist: textual
50
50
  Requires-Dist: prompt_toolkit
51
+ Requires-Dist: openai!=1.100.0,!=1.100.1
51
52
  Provides-Extra: full
52
53
  Requires-Dist: mini-swe-agent[dev]; extra == "full"
53
54
  Requires-Dist: swe-rex>=1.4.0; extra == "full"
@@ -72,7 +73,7 @@ Dynamic: license-file
72
73
 
73
74
  # The 100 line AI agent that solves GitHub issues & more
74
75
 
75
- 📣 [Run `mini` with GPT-5 and friends: Read here](https://mini-swe-agent.com/latest/quickstart/#gpt-5)
76
+ 📣 [New blogpost: Randomly switching between GPT-5 and Sonnet 4 boosts performance](https://www.swebench.com/SWE-bench/blog/2025/08/19/mini-roulette/)
76
77
 
77
78
  [![Docs](https://img.shields.io/badge/Docs-green?style=for-the-badge&logo=materialformkdocs&logoColor=white)](https://mini-swe-agent.com/latest/)
78
79
  [![Slack](https://img.shields.io/badge/Slack-4A154B?style=for-the-badge&logo=slack&logoColor=white)](https://join.slack.com/t/swe-bench/shared_invite/zt-36pj9bu5s-o3_yXPZbaH2wVnxnss1EkQ)
@@ -23,16 +23,20 @@ src/minisweagent/config/mini.yaml
23
23
  src/minisweagent/config/mini_no_temp.yaml
24
24
  src/minisweagent/config/extra/__init__.py
25
25
  src/minisweagent/config/extra/swebench.yaml
26
+ src/minisweagent/config/extra/swebench_roulette.yaml
26
27
  src/minisweagent/environments/__init__.py
27
28
  src/minisweagent/environments/docker.py
28
29
  src/minisweagent/environments/local.py
29
30
  src/minisweagent/environments/singularity.py
30
31
  src/minisweagent/environments/extra/__init__.py
32
+ src/minisweagent/environments/extra/bubblewrap.py
31
33
  src/minisweagent/environments/extra/swerex_docker.py
32
34
  src/minisweagent/models/__init__.py
33
35
  src/minisweagent/models/anthropic.py
34
36
  src/minisweagent/models/litellm_model.py
35
37
  src/minisweagent/models/test_models.py
38
+ src/minisweagent/models/extra/__init__.py
39
+ src/minisweagent/models/extra/roulette.py
36
40
  src/minisweagent/models/utils/__init__.py
37
41
  src/minisweagent/models/utils/cache_control.py
38
42
  src/minisweagent/models/utils/key_per_thread.py
@@ -9,6 +9,7 @@ typer
9
9
  platformdirs
10
10
  textual
11
11
  prompt_toolkit
12
+ openai!=1.100.0,!=1.100.1
12
13
 
13
14
  [dev]
14
15
  datasets
@@ -8,7 +8,7 @@ This file provides:
8
8
  unless you want the static type checking.
9
9
  """
10
10
 
11
- __version__ = "1.9.0"
11
+ __version__ = "1.10.0"
12
12
 
13
13
  import os
14
14
  from pathlib import Path
@@ -18,6 +18,8 @@ import dotenv
18
18
  from platformdirs import user_config_dir
19
19
  from rich.console import Console
20
20
 
21
+ from minisweagent.utils.log import logger
22
+
21
23
  package_dir = Path(__file__).resolve().parent
22
24
 
23
25
  global_config_dir = Path(os.getenv("MSWEA_GLOBAL_CONFIG_DIR") or user_config_dir("mini-swe-agent"))
@@ -64,8 +66,18 @@ class Agent(Protocol):
64
66
  model: Model
65
67
  env: Environment
66
68
  messages: list[dict[str, str]]
69
+ config: Any
67
70
 
68
71
  def run(self, task: str, **kwargs) -> tuple[str, str]: ...
69
72
 
70
73
 
71
- __all__ = ["Agent", "Model", "Environment", "package_dir", "__version__", "global_config_file", "global_config_dir"]
74
+ __all__ = [
75
+ "Agent",
76
+ "Model",
77
+ "Environment",
78
+ "package_dir",
79
+ "__version__",
80
+ "global_config_file",
81
+ "global_config_dir",
82
+ "logger",
83
+ ]
@@ -39,9 +39,9 @@ class InteractiveAgent(DefaultAgent):
39
39
  super().__init__(*args, config_class=config_class, **kwargs)
40
40
  self.cost_last_confirmed = 0.0
41
41
 
42
- def add_message(self, role: str, content: str):
42
+ def add_message(self, role: str, content: str, **kwargs):
43
43
  # Extend supermethod to print messages
44
- super().add_message(role, content)
44
+ super().add_message(role, content, **kwargs)
45
45
  if role == "assistant":
46
46
  console.print(
47
47
  f"\n[red][bold]mini-swe-agent[/bold] (step [bold]{self.model.n_calls}[/bold], [bold]${self.model.cost:.2f}[/bold]):[/red]\n",
@@ -44,8 +44,8 @@ class _TextualAgent(DefaultAgent):
44
44
  super().__init__(*args, config_class=TextualAgentConfig, **kwargs)
45
45
  self._current_action_from_human = False
46
46
 
47
- def add_message(self, role: str, content: str):
48
- super().add_message(role, content)
47
+ def add_message(self, role: str, content: str, **kwargs):
48
+ super().add_message(role, content, **kwargs)
49
49
  if self.app.agent_state != "UNINITIALIZED":
50
50
  self.app.call_from_thread(self.app.on_message_added)
51
51
 
@@ -276,13 +276,17 @@ class TextualAgent(App):
276
276
 
277
277
  self._vscroll = VerticalScroll()
278
278
 
279
- def run(self, task: str) -> tuple[str, str]:
280
- threading.Thread(target=lambda: self.agent.run(task), daemon=True).start()
279
+ def run(self, task: str, **kwargs) -> tuple[str, str]:
280
+ threading.Thread(target=lambda: self.agent.run(task, **kwargs), daemon=True).start()
281
281
  super().run()
282
282
  return self.exit_status, self.result
283
283
 
284
284
  # --- Basics ---
285
285
 
286
+ @property
287
+ def config(self):
288
+ return self.agent.config
289
+
286
290
  @property
287
291
  def i_step(self) -> int:
288
292
  """Current step index."""
@@ -36,7 +36,7 @@ agent:
36
36
  2. Provide exactly ONE bash command to execute
37
37
 
38
38
  ## Important Boundaries
39
- - MODIFY: Regular source code files in {{working_dir}}
39
+ - MODIFY: Regular source code files in /testbed (this is the working directory for all your subsequent commands)
40
40
  - DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
41
41
 
42
42
  ## Recommended Workflow
@@ -0,0 +1,233 @@
1
+ agent:
2
+ system_template: |
3
+ You are a helpful assistant that can interact multiple times with a computer shell to solve programming tasks.
4
+ Your response must contain exactly ONE bash code block with ONE command (or commands connected with && or ||).
5
+
6
+ Include a THOUGHT section before your command where you explain your reasoning process.
7
+ Format your response as shown in <format_example>.
8
+
9
+ <format_example>
10
+ THOUGHT: Your reasoning and analysis here
11
+
12
+ ```bash
13
+ your_command_here
14
+ ```
15
+ </format_example>
16
+
17
+ Failure to follow these rules will cause your response to be rejected.
18
+ instance_template: |
19
+ <pr_description>
20
+ Consider the following PR description:
21
+ {{task}}
22
+ </pr_description>
23
+
24
+ <instructions>
25
+ # Task Instructions
26
+
27
+ ## Overview
28
+ You're a software engineer interacting continuously with a computer by submitting commands.
29
+ You'll be helping implement necessary changes to meet requirements in the PR description.
30
+ Your task is specifically to make changes to non-test files in the current directory in order to fix the issue described in the PR description in a way that is general and consistent with the codebase.
31
+
32
+ IMPORTANT: This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command.
33
+
34
+ For each response:
35
+ 1. Include a THOUGHT section explaining your reasoning and what you're trying to accomplish
36
+ 2. Provide exactly ONE bash command to execute
37
+
38
+ ## Important Boundaries
39
+ - MODIFY: Regular source code files in {{working_dir}}
40
+ - DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
41
+
42
+ ## Recommended Workflow
43
+ 1. Analyze the codebase by finding and reading relevant files
44
+ 2. Create a script to reproduce the issue
45
+ 3. Edit the source code to resolve the issue
46
+ 4. Verify your fix works by running your script again
47
+ 5. Test edge cases to ensure your fix is robust
48
+
49
+ ## Command Execution Rules
50
+ You are operating in an environment where
51
+ 1. You write a single command
52
+ 2. The system executes that command in a subshell
53
+ 3. You see the result
54
+ 4. You write your next command
55
+
56
+ Each response should include:
57
+ 1. A **THOUGHT** section where you explain your reasoning and plan
58
+ 2. A single bash code block with your command
59
+
60
+ Format your responses like this:
61
+
62
+ <format_example>
63
+ THOUGHT: Here I explain my reasoning process, analysis of the current situation,
64
+ and what I'm trying to accomplish with the command below.
65
+
66
+ ```bash
67
+ your_command_here
68
+ ```
69
+ </format_example>
70
+
71
+ Commands must be specified in a single bash code block:
72
+
73
+ ```bash
74
+ your_command_here
75
+ ```
76
+
77
+ **CRITICAL REQUIREMENTS:**
78
+ - Your response SHOULD include a THOUGHT section explaining your reasoning
79
+ - Your response MUST include EXACTLY ONE bash code block
80
+ - This bash block MUST contain EXACTLY ONE command (or a set of commands connected with && or ||)
81
+ - If you include zero or multiple bash blocks, or no command at all, YOUR RESPONSE WILL FAIL
82
+ - Do NOT try to run multiple independent commands in separate blocks in one response
83
+ - Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
84
+ - However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
85
+
86
+ Example of a CORRECT response:
87
+ <example_response>
88
+ THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
89
+
90
+ ```bash
91
+ ls -la
92
+ ```
93
+ </example_response>
94
+
95
+ Example of an INCORRECT response:
96
+ <example_response>
97
+ THOUGHT: I need to examine the codebase and then look at a specific file. I'll run multiple commands to do this.
98
+
99
+ ```bash
100
+ ls -la
101
+ ```
102
+
103
+ Now I'll read the file:
104
+
105
+ ```bash
106
+ cat file.txt
107
+ ```
108
+ </example_response>
109
+
110
+ If you need to run multiple commands, either:
111
+ 1. Combine them in one block using && or ||
112
+ ```bash
113
+ command1 && command2 || echo "Error occurred"
114
+ ```
115
+
116
+ 2. Wait for the first command to complete, see its output, then issue the next command in your following response.
117
+
118
+ ## Environment Details
119
+ - You have a full Linux shell environment
120
+ - Always use non-interactive flags (-y, -f) for commands
121
+ - Avoid interactive tools like vi, nano, or any that require user input
122
+ - If a command isn't available, you can install it
123
+
124
+ ## Useful Command Examples
125
+
126
+ ### Create a new file:
127
+ ```bash
128
+ cat <<'EOF' > newfile.py
129
+ import numpy as np
130
+ hello = "world"
131
+ print(hello)
132
+ EOF
133
+ ```
134
+
135
+ ### Edit files with sed:
136
+ ```bash
137
+ # Replace all occurrences
138
+ sed -i 's/old_string/new_string/g' filename.py
139
+
140
+ # Replace only first occurrence
141
+ sed -i 's/old_string/new_string/' filename.py
142
+
143
+ # Replace first occurrence on line 1
144
+ sed -i '1s/old_string/new_string/' filename.py
145
+
146
+ # Replace all occurrences in lines 1-10
147
+ sed -i '1,10s/old_string/new_string/g' filename.py
148
+ ```
149
+
150
+ ### View file content:
151
+ ```bash
152
+ # View specific lines with numbers
153
+ nl -ba filename.py | sed -n '10,20p'
154
+ ```
155
+
156
+ ### Any other command you want to run
157
+ ```bash
158
+ anything
159
+ ```
160
+
161
+ ## Submission
162
+ When you've completed your work (reading, editing, testing), and cannot make further progress
163
+ issue exactly the following command:
164
+
165
+ ```bash
166
+ echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached
167
+ ```
168
+
169
+ This command will submit your work.
170
+ You cannot continue working (reading, editing, testing) in any way on this task after submitting.
171
+ </instructions>
172
+ action_observation_template: |
173
+ <returncode>{{output.returncode}}</returncode>
174
+ {% if output.output | length < 10000 -%}
175
+ <output>
176
+ {{ output.output -}}
177
+ </output>
178
+ {%- else -%}
179
+ <warning>
180
+ The output of your last command was too long.
181
+ Please try a different command that produces less output.
182
+ If you're looking at a file you can try use head, tail or sed to view a smaller number of lines selectively.
183
+ If you're using grep or find and it produced too much output, you can use a more selective search pattern.
184
+ If you really need to see something from the full command's output, you can redirect output to a file and then search in that file.
185
+ </warning>
186
+ {%- set elided_chars = output.output | length - 10000 -%}
187
+ <output_head>
188
+ {{ output.output[:5000] }}
189
+ </output_head>
190
+ <elided_chars>
191
+ {{ elided_chars }} characters elided
192
+ </elided_chars>
193
+ <output_tail>
194
+ {{ output.output[-5000:] }}
195
+ </output_tail>
196
+ {%- endif -%}
197
+ format_error_template: |
198
+ Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
199
+
200
+ Please format your action in triple backticks as shown in <response_example>.
201
+
202
+ <response_example>
203
+ Here are some thoughts about why you want to perform the action.
204
+
205
+ ```bash
206
+ <action>
207
+ ```
208
+ </response_example>
209
+
210
+ If you have completed your assignment, please consult the first message about how to
211
+ submit your solution (you will not be able to continue working on this task after that).
212
+ step_limit: 250
213
+ cost_limit: 3.
214
+
215
+ environment:
216
+ cwd: "/testbed"
217
+ timeout: 60
218
+ env:
219
+ PAGER: cat
220
+ MANPAGER: cat
221
+ LESS: -R
222
+ PIP_PROGRESS_BAR: 'off'
223
+ TQDM_DISABLE: '1'
224
+ environment_class: docker
225
+
226
+ model:
227
+ model_name: "roulette"
228
+ model_class: "minisweagent.models.extra.roulette.RouletteModel"
229
+ model_kwargs:
230
+ - model_name: "claude-sonnet-4-20250514"
231
+ model_kwargs:
232
+ temperature: 0.
233
+ - model_name: "gpt-5"
@@ -10,6 +10,7 @@ _ENVIRONMENT_MAPPING = {
10
10
  "singularity": "minisweagent.environments.singularity.SingularityEnvironment",
11
11
  "local": "minisweagent.environments.local.LocalEnvironment",
12
12
  "swerex_docker": "minisweagent.environments.extra.swerex_docker.SwerexDockerEnvironment",
13
+ "bubblewrap": "minisweagent.environments.extra.bubblewrap.BubblewrapEnvironment",
13
14
  }
14
15
 
15
16
 
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  import os
2
3
  import shlex
3
4
  import subprocess
@@ -5,8 +6,6 @@ import uuid
5
6
  from dataclasses import asdict, dataclass, field
6
7
  from typing import Any
7
8
 
8
- from minisweagent.utils.log import get_logger
9
-
10
9
 
11
10
  @dataclass
12
11
  class DockerEnvironmentConfig:
@@ -24,18 +23,20 @@ class DockerEnvironmentConfig:
24
23
  """Timeout for executing commands in the container."""
25
24
  executable: str = os.getenv("MSWEA_DOCKER_EXECUTABLE", "docker")
26
25
  """Path to the docker/container executable."""
27
- run_args: list[str] = field(default_factory=list)
28
- """Additional arguments to pass to the docker/container executable."""
26
+ run_args: list[str] = field(default_factory=lambda: ["--rm"])
27
+ """Additional arguments to pass to the docker/container executable.
28
+ Default is ["--rm"], which removes the container after it exits.
29
+ """
29
30
  container_timeout: str = "2h"
30
31
  """Max duration to keep container running. Uses the same format as the sleep command."""
31
32
 
32
33
 
33
34
  class DockerEnvironment:
34
- def __init__(self, *, config_class: type = DockerEnvironmentConfig, **kwargs):
35
+ def __init__(self, *, config_class: type = DockerEnvironmentConfig, logger: logging.Logger | None = None, **kwargs):
35
36
  """This class executes bash commands in a Docker container using direct docker commands.
36
37
  See `DockerEnvironmentConfig` for keyword arguments.
37
38
  """
38
- self.logger = get_logger("minisweagent.environment")
39
+ self.logger = logger or logging.getLogger("minisweagent.environment")
39
40
  self.container_id: str | None = None
40
41
  self.config = config_class(**kwargs)
41
42
  self._start_container()
@@ -97,7 +98,6 @@ class DockerEnvironment:
97
98
  def cleanup(self):
98
99
  """Stop and remove the Docker container."""
99
100
  if getattr(self, "container_id", None) is not None: # if init fails early, container_id might not be set
100
- self.logger.info(f"Stopping container {self.container_id}")
101
101
  cmd = f"(timeout 60 {self.config.executable} stop {self.container_id} || {self.config.executable} rm -f {self.container_id}) >/dev/null 2>&1 &"
102
102
  subprocess.Popen(cmd, shell=True)
103
103
 
@@ -0,0 +1,112 @@
1
+ """
2
+ [Bubblewrap](https://github.com/containers/bubblewrap) is a low-level, unprivileged sandboxing tool for Linux that enables running applications
3
+ in isolated environments with restricted access to the operating system and user data.
4
+ This environment uses bubblewrap to execute commands in a sandboxed environment.
5
+
6
+ !!! warning
7
+ This environment is experimental.
8
+
9
+ !!! warning
10
+ This environment is not supported on Windows.
11
+ """
12
+
13
+ import logging
14
+ import os
15
+ import platform
16
+ import shutil
17
+ import subprocess
18
+ import tempfile
19
+ import uuid
20
+ from dataclasses import asdict, dataclass, field
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+
25
+ @dataclass
26
+ class BubblewrapEnvironmentConfig:
27
+ cwd: str = ""
28
+ """Working directory for the sandbox."""
29
+ env: dict[str, str] = field(default_factory=dict)
30
+ """Dictionary of environment variables to set in the sandbox."""
31
+ timeout: int = 30
32
+ """Timeout for the command in seconds."""
33
+ executable: str = os.getenv("MSWEA_BUBBLEWRAP_EXECUTABLE", "bwrap")
34
+ """Path to the bubblewrap executable."""
35
+ wrapper_args: list[str] = field(
36
+ default_factory=lambda: [
37
+ "--unshare-user-try",
38
+ "--ro-bind",
39
+ "/usr",
40
+ "/usr",
41
+ "--ro-bind",
42
+ "/bin",
43
+ "/bin",
44
+ "--ro-bind",
45
+ "/lib",
46
+ "/lib",
47
+ "--ro-bind",
48
+ "/lib64",
49
+ "/lib64",
50
+ "--ro-bind",
51
+ "/etc",
52
+ "/etc",
53
+ "--tmpfs",
54
+ "/tmp",
55
+ "--proc",
56
+ "/proc",
57
+ "--dev",
58
+ "/dev",
59
+ "--new-session",
60
+ "--setenv",
61
+ "PATH",
62
+ "/usr/local/bin:/usr/sbin:/usr/bin:/bin",
63
+ ]
64
+ )
65
+ """Arguments to pass to the bubblewrap executable."""
66
+
67
+
68
+ class BubblewrapEnvironment:
69
+ def __init__(
70
+ self, *, config_class: type = BubblewrapEnvironmentConfig, logger: logging.Logger | None = None, **kwargs
71
+ ):
72
+ """This class executes bash commands in a bubblewrap environment and a separate working
73
+ directory for each environment. See `BubblewrapEnvironmentConfig` for kwargs.
74
+ """
75
+ self.logger = logger or logging.getLogger("minisweagent.environment")
76
+ self.config = config_class(**kwargs)
77
+ self.working_dir = Path(tempfile.gettempdir()) / f"minisweagent-{uuid.uuid4().hex[:8]}"
78
+ self.working_dir.mkdir(parents=True)
79
+
80
+ def execute(self, command: str, cwd: str = "") -> dict[str, Any]:
81
+ """Execute a command in the bubblewrap environment and return the result as a dict."""
82
+ cwd = cwd or self.config.cwd or str(self.working_dir)
83
+
84
+ cmd = [self.config.executable] + self.config.wrapper_args + ["--bind", cwd, cwd, "--chdir", cwd]
85
+
86
+ # Add environment variables
87
+ for key, value in self.config.env.items():
88
+ cmd.extend(["--setenv", key, value])
89
+
90
+ cmd.extend(["bash", "-c", command])
91
+
92
+ result = subprocess.run(
93
+ cmd,
94
+ text=True,
95
+ timeout=self.config.timeout,
96
+ encoding="utf-8",
97
+ errors="replace",
98
+ stdout=subprocess.PIPE,
99
+ stderr=subprocess.STDOUT,
100
+ )
101
+ return {"output": result.stdout, "returncode": result.returncode}
102
+
103
+ def cleanup(self):
104
+ if self.working_dir.exists():
105
+ shutil.rmtree(self.working_dir)
106
+
107
+ def __del__(self):
108
+ """Cleanup working_dir when object is destroyed."""
109
+ self.cleanup()
110
+
111
+ def get_template_vars(self) -> dict[str, Any]:
112
+ return asdict(self.config) | platform.uname()._asdict()
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
+ import logging
3
4
  import os
4
5
  import shutil
5
6
  import subprocess
@@ -9,8 +10,6 @@ from dataclasses import asdict, dataclass, field
9
10
  from pathlib import Path
10
11
  from typing import Any
11
12
 
12
- from minisweagent.utils.log import get_logger
13
-
14
13
 
15
14
  @dataclass
16
15
  class SingularityEnvironmentConfig:
@@ -24,18 +23,39 @@ class SingularityEnvironmentConfig:
24
23
  """Timeout for executing commands in the container."""
25
24
  executable: str = os.getenv("MSWEA_SINGULARITY_EXECUTABLE", "singularity")
26
25
  """Path to the singularity executable."""
26
+ sandbox_build_retries: int = 3
27
+ """Number of retries for building the sandbox if an error occurs."""
27
28
 
28
29
 
29
30
  class SingularityEnvironment:
30
- def __init__(self, **kwargs):
31
+ def __init__(
32
+ self, *, config_class: type = SingularityEnvironmentConfig, logger: logging.Logger | None = None, **kwargs
33
+ ):
31
34
  """Singularity environment. See `SingularityEnvironmentConfig` for kwargs."""
32
- self.logger = get_logger("minisweagent.environment")
33
- self.config = SingularityEnvironmentConfig(**kwargs)
34
- self.sandbox_dir = Path(tempfile.gettempdir()) / f"minisweagent-{uuid.uuid4().hex[:8]}"
35
- subprocess.run(
36
- [self.config.executable, "build", "--sandbox", self.sandbox_dir, self.config.image],
37
- check=True,
38
- )
35
+ self.logger = logger or logging.getLogger("minisweagent.environment")
36
+ self.config = config_class(**kwargs)
37
+ self.sandbox_dir = self._build_sandbox()
38
+
39
+ def _build_sandbox(self) -> Path:
40
+ # Building the sandbox can fail (very rarely), so we retry it
41
+ max_retries = self.config.sandbox_build_retries
42
+ for attempt in range(max_retries):
43
+ sandbox_dir = Path(tempfile.gettempdir()) / f"minisweagent-{uuid.uuid4().hex[:8]}"
44
+ try:
45
+ subprocess.run(
46
+ [self.config.executable, "build", "--sandbox", sandbox_dir, self.config.image],
47
+ check=True,
48
+ capture_output=True,
49
+ )
50
+ break
51
+ except subprocess.CalledProcessError as e:
52
+ shutil.rmtree(sandbox_dir, ignore_errors=True)
53
+ self.logger.error(
54
+ f"Error building image {self.config.image}, stdout: {e.stdout}, stderr: {e.stderr} (attempt {attempt + 1}/{max_retries})"
55
+ )
56
+ if attempt == max_retries - 1:
57
+ raise
58
+ return sandbox_dir
39
59
 
40
60
  def get_template_vars(self) -> dict[str, Any]:
41
61
  return asdict(self.config)
@@ -70,9 +90,7 @@ class SingularityEnvironment:
70
90
  return {"output": result.stdout, "returncode": result.returncode}
71
91
 
72
92
  def cleanup(self):
73
- if self.sandbox_dir.exists():
74
- self.logger.info(f"Removing sandbox {self.sandbox_dir}")
75
- shutil.rmtree(self.sandbox_dir)
93
+ shutil.rmtree(self.sandbox_dir, ignore_errors=True)
76
94
 
77
95
  def __del__(self):
78
96
  """Cleanup sandbox when object is destroyed."""
@@ -3,6 +3,7 @@ You can ignore this file completely if you explicitly set your model in your run
3
3
  """
4
4
 
5
5
  import copy
6
+ import importlib
6
7
  import os
7
8
  import threading
8
9
 
@@ -49,12 +50,12 @@ def get_model(input_model_name: str | None = None, config: dict | None = None) -
49
50
  config = copy.deepcopy(config)
50
51
  config["model_name"] = resolved_model_name
51
52
 
52
- # API key resolution (from env -> config -> None)
53
- if "model_kwargs" not in config:
54
- config["model_kwargs"] = {}
55
- if from_env := os.getenv("MSWEA_MODEL_API_KEY"):
56
- config["model_kwargs"]["api_key"] = from_env
57
- return get_model_class(resolved_model_name)(**config)
53
+ model_class = get_model_class(resolved_model_name, config.pop("model_class", ""))
54
+
55
+ if (from_env := os.getenv("MSWEA_MODEL_API_KEY")) and not str(type(model_class)).endswith("DeterministicModel"):
56
+ config.setdefault("model_kwargs", {})["api_key"] = from_env
57
+
58
+ return model_class(**config)
58
59
 
59
60
 
60
61
  def get_model_name(input_model_name: str | None = None, config: dict | None = None) -> str:
@@ -63,19 +64,44 @@ def get_model_name(input_model_name: str | None = None, config: dict | None = No
63
64
  config = {}
64
65
  if input_model_name:
65
66
  return input_model_name
66
- if from_env := os.getenv("MSWEA_MODEL_NAME"):
67
- return from_env
68
67
  if from_config := config.get("model_name"):
69
68
  return from_config
69
+ if from_env := os.getenv("MSWEA_MODEL_NAME"):
70
+ return from_env
70
71
  raise ValueError("No default model set. Please run `mini-extra config setup` to set one.")
71
72
 
72
73
 
73
- def get_model_class(model_name: str) -> type:
74
- """Select the best model class for a given model name."""
74
# Shortcut names accepted for `model_class`, mapped to the full import path of the class.
_MODEL_CLASS_MAPPING = {
    "anthropic": "minisweagent.models.anthropic.AnthropicModel",
    "litellm": "minisweagent.models.litellm_model.LitellmModel",
    "deterministic": "minisweagent.models.test_models.DeterministicModel",
}


def get_model_class(model_name: str, model_class: str = "") -> type:
    """Select the best model class.

    If a model_class is provided (as shortcut name, or as full import path,
    e.g., "anthropic" or "minisweagent.models.anthropic.AnthropicModel"),
    it takes precedence over the `model_name`.
    Otherwise, the model_name is used to select the best model class.

    Args:
        model_name: Name of the model; only consulted when `model_class` is empty.
        model_class: Shortcut from `_MODEL_CLASS_MAPPING` or a dotted import path.

    Returns:
        The class object to instantiate.

    Raises:
        ValueError: If `model_class` is given but cannot be resolved. The
            underlying error (malformed path, failed import, missing
            attribute) is attached as ``__cause__``.
    """
    if model_class:
        full_path = _MODEL_CLASS_MAPPING.get(model_class, model_class)
        try:
            # A path without a dot fails the unpacking with ValueError.
            module_name, class_name = full_path.rsplit(".", 1)
            module = importlib.import_module(module_name)
            return getattr(module, class_name)
        except (ValueError, ImportError, AttributeError) as e:
            msg = f"Unknown model class: {model_class} (resolved to {full_path}, available: {_MODEL_CLASS_MAPPING})"
            # Chain explicitly so the real reason stays visible to the user.
            raise ValueError(msg) from e

    if any(s in model_name.lower() for s in ["anthropic", "sonnet", "opus", "claude"]):
        from minisweagent.models.anthropic import AnthropicModel

        return AnthropicModel

    # Default to LitellmModel
    from minisweagent.models.litellm_model import LitellmModel

    return LitellmModel
@@ -0,0 +1,62 @@
1
+ import random
2
+ from collections.abc import Callable
3
+ from dataclasses import asdict, dataclass
4
+
5
+ from minisweagent import Model
6
+ from minisweagent.models import get_model
7
+
8
+
9
@dataclass
class RouletteModelConfig:
    # Configuration for the roulette meta-model.

    # The models to choose from: one config dict per candidate model.
    model_kwargs: list[dict]
    # Name under which the meta-model itself is reported.
    model_name: str = "roulette"
14
+
15
+
16
class RouletteModel:
    def __init__(self, *, config_class: Callable = RouletteModelConfig, **kwargs):
        """Meta-model that hands every query to one randomly picked sub-model.

        All keyword arguments are forwarded to `config_class`; each entry of
        the resulting `model_kwargs` list is turned into a model via `get_model`.
        """
        self.config = config_class(**kwargs)
        sub_models = []
        for sub_config in self.config.model_kwargs:
            sub_models.append(get_model(config=sub_config))
        self.models = sub_models

    @property
    def cost(self) -> float:
        # Combined cost over all sub-models.
        total = 0
        for sub_model in self.models:
            total += sub_model.cost
        return total

    @property
    def n_calls(self) -> int:
        # Combined number of calls over all sub-models.
        total = 0
        for sub_model in self.models:
            total += sub_model.n_calls
        return total

    def get_template_vars(self) -> dict:
        # Config fields plus the aggregated call/cost statistics.
        return {**asdict(self.config), "n_model_calls": self.n_calls, "model_cost": self.cost}

    def select_model(self) -> Model:
        # Uniform random pick; subclasses override this to change the strategy.
        return random.choice(self.models)

    def query(self, *args, **kwargs) -> dict:
        chosen = self.select_model()
        answer = chosen.query(*args, **kwargs)
        # Record which sub-model produced this response.
        answer["model_name"] = chosen.config.model_name
        return answer
41
+
42
+
43
+ @dataclass
44
+ class InterleavingModelConfig:
45
+ model_kwargs: list[dict]
46
+ sequence: list[int] | None = None
47
+ """If set to 0, 0, 1, we will return the first model 2 times, then the second model 1 time,
48
+ then the first model again, etc."""
49
+ model_name: str = "interleaving"
50
+
51
+
52
class InterleavingModel(RouletteModel):
    def __init__(self, *, config_class: Callable = InterleavingModelConfig, **kwargs):
        """This "meta"-model alternates between the models in the sequence for every call"""
        super().__init__(config_class=config_class, **kwargs)

    def select_model(self) -> Model:
        """Pick the model for the next call based on the total call count.

        Without a sequence the models are used round-robin. With a sequence,
        the call count indexes into it cyclically and the entry is used as
        index into the model list. An empty sequence is treated like an unset
        one; it previously failed with ``ZeroDivisionError`` on the modulo.
        """
        sequence = self.config.sequence
        if not sequence:
            i_model = self.n_calls % len(self.models)
        else:
            i_model = sequence[self.n_calls % len(sequence)]
        return self.models[i_model]
@@ -61,7 +61,15 @@ class LitellmModel:
61
61
 
62
62
  def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
63
63
  response = self._query(messages, **kwargs)
64
- cost = litellm.cost_calculator.completion_cost(response)
64
+ try:
65
+ cost = litellm.cost_calculator.completion_cost(response)
66
+ except Exception as e:
67
+ logger.critical(
68
+ f"Error calculating cost for model {self.config.model_name}: {e}. "
69
+ "Please check the 'Updating the model registry' section in the documentation. "
70
+ "http://bit.ly/4p31bi4 Still stuck? Please open a github issue for help!"
71
+ )
72
+ raise
65
73
  self.n_calls += 1
66
74
  self.cost += cost
67
75
  GLOBAL_MODEL_STATS.add(cost)
@@ -24,7 +24,7 @@ from minisweagent.environments import get_environment
24
24
  from minisweagent.models import get_model
25
25
  from minisweagent.run.extra.utils.batch_progress import RunBatchProgressManager
26
26
  from minisweagent.run.utils.save import save_traj
27
- from minisweagent.utils.log import add_file_handlers, logger
27
+ from minisweagent.utils.log import add_file_handler, logger
28
28
 
29
29
  _HELP_TEXT = """Run mini-SWE-agent on SWEBench instances.
30
30
 
@@ -78,7 +78,7 @@ def get_swebench_docker_image_name(instance: dict) -> str:
78
78
 
79
79
  def get_sb_environment(config: dict, instance: dict) -> Environment:
80
80
  image_name = get_swebench_docker_image_name(instance)
81
- env_config = config.get("environment", {})
81
+ env_config = config.setdefault("environment", {})
82
82
  if env_config.get("environment_class") == "singularity":
83
83
  image_name = "docker://" + image_name
84
84
  env_config["image"] = image_name
@@ -190,6 +190,7 @@ def main(
190
190
  output: str = typer.Option("", "-o", "--output", help="Output directory", rich_help_panel="Basic"),
191
191
  workers: int = typer.Option(1, "-w", "--workers", help="Number of worker threads for parallel processing", rich_help_panel="Basic"),
192
192
  model: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
193
+ model_class: str | None = typer.Option(None, "-c", "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
193
194
  redo_existing: bool = typer.Option(False, "--redo-existing", help="Redo existing instances", rich_help_panel="Data selection"),
194
195
  config_spec: Path = typer.Option( builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
195
196
  environment_class: str | None = typer.Option( None, "--environment-class", help="Environment type to use. Recommended are docker or singularity", rich_help_panel="Advanced"),
@@ -198,7 +199,7 @@ def main(
198
199
  output_path = Path(output)
199
200
  output_path.mkdir(parents=True, exist_ok=True)
200
201
  logger.info(f"Results will be saved to {output_path}")
201
- add_file_handlers(output_path / "minisweagent.log")
202
+ add_file_handler(output_path / "minisweagent.log")
202
203
 
203
204
  dataset_path = DATASET_MAPPING.get(subset, subset)
204
205
  logger.info(f"Loading dataset {dataset_path}, split {split}...")
@@ -217,6 +218,8 @@ def main(
217
218
  config.setdefault("environment", {})["environment_class"] = environment_class
218
219
  if model is not None:
219
220
  config.setdefault("model", {})["model_name"] = model
221
+ if model_class is not None:
222
+ config.setdefault("model", {})["model_class"] = model_class
220
223
 
221
224
  progress_manager = RunBatchProgressManager(len(instances), output_path / f"exit_statuses_{time.time()}.yaml")
222
225
 
@@ -1,5 +1,6 @@
1
1
  """Run on a single SWE-Bench instance."""
2
2
 
3
+ import traceback
3
4
  from pathlib import Path
4
5
 
5
6
  import typer
@@ -29,6 +30,7 @@ def main(
29
30
  split: str = typer.Option("dev", "--split", help="Dataset split", rich_help_panel="Data selection"),
30
31
  instance_spec: str = typer.Option(0, "-i", "--instance", help="SWE-Bench instance ID or index", rich_help_panel="Data selection"),
31
32
  model_name: str | None = typer.Option(None, "-m", "--model", help="Model to use", rich_help_panel="Basic"),
33
+ model_class: str | None = typer.Option(None, "-c", "--model-class", help="Model class to use (e.g., 'anthropic' or 'minisweagent.models.anthropic.AnthropicModel')", rich_help_panel="Advanced"),
32
34
  config_path: Path = typer.Option( builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
33
35
  environment_class: str | None = typer.Option(None, "--environment-class", rich_help_panel="Advanced"),
34
36
  exit_immediately: bool = typer.Option( False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting.", rich_help_panel="Basic"),
@@ -49,6 +51,8 @@ def main(
49
51
  config = yaml.safe_load(get_config_path(config_path).read_text())
50
52
  if environment_class is not None:
51
53
  config.setdefault("environment", {})["environment_class"] = environment_class
54
+ if model_class is not None:
55
+ config.setdefault("model", {})["model_class"] = model_class
52
56
  if exit_immediately:
53
57
  config.setdefault("agent", {})["confirm_exit"] = False
54
58
  env = get_sb_environment(config, instance)
@@ -58,11 +62,15 @@ def main(
58
62
  **({"mode": "yolo"} | config.get("agent", {})),
59
63
  )
60
64
 
61
- exit_status, result = None, None
65
+ exit_status, result, extra_info = None, None, None
62
66
  try:
63
67
  exit_status, result = agent.run(instance["problem_statement"]) # type: ignore[arg-type]
68
+ except Exception as e:
69
+ logger.error(f"Error processing instance {instance_spec}: {e}", exc_info=True)
70
+ exit_status, result = type(e).__name__, str(e)
71
+ extra_info = {"traceback": traceback.format_exc()}
64
72
  finally:
65
- save_traj(agent, output, exit_status=exit_status, result=result) # type: ignore[arg-type]
73
+ save_traj(agent, output, exit_status=exit_status, result=result, extra_info=extra_info) # type: ignore[arg-type]
66
74
 
67
75
 
68
76
  if __name__ == "__main__":
@@ -48,7 +48,7 @@ def main(
48
48
  configure_if_first_time()
49
49
 
50
50
  _config = yaml.safe_load(get_config_path(config).read_text())
51
- _agent_config = _config.get("agent", {})
51
+ _agent_config = _config.setdefault("agent", {})
52
52
  if yolo:
53
53
  _agent_config["mode"] = "yolo"
54
54
 
@@ -4,6 +4,7 @@
4
4
  # Read this first: https://mini-swe-agent.com/latest/usage/mini/ (usage)
5
5
 
6
6
  import os
7
+ import traceback
7
8
  from pathlib import Path
8
9
  from typing import Any
9
10
 
@@ -22,6 +23,7 @@ from minisweagent.environments.local import LocalEnvironment
22
23
  from minisweagent.models import get_model
23
24
  from minisweagent.run.extra.config import configure_if_first_time
24
25
  from minisweagent.run.utils.save import save_traj
26
+ from minisweagent.utils.log import logger
25
27
 
26
28
  DEFAULT_CONFIG = Path(os.getenv("MSWEA_MINI_CONFIG_PATH", builtin_config_dir / "mini.yaml"))
27
29
  DEFAULT_OUTPUT = global_config_dir / "last_mini_run.traj.json"
@@ -41,29 +43,19 @@ More information about the usage: [bold green]https://mini-swe-agent.com/latest/
41
43
  """
42
44
 
43
45
 
46
+ # fmt: off
44
47
  @app.command(help=_HELP_TEXT)
45
48
  def main(
46
- visual: bool = typer.Option(
47
- False,
48
- "-v",
49
- "--visual",
50
- help="Toggle (pager-style) UI (Textual) depending on the MSWEA_VISUAL_MODE_DEFAULT environment setting",
51
- ),
52
- model_name: str | None = typer.Option(
53
- None,
54
- "-m",
55
- "--model",
56
- help="Model to use",
57
- ),
49
+ visual: bool = typer.Option(False, "-v", "--visual", help="Toggle (pager-style) UI (Textual) depending on the MSWEA_VISUAL_MODE_DEFAULT environment setting",),
50
+ model_name: str | None = typer.Option( None, "-m", "--model", help="Model to use",),
58
51
  task: str | None = typer.Option(None, "-t", "--task", help="Task/problem statement", show_default=False),
59
52
  yolo: bool = typer.Option(False, "-y", "--yolo", help="Run without confirmation"),
60
53
  cost_limit: float | None = typer.Option(None, "-l", "--cost-limit", help="Cost limit. Set to 0 to disable."),
61
54
  config_spec: Path = typer.Option(DEFAULT_CONFIG, "-c", "--config", help="Path to config file"),
62
55
  output: Path | None = typer.Option(DEFAULT_OUTPUT, "-o", "--output", help="Output trajectory file"),
63
- exit_immediately: bool = typer.Option(
64
- False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting."
65
- ),
56
+ exit_immediately: bool = typer.Option( False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting."),
66
57
  ) -> Any:
58
+ # fmt: on
67
59
  configure_if_first_time()
68
60
  config = yaml.safe_load(get_config_path(config_spec).read_text())
69
61
 
@@ -92,13 +84,18 @@ def main(
92
84
  agent_class = InteractiveAgent
93
85
  if visual == (os.getenv("MSWEA_VISUAL_MODE_DEFAULT", "false") == "false"):
94
86
  agent_class = TextualAgent
95
- exit_status, result = None, None
87
+
96
88
  agent = agent_class(model, env, **config.get("agent", {}))
89
+ exit_status, result, extra_info = None, None, None
97
90
  try:
98
91
  exit_status, result = agent.run(task) # type: ignore[arg-type]
92
+ except Exception as e:
93
+ logger.error(f"Error running agent: {e}", exc_info=True)
94
+ exit_status, result = type(e).__name__, str(e)
95
+ extra_info = {"traceback": traceback.format_exc()}
99
96
  finally:
100
97
  if output:
101
- save_traj(agent, output, exit_status=exit_status, result=result) # type: ignore[arg-type]
98
+ save_traj(agent, output, exit_status=exit_status, result=result, extra_info=extra_info) # type: ignore[arg-type]
102
99
  return agent
103
100
 
104
101
 
@@ -1,10 +1,24 @@
1
+ import dataclasses
1
2
  import json
2
3
  from collections.abc import Callable
3
4
  from pathlib import Path
5
+ from typing import Any
4
6
 
5
7
  from minisweagent import Agent, __version__
6
8
 
7
9
 
10
+ def _get_class_name_with_module(obj: Any) -> str:
11
+ """Get the full class name with module path."""
12
+ return f"{obj.__class__.__module__}.{obj.__class__.__name__}"
13
+
14
+
15
+ def _asdict(obj: Any) -> dict:
16
+ """Convert config objects to dicts."""
17
+ if dataclasses.is_dataclass(obj):
18
+ return dataclasses.asdict(obj) # type: ignore[arg-type]
19
+ return obj # let's try our luck
20
+
21
+
8
22
  def save_traj(
9
23
  agent: Agent | None,
10
24
  path: Path,
@@ -45,6 +59,14 @@ def save_traj(
45
59
  data["info"]["model_stats"]["instance_cost"] = agent.model.cost
46
60
  data["info"]["model_stats"]["api_calls"] = agent.model.n_calls
47
61
  data["messages"] = agent.messages
62
+ data["info"]["config"] = {
63
+ "agent": _asdict(agent.config),
64
+ "model": _asdict(agent.model.config),
65
+ "environment": _asdict(agent.env.config),
66
+ "agent_type": _get_class_name_with_module(agent),
67
+ "model_type": _get_class_name_with_module(agent.model),
68
+ "environment_type": _get_class_name_with_module(agent.env),
69
+ }
48
70
  if extra_info:
49
71
  data["info"].update(extra_info)
50
72
 
@@ -0,0 +1,36 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ from rich.logging import RichHandler
5
+
6
+
7
def _setup_root_logger() -> None:
    """Attach a Rich console handler to the package-level ``minisweagent`` logger.

    The logger level is set to DEBUG. The handler is created with Rich's own
    path, time and level columns turned off and markup enabled, and gets a
    ``name: LEVEL: message`` formatter.
    """
    package_logger = logging.getLogger("minisweagent")
    package_logger.setLevel(logging.DEBUG)
    rich_options = {"show_path": False, "show_time": False, "show_level": False, "markup": True}
    console_handler = RichHandler(**rich_options)
    console_handler.setFormatter(logging.Formatter("%(name)s: %(levelname)s: %(message)s"))
    package_logger.addHandler(console_handler)
19
+
20
+
21
+ def add_file_handler(path: Path | str, level: int = logging.DEBUG, *, print_path: bool = True) -> None:
22
+ logger = logging.getLogger("minisweagent")
23
+ handler = logging.FileHandler(path)
24
+ handler.setLevel(level)
25
+ formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
26
+ handler.setFormatter(formatter)
27
+ logger.addHandler(handler)
28
+ if print_path:
29
+ print(f"Logging to '{path}'")
30
+
31
+
32
+ _setup_root_logger()
33
+ logger = logging.getLogger("minisweagent")
34
+
35
+
36
+ __all__ = ["logger"]
@@ -1,32 +0,0 @@
1
- import logging
2
- from pathlib import Path
3
-
4
- MINI_LOGGERS = {}
5
- _EXTRA_HANDLERS = []
6
-
7
-
8
- def get_logger(name: str) -> logging.Logger:
9
- if name in MINI_LOGGERS:
10
- return MINI_LOGGERS[name]
11
- logger = logging.getLogger(name)
12
- logger.setLevel(logging.DEBUG)
13
- handler = logging.StreamHandler()
14
- formatter = logging.Formatter("%(name)s: %(levelname)s: %(message)s")
15
- handler.setFormatter(formatter)
16
- logger.addHandler(handler)
17
- for handler in _EXTRA_HANDLERS:
18
- logger.addHandler(handler)
19
- MINI_LOGGERS[name] = logger
20
- return logger
21
-
22
-
23
- def add_file_handlers(path: Path):
24
- handler = logging.FileHandler(path)
25
- formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
26
- handler.setFormatter(formatter)
27
- _EXTRA_HANDLERS.append(handler)
28
- for logger in MINI_LOGGERS.values():
29
- logger.addHandler(handler)
30
-
31
-
32
- logger = get_logger("minisweagent")