mini-swe-agent 1.15.0__tar.gz → 1.16.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. {mini_swe_agent-1.15.0/src/mini_swe_agent.egg-info → mini_swe_agent-1.16.0}/PKG-INFO +4 -2
  2. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/README.md +2 -1
  3. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/pyproject.toml +1 -0
  4. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0/src/mini_swe_agent.egg-info}/PKG-INFO +4 -2
  5. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/mini_swe_agent.egg-info/SOURCES.txt +5 -0
  6. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/mini_swe_agent.egg-info/requires.txt +1 -0
  7. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/__init__.py +1 -1
  8. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/agents/default.py +3 -3
  9. mini_swe_agent-1.16.0/src/minisweagent/config/extra/swebench_xml.yaml +215 -0
  10. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/environments/docker.py +7 -1
  11. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/models/__init__.py +3 -0
  12. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/models/extra/roulette.py +2 -3
  13. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/models/litellm_model.py +2 -1
  14. mini_swe_agent-1.16.0/src/minisweagent/models/litellm_response_api_model.py +80 -0
  15. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/models/portkey_model.py +5 -7
  16. mini_swe_agent-1.16.0/src/minisweagent/models/portkey_response_api_model.py +74 -0
  17. mini_swe_agent-1.16.0/src/minisweagent/models/requesty_model.py +119 -0
  18. mini_swe_agent-1.16.0/src/minisweagent/models/utils/openai_utils.py +41 -0
  19. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/run/extra/config.py +2 -2
  20. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/LICENSE.md +0 -0
  21. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/setup.cfg +0 -0
  22. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/mini_swe_agent.egg-info/dependency_links.txt +0 -0
  23. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/mini_swe_agent.egg-info/entry_points.txt +0 -0
  24. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/mini_swe_agent.egg-info/top_level.txt +0 -0
  25. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/__main__.py +0 -0
  26. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/agents/__init__.py +0 -0
  27. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/agents/interactive.py +0 -0
  28. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/agents/interactive_textual.py +0 -0
  29. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/config/README.md +0 -0
  30. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/config/__init__.py +0 -0
  31. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/config/default.yaml +0 -0
  32. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/config/extra/__init__.py +0 -0
  33. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/config/extra/swebench.yaml +0 -0
  34. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/config/extra/swebench_roulette.yaml +0 -0
  35. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/config/github_issue.yaml +0 -0
  36. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/config/mini.tcss +0 -0
  37. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/config/mini.yaml +0 -0
  38. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/config/mini_no_temp.yaml +0 -0
  39. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/environments/__init__.py +0 -0
  40. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/environments/extra/__init__.py +0 -0
  41. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/environments/extra/bubblewrap.py +0 -0
  42. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/environments/extra/swerex_docker.py +0 -0
  43. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/environments/local.py +0 -0
  44. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/environments/singularity.py +0 -0
  45. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/models/anthropic.py +0 -0
  46. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/models/extra/__init__.py +0 -0
  47. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/models/openrouter_model.py +0 -0
  48. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/models/test_models.py +0 -0
  49. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/models/utils/__init__.py +0 -0
  50. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/models/utils/cache_control.py +0 -0
  51. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/models/utils/key_per_thread.py +0 -0
  52. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/py.typed +0 -0
  53. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/run/__init__.py +0 -0
  54. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/run/extra/__init__.py +0 -0
  55. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/run/extra/swebench.py +0 -0
  56. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/run/extra/swebench_single.py +0 -0
  57. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/run/extra/utils/__init__.py +0 -0
  58. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/run/extra/utils/batch_progress.py +0 -0
  59. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/run/github_issue.py +0 -0
  60. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/run/hello_world.py +0 -0
  61. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/run/inspector.py +0 -0
  62. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/run/mini.py +0 -0
  63. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/run/mini_extra.py +0 -0
  64. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/run/utils/__init__.py +0 -0
  65. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/run/utils/save.py +0 -0
  66. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/utils/__init__.py +0 -0
  67. {mini_swe_agent-1.15.0 → mini_swe_agent-1.16.0}/src/minisweagent/utils/log.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mini-swe-agent
3
- Version: 1.15.0
3
+ Version: 1.16.0
4
4
  Summary: Nano SWE Agent - A simple AI software engineering agent
5
5
  Author-email: Kilian Lieret <kilian.lieret@posteo.de>, "Carlos E. Jimenez" <carlosej@princeton.edu>
6
6
  License: MIT License
@@ -66,6 +66,7 @@ Requires-Dist: mike; extra == "dev"
66
66
  Requires-Dist: mkdocs-material; extra == "dev"
67
67
  Requires-Dist: mkdocs-glightbox; extra == "dev"
68
68
  Requires-Dist: mkdocs-redirects; extra == "dev"
69
+ Requires-Dist: portkey-ai; extra == "dev"
69
70
  Dynamic: license-file
70
71
 
71
72
  <div align="center">
@@ -74,6 +75,7 @@ Dynamic: license-file
74
75
 
75
76
  # The 100 line AI agent that solves GitHub issues & more
76
77
 
78
+ 📣 [Gemini 3 Pro reaches 74% on SWE-bench verified with mini-swe-agent!](https://x.com/KLieret/status/1991164693839270372)<br/>
77
79
  📣 [New blogpost: Randomly switching between GPT-5 and Sonnet 4 boosts performance](https://www.swebench.com/SWE-bench/blog/2025/08/19/mini-roulette/)
78
80
 
79
81
  [![Docs](https://img.shields.io/badge/Docs-green?style=for-the-badge&logo=materialformkdocs&logoColor=white)](https://mini-swe-agent.com/latest/)
@@ -94,7 +96,7 @@ Here's some details:
94
96
 
95
97
  - **Minimal**: Just [100 lines of python](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/agents/default.py) (+100 total for [env](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/environments/local.py),
96
98
  [model](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/models/litellm_model.py), [script](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/run/hello_world.py)) — no fancy dependencies!
97
- - **Powerful:** Resolves >70% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
99
+ - **Powerful:** Resolves >74% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
98
100
  - **Convenient:** Comes with UIs that turn this into your daily dev swiss army knife!
99
101
  - **Deployable:** In addition to local envs, you can use **docker**, **podman**, **singularity**, **apptainer**, and more
100
102
  - **Tested:** [![Codecov](https://img.shields.io/codecov/c/github/swe-agent/mini-swe-agent?style=flat-square)](https://codecov.io/gh/SWE-agent/mini-swe-agent)
@@ -4,6 +4,7 @@
4
4
 
5
5
  # The 100 line AI agent that solves GitHub issues & more
6
6
 
7
+ 📣 [Gemini 3 Pro reaches 74% on SWE-bench verified with mini-swe-agent!](https://x.com/KLieret/status/1991164693839270372)<br/>
7
8
  📣 [New blogpost: Randomly switching between GPT-5 and Sonnet 4 boosts performance](https://www.swebench.com/SWE-bench/blog/2025/08/19/mini-roulette/)
8
9
 
9
10
  [![Docs](https://img.shields.io/badge/Docs-green?style=for-the-badge&logo=materialformkdocs&logoColor=white)](https://mini-swe-agent.com/latest/)
@@ -24,7 +25,7 @@ Here's some details:
24
25
 
25
26
  - **Minimal**: Just [100 lines of python](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/agents/default.py) (+100 total for [env](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/environments/local.py),
26
27
  [model](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/models/litellm_model.py), [script](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/run/hello_world.py)) — no fancy dependencies!
27
- - **Powerful:** Resolves >70% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
28
+ - **Powerful:** Resolves >74% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
28
29
  - **Convenient:** Comes with UIs that turn this into your daily dev swiss army knife!
29
30
  - **Deployable:** In addition to local envs, you can use **docker**, **podman**, **singularity**, **apptainer**, and more
30
31
  - **Tested:** [![Codecov](https://img.shields.io/codecov/c/github/swe-agent/mini-swe-agent?style=flat-square)](https://codecov.io/gh/SWE-agent/mini-swe-agent)
@@ -65,6 +65,7 @@ dev = [
65
65
  "mkdocs-material",
66
66
  "mkdocs-glightbox",
67
67
  "mkdocs-redirects",
68
+ "portkey-ai",
68
69
  ]
69
70
 
70
71
  [project.urls]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mini-swe-agent
3
- Version: 1.15.0
3
+ Version: 1.16.0
4
4
  Summary: Nano SWE Agent - A simple AI software engineering agent
5
5
  Author-email: Kilian Lieret <kilian.lieret@posteo.de>, "Carlos E. Jimenez" <carlosej@princeton.edu>
6
6
  License: MIT License
@@ -66,6 +66,7 @@ Requires-Dist: mike; extra == "dev"
66
66
  Requires-Dist: mkdocs-material; extra == "dev"
67
67
  Requires-Dist: mkdocs-glightbox; extra == "dev"
68
68
  Requires-Dist: mkdocs-redirects; extra == "dev"
69
+ Requires-Dist: portkey-ai; extra == "dev"
69
70
  Dynamic: license-file
70
71
 
71
72
  <div align="center">
@@ -74,6 +75,7 @@ Dynamic: license-file
74
75
 
75
76
  # The 100 line AI agent that solves GitHub issues & more
76
77
 
78
+ 📣 [Gemini 3 Pro reaches 74% on SWE-bench verified with mini-swe-agent!](https://x.com/KLieret/status/1991164693839270372)<br/>
77
79
  📣 [New blogpost: Randomly switching between GPT-5 and Sonnet 4 boosts performance](https://www.swebench.com/SWE-bench/blog/2025/08/19/mini-roulette/)
78
80
 
79
81
  [![Docs](https://img.shields.io/badge/Docs-green?style=for-the-badge&logo=materialformkdocs&logoColor=white)](https://mini-swe-agent.com/latest/)
@@ -94,7 +96,7 @@ Here's some details:
94
96
 
95
97
  - **Minimal**: Just [100 lines of python](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/agents/default.py) (+100 total for [env](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/environments/local.py),
96
98
  [model](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/models/litellm_model.py), [script](https://github.com/SWE-agent/mini-swe-agent/blob/main/src/minisweagent/run/hello_world.py)) — no fancy dependencies!
97
- - **Powerful:** Resolves >70% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
99
+ - **Powerful:** Resolves >74% of GitHub issues in the [SWE-bench verified benchmark](https://www.swebench.com/) ([leaderboard](https://swe-bench.com/)).
98
100
  - **Convenient:** Comes with UIs that turn this into your daily dev swiss army knife!
99
101
  - **Deployable:** In addition to local envs, you can use **docker**, **podman**, **singularity**, **apptainer**, and more
100
102
  - **Tested:** [![Codecov](https://img.shields.io/codecov/c/github/swe-agent/mini-swe-agent?style=flat-square)](https://codecov.io/gh/SWE-agent/mini-swe-agent)
@@ -24,6 +24,7 @@ src/minisweagent/config/mini_no_temp.yaml
24
24
  src/minisweagent/config/extra/__init__.py
25
25
  src/minisweagent/config/extra/swebench.yaml
26
26
  src/minisweagent/config/extra/swebench_roulette.yaml
27
+ src/minisweagent/config/extra/swebench_xml.yaml
27
28
  src/minisweagent/environments/__init__.py
28
29
  src/minisweagent/environments/docker.py
29
30
  src/minisweagent/environments/local.py
@@ -34,14 +35,18 @@ src/minisweagent/environments/extra/swerex_docker.py
34
35
  src/minisweagent/models/__init__.py
35
36
  src/minisweagent/models/anthropic.py
36
37
  src/minisweagent/models/litellm_model.py
38
+ src/minisweagent/models/litellm_response_api_model.py
37
39
  src/minisweagent/models/openrouter_model.py
38
40
  src/minisweagent/models/portkey_model.py
41
+ src/minisweagent/models/portkey_response_api_model.py
42
+ src/minisweagent/models/requesty_model.py
39
43
  src/minisweagent/models/test_models.py
40
44
  src/minisweagent/models/extra/__init__.py
41
45
  src/minisweagent/models/extra/roulette.py
42
46
  src/minisweagent/models/utils/__init__.py
43
47
  src/minisweagent/models/utils/cache_control.py
44
48
  src/minisweagent/models/utils/key_per_thread.py
49
+ src/minisweagent/models/utils/openai_utils.py
45
50
  src/minisweagent/run/__init__.py
46
51
  src/minisweagent/run/github_issue.py
47
52
  src/minisweagent/run/hello_world.py
@@ -25,6 +25,7 @@ mike
25
25
  mkdocs-material
26
26
  mkdocs-glightbox
27
27
  mkdocs-redirects
28
+ portkey-ai
28
29
 
29
30
  [full]
30
31
  mini-swe-agent[dev]
@@ -8,7 +8,7 @@ This file provides:
8
8
  unless you want the static type checking.
9
9
  """
10
10
 
11
- __version__ = "1.15.0"
11
+ __version__ = "1.16.0"
12
12
 
13
13
  import os
14
14
  from pathlib import Path
@@ -2,7 +2,6 @@
2
2
 
3
3
  import re
4
4
  import subprocess
5
- from collections.abc import Callable
6
5
  from dataclasses import asdict, dataclass
7
6
 
8
7
  from jinja2 import StrictUndefined, Template
@@ -25,6 +24,7 @@ class AgentConfig:
25
24
  )
26
25
  format_error_template: str = "Please always provide EXACTLY ONE action in triple backticks."
27
26
  action_observation_template: str = "Observation: {{output}}"
27
+ action_regex: str = r"```bash\s*\n(.*?)\n```"
28
28
  step_limit: int = 0
29
29
  cost_limit: float = 3.0
30
30
 
@@ -54,7 +54,7 @@ class LimitsExceeded(TerminatingException):
54
54
 
55
55
 
56
56
  class DefaultAgent:
57
- def __init__(self, model: Model, env: Environment, *, config_class: Callable = AgentConfig, **kwargs):
57
+ def __init__(self, model: Model, env: Environment, *, config_class: type = AgentConfig, **kwargs):
58
58
  self.config = config_class(**kwargs)
59
59
  self.messages: list[dict] = []
60
60
  self.model = model
@@ -106,7 +106,7 @@ class DefaultAgent:
106
106
 
107
107
  def parse_action(self, response: dict) -> dict:
108
108
  """Parse the action from the message. Returns the action."""
109
- actions = re.findall(r"```bash\s*\n(.*?)\n```", response["content"], re.DOTALL)
109
+ actions = re.findall(self.config.action_regex, response["content"], re.DOTALL)
110
110
  if len(actions) == 1:
111
111
  return {"action": actions[0].strip(), **response}
112
112
  raise FormatError(self.render_template(self.config.format_error_template, actions=actions))
@@ -0,0 +1,215 @@
1
+ agent:
2
+ system_template: |
3
+ You are a helpful assistant that can interact multiple times with a computer shell to solve programming tasks.
4
+ Your response must contain exactly ONE bash code block with ONE command (or commands connected with && or ||).
5
+
6
+ Include a THOUGHT section before your command where you explain your reasoning process.
7
+ Format your response as shown in <format_example>.
8
+
9
+ <format_example>
10
+ THOUGHT: Your reasoning and analysis here
11
+
12
+ <bash_code>your_command_here</bash_code>
13
+ </format_example>
14
+
15
+ Failure to follow these rules will cause your response to be rejected.
16
+ instance_template: |
17
+ <pr_description>
18
+ Consider the following PR description:
19
+ {{task}}
20
+ </pr_description>
21
+
22
+ <instructions>
23
+ # Task Instructions
24
+
25
+ ## Overview
26
+ You're a software engineer interacting continuously with a computer by submitting commands.
27
+ You'll be helping implement necessary changes to meet requirements in the PR description.
28
+ Your task is specifically to make changes to non-test files in the current directory in order to fix the issue described in the PR description in a way that is general and consistent with the codebase.
29
+
30
+ IMPORTANT: This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command.
31
+
32
+ For each response:
33
+ 1. Include a THOUGHT section explaining your reasoning and what you're trying to accomplish
34
+ 2. Provide exactly ONE bash command to execute
35
+
36
+ ## Important Boundaries
37
+ - MODIFY: Regular source code files in /testbed (this is the working directory for all your subsequent commands)
38
+ - DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
39
+
40
+ ## Recommended Workflow
41
+ 1. Analyze the codebase by finding and reading relevant files
42
+ 2. Create a script to reproduce the issue
43
+ 3. Edit the source code to resolve the issue
44
+ 4. Verify your fix works by running your script again
45
+ 5. Test edge cases to ensure your fix is robust
46
+
47
+ ## Command Execution Rules
48
+ You are operating in an environment where
49
+ 1. You write a single command
50
+ 2. The system executes that command in a subshell
51
+ 3. You see the result
52
+ 4. You write your next command
53
+
54
+ Each response should include:
55
+ 1. A **THOUGHT** section where you explain your reasoning and plan
56
+ 2. A single bash code block with your command
57
+
58
+ Format your responses like included within the <format_example> block:
59
+
60
+ <format_example>
61
+ THOUGHT: Here I explain my reasoning process, analysis of the current situation,
62
+ and what I'm trying to accomplish with the command below.
63
+
64
+ <bash_code>your_command_here </bash_code></format_example>
65
+
66
+ **CRITICAL REQUIREMENTS:**
67
+ - Your response SHOULD include a THOUGHT section explaining your reasoning
68
+ - Your response MUST include EXACTLY ONE bash code block
69
+ - This bash block MUST contain EXACTLY ONE command (or a set of commands connected with && or ||)
70
+ - If you include zero or multiple bash blocks, or no command at all, YOUR RESPONSE WILL FAIL
71
+ - Do NOT try to run multiple independent commands in separate blocks in one response
72
+ - Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
73
+ - However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
74
+
75
+ Example of a CORRECT response:
76
+
77
+ <example_response>
78
+ THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
79
+
80
+ <bash_code>ls -la</bash_code>
81
+ </example_response>
82
+
83
+ Example of an INCORRECT response:
84
+
85
+ <example_response>
86
+ THOUGHT: I need to examine the codebase and then look at a specific file. I'll run multiple commands to do this.
87
+
88
+ <bash_code>ls -la</bash_code>
89
+
90
+ Now I'll read the file:
91
+
92
+ <bash_code>cat file.txt</bash_code>
93
+ </example_response>
94
+
95
+ If you need to run multiple commands, either:
96
+
97
+ 1. Combine them in one block using && or ||
98
+
99
+ <bash_code>command1 && command2 || echo "Error occurred"</bash_code>
100
+
101
+ 2. Wait for the first command to complete, see its output, then issue the next command in your following response.
102
+
103
+ ## Environment Details
104
+
105
+ - You have a full Linux shell environment
106
+ - Always use non-interactive flags (-y, -f) for commands
107
+ - Avoid interactive tools like vi, nano, or any that require user input
108
+ - If a command isn't available, you can install it
109
+
110
+ ## Useful Command Examples
111
+
112
+ ### Create a new file:
113
+
114
+ <bash_code>cat <<'EOF' > newfile.py
115
+ import numpy as np
116
+ hello = "world"
117
+ print(hello)
118
+ EOF</bash_code>
119
+
120
+ ### Edit files with sed:
121
+
122
+ Replace all occurrences
123
+
124
+ <bash_code>sed -i 's/old_string/new_string/g' filename.py</bash_code>
125
+
126
+ Replace only first occurrence
127
+
128
+ <bash_code>sed -i 's/old_string/new_string/' filename.py</bash_code>
129
+
130
+ Replace first occurrence on line 1
131
+
132
+ <bash_code>sed -i '1s/old_string/new_string/' filename.py</bash_code>
133
+
134
+ Replace all occurrences in lines 1-10
135
+
136
+ <bash_code>sed -i '1,10s/old_string/new_string/g' filename.py</bash_code>
137
+
138
+ ### View file content:
139
+
140
+ View specific lines with numbers
141
+
142
+ <bash_code> nl -ba filename.py | sed -n '10,20p'</bash_code>
143
+
144
+ Any other command you want to run
145
+
146
+ <bash_code>anything</bash_code>
147
+
148
+ ## Submission
149
+
150
+ When you've completed your work (reading, editing, testing), and cannot make further progress
151
+ issue exactly the following command:
152
+
153
+ <bash_code>echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached</bash_code>
154
+
155
+ This command will submit your work.
156
+ You cannot continue working (reading, editing, testing) in any way on this task after submitting.
157
+ </instructions>
158
+ action_observation_template: |
159
+ <returncode>{{output.returncode}}</returncode>
160
+ {% if output.output | length < 10000 -%}
161
+ <output>
162
+ {{ output.output -}}
163
+ </output>
164
+ {%- else -%}
165
+ <warning>
166
+ The output of your last command was too long.
167
+ Please try a different command that produces less output.
168
+ If you're looking at a file you can try use head, tail or sed to view a smaller number of lines selectively.
169
+ If you're using grep or find and it produced too much output, you can use a more selective search pattern.
170
+ If you really need to see something from the full command's output, you can redirect output to a file and then search in that file.
171
+ </warning>
172
+ {%- set elided_chars = output.output | length - 10000 -%}
173
+ <output_head>
174
+ {{ output.output[:5000] }}
175
+ </output_head>
176
+ <elided_chars>
177
+ {{ elided_chars }} characters elided
178
+ </elided_chars>
179
+ <output_tail>
180
+ {{ output.output[-5000:] }}
181
+ </output_tail>
182
+ {%- endif -%}
183
+ format_error_template: |
184
+ Please always provide EXACTLY ONE action in the `<bash_code>` block, found {{actions|length}} actions.
185
+
186
+ Please format your action in a `<bash_code>` block as shown in <response_example>.
187
+
188
+ <response_example>
189
+ Here are some thoughts about why you want to perform the action.
190
+
191
+ <bash_code>ls -la</bash_code>
192
+ </response_example>
193
+
194
+ If you have completed your assignment, please consult the first message about how to
195
+ submit your solution (you will not be able to continue working on this task after that).
196
+ step_limit: 250
197
+ cost_limit: 3.
198
+ action_regex: <bash_code>(.*?)</bash_code>
199
+
200
+ environment:
201
+ cwd: "/testbed"
202
+ timeout: 60
203
+ env:
204
+ PAGER: cat
205
+ MANPAGER: cat
206
+ LESS: -R
207
+ PIP_PROGRESS_BAR: 'off'
208
+ TQDM_DISABLE: '1'
209
+ environment_class: docker
210
+
211
+ model:
212
+ model_name: "minimax/minimax-m2"
213
+ model_class: openrouter
214
+ model_kwargs:
215
+ temperature: 0.0
@@ -34,7 +34,13 @@ class DockerEnvironmentConfig:
34
34
 
35
35
 
36
36
  class DockerEnvironment:
37
- def __init__(self, *, config_class: type = DockerEnvironmentConfig, logger: logging.Logger | None = None, **kwargs):
37
+ def __init__(
38
+ self,
39
+ *,
40
+ config_class: type = DockerEnvironmentConfig,
41
+ logger: logging.Logger | None = None,
42
+ **kwargs,
43
+ ):
38
44
  """This class executes bash commands in a Docker container using direct docker commands.
39
45
  See `DockerEnvironmentConfig` for keyword arguments.
40
46
  """
@@ -81,8 +81,11 @@ def get_model_name(input_model_name: str | None = None, config: dict | None = No
81
81
  _MODEL_CLASS_MAPPING = {
82
82
  "anthropic": "minisweagent.models.anthropic.AnthropicModel",
83
83
  "litellm": "minisweagent.models.litellm_model.LitellmModel",
84
+ "litellm_response": "minisweagent.models.litellm_response_api_model.LitellmResponseAPIModel",
84
85
  "openrouter": "minisweagent.models.openrouter_model.OpenRouterModel",
85
86
  "portkey": "minisweagent.models.portkey_model.PortkeyModel",
87
+ "portkey_response": "minisweagent.models.portkey_response_api_model.PortkeyResponseAPIModel",
88
+ "requesty": "minisweagent.models.requesty_model.RequestyModel",
86
89
  "deterministic": "minisweagent.models.test_models.DeterministicModel",
87
90
  }
88
91
 
@@ -1,5 +1,4 @@
1
1
  import random
2
- from collections.abc import Callable
3
2
  from dataclasses import asdict, dataclass
4
3
 
5
4
  from minisweagent import Model
@@ -14,7 +13,7 @@ class RouletteModelConfig:
14
13
 
15
14
 
16
15
  class RouletteModel:
17
- def __init__(self, *, config_class: Callable = RouletteModelConfig, **kwargs):
16
+ def __init__(self, *, config_class: type = RouletteModelConfig, **kwargs):
18
17
  """This "meta"-model randomly selects one of the models at every call"""
19
18
  self.config = config_class(**kwargs)
20
19
  self.models = [get_model(config=config) for config in self.config.model_kwargs]
@@ -50,7 +49,7 @@ class InterleavingModelConfig:
50
49
 
51
50
 
52
51
  class InterleavingModel(RouletteModel):
53
- def __init__(self, *, config_class: Callable = InterleavingModelConfig, **kwargs):
52
+ def __init__(self, *, config_class: type = InterleavingModelConfig, **kwargs):
54
53
  """This "meta"-model alternates between the models in the sequence for every call"""
55
54
  super().__init__(config_class=config_class, **kwargs)
56
55
 
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  import logging
3
3
  import os
4
+ from collections.abc import Callable
4
5
  from dataclasses import asdict, dataclass, field
5
6
  from pathlib import Path
6
7
  from typing import Any, Literal
@@ -32,7 +33,7 @@ class LitellmModelConfig:
32
33
 
33
34
 
34
35
  class LitellmModel:
35
- def __init__(self, *, config_class: type = LitellmModelConfig, **kwargs):
36
+ def __init__(self, *, config_class: Callable = LitellmModelConfig, **kwargs):
36
37
  self.config = config_class(**kwargs)
37
38
  self.cost = 0.0
38
39
  self.n_calls = 0
@@ -0,0 +1,80 @@
1
+ import logging
2
+ from collections.abc import Callable
3
+ from dataclasses import dataclass
4
+
5
+ import litellm
6
+ from tenacity import (
7
+ before_sleep_log,
8
+ retry,
9
+ retry_if_not_exception_type,
10
+ stop_after_attempt,
11
+ wait_exponential,
12
+ )
13
+
14
+ from minisweagent.models.litellm_model import LitellmModel, LitellmModelConfig
15
+ from minisweagent.models.utils.openai_utils import coerce_responses_text
16
+
17
+ logger = logging.getLogger("litellm_response_api_model")
18
+
19
+
20
+ @dataclass
21
+ class LitellmResponseAPIModelConfig(LitellmModelConfig):
22
+ pass
23
+
24
+
25
+ class LitellmResponseAPIModel(LitellmModel):
26
+ def __init__(self, *, config_class: Callable = LitellmResponseAPIModelConfig, **kwargs):
27
+ super().__init__(config_class=config_class, **kwargs)
28
+ self._previous_response_id: str | None = None
29
+
30
+ @retry(
31
+ stop=stop_after_attempt(10),
32
+ wait=wait_exponential(multiplier=1, min=4, max=60),
33
+ before_sleep=before_sleep_log(logger, logging.WARNING),
34
+ retry=retry_if_not_exception_type(
35
+ (
36
+ litellm.exceptions.UnsupportedParamsError,
37
+ litellm.exceptions.NotFoundError,
38
+ litellm.exceptions.PermissionDeniedError,
39
+ litellm.exceptions.ContextWindowExceededError,
40
+ litellm.exceptions.APIError,
41
+ litellm.exceptions.AuthenticationError,
42
+ KeyboardInterrupt,
43
+ )
44
+ ),
45
+ )
46
+ def _query(self, messages: list[dict[str, str]], **kwargs):
47
+ try:
48
+ resp = litellm.responses(
49
+ model=self.config.model_name,
50
+ input=messages if self._previous_response_id is None else messages[-1:],
51
+ previous_response_id=self._previous_response_id,
52
+ **(self.config.model_kwargs | kwargs),
53
+ )
54
+ self._previous_response_id = getattr(resp, "id", None)
55
+ return resp
56
+ except litellm.exceptions.AuthenticationError as e:
57
+ e.message += " You can permanently set your API key with `mini-extra config set KEY VALUE`."
58
+ raise e
59
+
60
+ def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
61
+ response = self._query(messages, **kwargs)
62
+ print(response)
63
+ text = coerce_responses_text(response)
64
+ try:
65
+ cost = litellm.cost_calculator.completion_cost(response)
66
+ except Exception as e:
67
+ logger.critical(
68
+ f"Error calculating cost for model {self.config.model_name}: {e}. "
69
+ "Please check the 'Updating the model registry' section in the documentation. "
70
+ "http://bit.ly/4p31bi4 Still stuck? Please open a github issue for help!"
71
+ )
72
+ raise
73
+ self.n_calls += 1
74
+ self.cost += cost
75
+ from minisweagent.models import GLOBAL_MODEL_STATS
76
+
77
+ GLOBAL_MODEL_STATS.add(cost)
78
+ return {
79
+ "content": text,
80
+ }
@@ -22,7 +22,9 @@ logger = logging.getLogger("portkey_model")
22
22
  try:
23
23
  from portkey_ai import Portkey
24
24
  except ImportError:
25
- Portkey = None
25
+ raise ImportError(
26
+ "The portkey-ai package is required to use PortkeyModel. Please install it with: pip install portkey-ai"
27
+ )
26
28
 
27
29
 
28
30
  @dataclass
@@ -45,12 +47,8 @@ class PortkeyModelConfig:
45
47
 
46
48
 
47
49
  class PortkeyModel:
48
- def __init__(self, **kwargs):
49
- if Portkey is None:
50
- raise ImportError(
51
- "The portkey-ai package is required to use PortkeyModel. Please install it with: pip install portkey-ai"
52
- )
53
- self.config = PortkeyModelConfig(**kwargs)
50
+ def __init__(self, *, config_class: type = PortkeyModelConfig, **kwargs):
51
+ self.config = config_class(**kwargs)
54
52
  self.cost = 0.0
55
53
  self.n_calls = 0
56
54
  if self.config.litellm_model_registry and Path(self.config.litellm_model_registry).is_file():
@@ -0,0 +1,74 @@
1
+ import logging
2
+ import os
3
+ from dataclasses import dataclass
4
+
5
+ import litellm
6
+ from tenacity import (
7
+ before_sleep_log,
8
+ retry,
9
+ retry_if_not_exception_type,
10
+ stop_after_attempt,
11
+ wait_exponential,
12
+ )
13
+
14
+ from minisweagent.models import GLOBAL_MODEL_STATS
15
+ from minisweagent.models.portkey_model import PortkeyModel, PortkeyModelConfig
16
+ from minisweagent.models.utils.cache_control import set_cache_control
17
+ from minisweagent.models.utils.openai_utils import coerce_responses_text
18
+
19
+ logger = logging.getLogger("portkey_response_api_model")
20
+
21
+
22
@dataclass
class PortkeyResponseAPIModelConfig(PortkeyModelConfig):
    """Configuration for ``PortkeyResponseAPIModel``.

    Currently identical to ``PortkeyModelConfig``; exists as a separate type so
    Responses-API-specific options can be added without touching the base.
    """
25
+
26
+
27
class PortkeyResponseAPIModel(PortkeyModel):
    """Portkey-backed model that talks to the OpenAI Responses API.

    Remembers the id of the last response so that every follow-up turn only
    needs to send the newest message; the server replays the earlier context
    from ``previous_response_id``.
    """

    def __init__(self, *, config_class: type = PortkeyResponseAPIModelConfig, **kwargs):
        super().__init__(config_class=config_class, **kwargs)
        # Id of the most recent response; stays None until the first call succeeds.
        self._previous_response_id: str | None = None

    @retry(
        stop=stop_after_attempt(int(os.getenv("MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT", "10"))),
        wait=wait_exponential(multiplier=1, min=4, max=60),
        before_sleep=before_sleep_log(logger, logging.WARNING),
        retry=retry_if_not_exception_type((KeyboardInterrupt, TypeError, ValueError)),
    )
    def _query(self, messages: list[dict[str, str]], **kwargs):
        """Issue one Responses API call, chaining onto the previous response if any."""
        if self._previous_response_id is None:
            turn_input = messages  # first turn: send the full conversation
        else:
            turn_input = messages[-1:]  # later turns: only the newest message
        call_kwargs = self.config.model_kwargs | kwargs
        api_response = self.client.responses.create(
            model=self.config.model_name,
            input=turn_input,
            previous_response_id=self._previous_response_id,
            **call_kwargs,
        )
        self._previous_response_id = getattr(api_response, "id", None)
        return api_response

    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        """Query the model, update call/cost statistics, and return the result dict."""
        if self.config.set_cache_control:
            messages = set_cache_control(messages, mode=self.config.set_cache_control)
        api_response = self._query(messages, **kwargs)
        content = coerce_responses_text(api_response)
        cost = 0.0
        try:
            cost = litellm.cost_calculator.completion_cost(api_response)
            assert cost > 0.0, f"Cost is not positive: {cost}"
        except Exception as e:
            if self.config.cost_tracking != "ignore_errors":
                raise RuntimeError(
                    f"Error calculating cost for model {self.config.model_name}: {e}. "
                    "You can ignore this issue from your config file with cost_tracking: 'ignore_errors' or "
                    "globally with export MSWEA_COST_TRACKING='ignore_errors' to ignore this error. "
                ) from e
            cost = 0.0  # cost tracking explicitly disabled on error
        self.n_calls += 1
        self.cost += cost
        GLOBAL_MODEL_STATS.add(cost)
        serialized = api_response.model_dump() if hasattr(api_response, "model_dump") else {}
        return {
            "content": content,
            "extra": {
                "response": serialized,
                "cost": cost,
            },
        }
@@ -0,0 +1,119 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ from dataclasses import asdict, dataclass, field
5
+ from typing import Any
6
+
7
+ import requests
8
+ from tenacity import (
9
+ before_sleep_log,
10
+ retry,
11
+ retry_if_not_exception_type,
12
+ stop_after_attempt,
13
+ wait_exponential,
14
+ )
15
+
16
+ from minisweagent.models import GLOBAL_MODEL_STATS
17
+
18
+ logger = logging.getLogger("requesty_model")
19
+
20
+
21
+ @dataclass
22
+ class RequestyModelConfig:
23
+ model_name: str
24
+ model_kwargs: dict[str, Any] = field(default_factory=dict)
25
+
26
+
27
class RequestyAPIError(Exception):
    """Base class for all Requesty API errors."""


class RequestyAuthenticationError(RequestyAPIError):
    """Raised when the Requesty API rejects the configured API key (HTTP 401)."""


class RequestyRateLimitError(RequestyAPIError):
    """Raised when the Requesty API reports a rate limit (HTTP 429)."""
43
+
44
+
45
class RequestyModel:
    """Chat-completions model that talks to the Requesty router over HTTP.

    Reads the API key from the ``REQUESTY_API_KEY`` environment variable and
    tracks per-call cost as reported by the API response's ``usage.cost``.
    """

    def __init__(self, **kwargs):
        self.config = RequestyModelConfig(**kwargs)
        self.cost = 0.0  # cumulative cost across all calls
        self.n_calls = 0
        self._api_url = "https://router.requesty.ai/v1/chat/completions"
        self._api_key = os.getenv("REQUESTY_API_KEY", "")

    @retry(
        # Env-configurable retry budget, consistent with the other model backends.
        stop=stop_after_attempt(int(os.getenv("MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT", "10"))),
        wait=wait_exponential(multiplier=1, min=4, max=60),
        before_sleep=before_sleep_log(logger, logging.WARNING),
        retry=retry_if_not_exception_type(
            (
                RequestyAuthenticationError,
                KeyboardInterrupt,
            )
        ),
    )
    def _query(self, messages: list[dict[str, str]], **kwargs):
        """POST one chat-completions request and return the decoded JSON body.

        Raises:
            RequestyAuthenticationError: on HTTP 401 (not retried).
            RequestyRateLimitError: on HTTP 429 (retried by the decorator).
            RequestyAPIError: on any other HTTP or transport failure.
        """
        headers = {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://github.com/SWE-agent/mini-swe-agent",
            "X-Title": "mini-swe-agent",
        }

        payload = {
            "model": self.config.model_name,
            "messages": messages,
            **(self.config.model_kwargs | kwargs),
        }

        try:
            response = requests.post(self._api_url, headers=headers, data=json.dumps(payload), timeout=60)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as e:
            # Use the response attached to the exception rather than the closure
            # variable so this handler stays self-contained.
            status = e.response.status_code
            if status == 401:
                error_msg = "Authentication failed. You can permanently set your API key with `mini-extra config set REQUESTY_API_KEY YOUR_KEY`."
                raise RequestyAuthenticationError(error_msg) from e
            elif status == 429:
                raise RequestyRateLimitError("Rate limit exceeded") from e
            else:
                raise RequestyAPIError(f"HTTP {status}: {e.response.text}") from e
        except requests.exceptions.RequestException as e:
            raise RequestyAPIError(f"Request failed: {e}") from e

    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        """Query the model; return ``{"content": ..., "extra": {...}}`` and track cost."""
        response = self._query(messages, **kwargs)

        # Extract cost from usage information. Coerce a missing/null usage block
        # or cost to 0.0 so a null value produces the clear error below rather
        # than a TypeError when accumulating.
        usage = response.get("usage") or {}
        cost = usage.get("cost") or 0.0

        # If cost is not available, raise an error
        if cost == 0.0:
            raise RequestyAPIError(
                f"No cost information available from Requesty API for model {self.config.model_name}. "
                "Cost tracking is required but not provided by the API response."
            )

        self.n_calls += 1
        self.cost += cost
        GLOBAL_MODEL_STATS.add(cost)

        return {
            "content": response["choices"][0]["message"]["content"] or "",
            "extra": {
                "response": response,  # already is json
            },
        }

    def get_template_vars(self) -> dict[str, Any]:
        """Config fields plus call statistics, for prompt-template interpolation."""
        return asdict(self.config) | {"n_model_calls": self.n_calls, "model_cost": self.cost}
@@ -0,0 +1,41 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ from openai.types.responses.response_output_message import ResponseOutputMessage
5
+
6
+ logger = logging.getLogger("openai_utils")
7
+
8
+
9
+ def coerce_responses_text(resp: Any) -> str:
10
+ """Helper to normalize OpenAI Responses API result to text.
11
+
12
+ Works with both OpenAI client responses and LiteLLM/Portkey responses.
13
+ """
14
+ text = getattr(resp, "output_text", None)
15
+ if isinstance(text, str) and text:
16
+ return text
17
+
18
+ try:
19
+ output = []
20
+ for item in resp.output:
21
+ if isinstance(item, dict):
22
+ content = item.get("content", [])
23
+ elif isinstance(item, ResponseOutputMessage):
24
+ content = item.content
25
+ else:
26
+ continue
27
+
28
+ for content_item in content:
29
+ if isinstance(content_item, dict):
30
+ text_val = content_item.get("text")
31
+ elif hasattr(content_item, "text"):
32
+ text_val = content_item.text
33
+ else:
34
+ continue
35
+
36
+ if text_val:
37
+ output.append(text_val)
38
+ return "\n\n".join(output) or ""
39
+ except (AttributeError, IndexError, TypeError):
40
+ logger.warning(f"Could not extract text from response: {resp}")
41
+ return ""
@@ -35,9 +35,9 @@ Here's a few popular models and the required API keys:
35
35
 
36
36
  [bold green]anthropic/claude-sonnet-4-5-20250929[/bold green] ([bold green]ANTHROPIC_API_KEY[/bold green])
37
37
  [bold green]openai/gpt-5[/bold green] or [bold green]openai/gpt-5-mini[/bold green] ([bold green]OPENAI_API_KEY[/bold green])
38
- [bold green]gemini/gemini-2.5-pro[/bold green] ([bold green]GEMINI_API_KEY[/bold green])
38
+ [bold green]gemini/gemini-3-pro-preview[/bold green] ([bold green]GEMINI_API_KEY[/bold green])
39
39
 
40
- [bold]Note: Please always include the provider in the model name.[/bold]
40
+ [bold]Note: Please always include the provider (e.g., "openai/") in the model name.[/bold]
41
41
 
42
42
  [bold yellow]You can leave any setting blank to skip it.[/bold yellow]
43
43