mini-swe-agent 1.17.4__py3-none-any.whl → 2.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/METADATA +36 -52
- mini_swe_agent-2.0.0a1.dist-info/RECORD +70 -0
- {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/WHEEL +1 -1
- mini_swe_agent-2.0.0a1.dist-info/entry_points.txt +5 -0
- minisweagent/__init__.py +19 -26
- minisweagent/agents/default.py +128 -113
- minisweagent/agents/interactive.py +119 -58
- minisweagent/config/README.md +3 -4
- minisweagent/config/__init__.py +36 -1
- minisweagent/config/benchmarks/swebench.yaml +156 -0
- minisweagent/config/{extra/swebench.yaml → benchmarks/swebench_backticks.yaml} +69 -64
- minisweagent/config/benchmarks/swebench_modal.yaml +47 -0
- minisweagent/config/{extra → benchmarks}/swebench_xml.yaml +73 -70
- minisweagent/config/default.yaml +24 -21
- minisweagent/config/inspector.tcss +42 -0
- minisweagent/config/mini.yaml +53 -71
- minisweagent/config/{github_issue.yaml → mini_textbased.yaml} +43 -29
- minisweagent/environments/__init__.py +1 -0
- minisweagent/environments/docker.py +67 -20
- minisweagent/environments/extra/bubblewrap.py +86 -47
- minisweagent/environments/extra/swerex_docker.py +53 -20
- minisweagent/environments/extra/swerex_modal.py +90 -0
- minisweagent/environments/local.py +62 -21
- minisweagent/environments/singularity.py +59 -18
- minisweagent/exceptions.py +22 -0
- minisweagent/models/__init__.py +6 -7
- minisweagent/models/extra/roulette.py +20 -17
- minisweagent/models/litellm_model.py +90 -44
- minisweagent/models/litellm_response_model.py +80 -0
- minisweagent/models/litellm_textbased_model.py +45 -0
- minisweagent/models/openrouter_model.py +87 -45
- minisweagent/models/openrouter_response_model.py +123 -0
- minisweagent/models/openrouter_textbased_model.py +76 -0
- minisweagent/models/portkey_model.py +84 -42
- minisweagent/models/portkey_response_model.py +163 -0
- minisweagent/models/requesty_model.py +91 -41
- minisweagent/models/test_models.py +246 -19
- minisweagent/models/utils/actions_text.py +60 -0
- minisweagent/models/utils/actions_toolcall.py +102 -0
- minisweagent/models/utils/actions_toolcall_response.py +110 -0
- minisweagent/models/utils/anthropic_utils.py +28 -0
- minisweagent/models/utils/cache_control.py +15 -2
- minisweagent/models/utils/content_string.py +74 -0
- minisweagent/models/utils/openai_multimodal.py +50 -0
- minisweagent/models/utils/retry.py +25 -0
- minisweagent/run/benchmarks/__init__.py +1 -0
- minisweagent/run/{extra → benchmarks}/swebench.py +57 -36
- minisweagent/run/benchmarks/swebench_single.py +89 -0
- minisweagent/run/{extra → benchmarks}/utils/batch_progress.py +1 -1
- minisweagent/run/hello_world.py +6 -0
- minisweagent/run/mini.py +54 -63
- minisweagent/run/utilities/__init__.py +1 -0
- minisweagent/run/{extra → utilities}/config.py +2 -0
- minisweagent/run/{inspector.py → utilities/inspector.py} +90 -11
- minisweagent/run/{mini_extra.py → utilities/mini_extra.py} +9 -5
- minisweagent/utils/serialize.py +26 -0
- mini_swe_agent-1.17.4.dist-info/RECORD +0 -61
- mini_swe_agent-1.17.4.dist-info/entry_points.txt +0 -5
- minisweagent/agents/interactive_textual.py +0 -450
- minisweagent/config/extra/swebench_roulette.yaml +0 -233
- minisweagent/config/mini.tcss +0 -86
- minisweagent/models/anthropic.py +0 -35
- minisweagent/models/litellm_response_api_model.py +0 -82
- minisweagent/models/portkey_response_api_model.py +0 -75
- minisweagent/models/utils/key_per_thread.py +0 -20
- minisweagent/models/utils/openai_utils.py +0 -41
- minisweagent/run/extra/swebench_single.py +0 -79
- minisweagent/run/github_issue.py +0 -87
- minisweagent/run/utils/__init__.py +0 -0
- minisweagent/run/utils/save.py +0 -78
- {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/licenses/LICENSE.md +0 -0
- {mini_swe_agent-1.17.4.dist-info → mini_swe_agent-2.0.0a1.dist-info}/top_level.txt +0 -0
- /minisweagent/config/{extra → benchmarks}/__init__.py +0 -0
- /minisweagent/run/{extra → benchmarks}/utils/__init__.py +0 -0
minisweagent/agents/default.py
CHANGED
|
@@ -1,140 +1,155 @@
|
|
|
1
|
-
"""Basic agent class. See https://mini-swe-agent.com/latest/advanced/control_flow/ for visual explanation
|
|
1
|
+
"""Basic agent class. See https://mini-swe-agent.com/latest/advanced/control_flow/ for visual explanation
|
|
2
|
+
or https://minimal-agent.com for a tutorial on the basic building principles.
|
|
3
|
+
"""
|
|
2
4
|
|
|
3
|
-
import
|
|
4
|
-
import
|
|
5
|
-
import
|
|
6
|
-
from
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import traceback
|
|
8
|
+
from pathlib import Path
|
|
7
9
|
|
|
8
10
|
from jinja2 import StrictUndefined, Template
|
|
11
|
+
from pydantic import BaseModel
|
|
9
12
|
|
|
10
|
-
from minisweagent import Environment, Model
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
@dataclass
|
|
14
|
-
class AgentConfig:
|
|
15
|
-
# The default settings are the bare minimum to run the agent. Take a look at the config files for improved settings.
|
|
16
|
-
system_template: str = "You are a helpful assistant that can do anything."
|
|
17
|
-
instance_template: str = (
|
|
18
|
-
"Your task: {{task}}. Please reply with a single shell command in triple backticks. "
|
|
19
|
-
"To finish, the first line of the output of the shell command must be 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'."
|
|
20
|
-
)
|
|
21
|
-
timeout_template: str = (
|
|
22
|
-
"The last command <command>{{action['action']}}</command> timed out and has been killed.\n"
|
|
23
|
-
"The output of the command was:\n"
|
|
24
|
-
"{% if output | length < 10000 -%}\n"
|
|
25
|
-
"<output>\n{{output}}\n</output>\n"
|
|
26
|
-
"{%- else -%}\n"
|
|
27
|
-
"<warning>Output was too long and has been truncated.</warning>\n"
|
|
28
|
-
"<output_head>\n{{ output[:5000] }}\n</output_head>\n"
|
|
29
|
-
"<elided_chars>{{ output | length - 10000 }} characters elided</elided_chars>\n"
|
|
30
|
-
"<output_tail>\n{{ output[-5000:] }}\n</output_tail>\n"
|
|
31
|
-
"{%- endif %}\n"
|
|
32
|
-
"Please try another command and make sure to avoid those requiring interactive input."
|
|
33
|
-
)
|
|
34
|
-
format_error_template: str = "Please always provide EXACTLY ONE action in triple backticks."
|
|
35
|
-
action_observation_template: str = "Observation: {{output}}"
|
|
36
|
-
action_regex: str = r"```bash\s*\n(.*?)\n```"
|
|
37
|
-
step_limit: int = 0
|
|
38
|
-
cost_limit: float = 3.0
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
class NonTerminatingException(Exception):
|
|
42
|
-
"""Raised for conditions that can be handled by the agent."""
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
class FormatError(NonTerminatingException):
|
|
46
|
-
"""Raised when the LM's output is not in the expected format."""
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
class ExecutionTimeoutError(NonTerminatingException):
|
|
50
|
-
"""Raised when the action execution timed out."""
|
|
51
|
-
|
|
13
|
+
from minisweagent import Environment, Model, __version__
|
|
14
|
+
from minisweagent.exceptions import InterruptAgentFlow, LimitsExceeded
|
|
15
|
+
from minisweagent.utils.serialize import recursive_merge
|
|
52
16
|
|
|
53
|
-
class TerminatingException(Exception):
|
|
54
|
-
"""Raised for conditions that terminate the agent."""
|
|
55
17
|
|
|
18
|
+
class AgentConfig(BaseModel):
|
|
19
|
+
"""Check the config files in minisweagent/config for example settings."""
|
|
56
20
|
|
|
57
|
-
|
|
58
|
-
"""
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
"""
|
|
21
|
+
system_template: str
|
|
22
|
+
"""Template for the system message (the first message)."""
|
|
23
|
+
instance_template: str
|
|
24
|
+
"""Template for the first user message specifying the task (the second message overall)."""
|
|
25
|
+
step_limit: int = 0
|
|
26
|
+
"""Maximum number of steps the agent can take."""
|
|
27
|
+
cost_limit: float = 3.0
|
|
28
|
+
"""Stop agent after exceeding (!) this cost."""
|
|
29
|
+
output_path: Path | None = None
|
|
30
|
+
"""Save the trajectory to this path."""
|
|
63
31
|
|
|
64
32
|
|
|
65
33
|
class DefaultAgent:
|
|
66
34
|
def __init__(self, model: Model, env: Environment, *, config_class: type = AgentConfig, **kwargs):
|
|
35
|
+
"""See the `AgentConfig` class for permitted keyword arguments."""
|
|
67
36
|
self.config = config_class(**kwargs)
|
|
68
37
|
self.messages: list[dict] = []
|
|
69
38
|
self.model = model
|
|
70
39
|
self.env = env
|
|
71
40
|
self.extra_template_vars = {}
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
41
|
+
self.logger = logging.getLogger("agent")
|
|
42
|
+
self.cost = 0.0
|
|
43
|
+
self.n_calls = 0
|
|
44
|
+
|
|
45
|
+
def get_template_vars(self, **kwargs) -> dict:
    """Collect the variables that are handed to the templates.

    The sources are combined with `recursive_merge` in this order: the dumped
    agent config, the environment's and the model's template variables, the
    call/cost counters, `extra_template_vars` and finally `kwargs`.
    NOTE(review): presumably later sources take precedence over earlier ones;
    confirm against `recursive_merge`.
    """
    sources = (
        self.config.model_dump(),
        self.env.get_template_vars(),
        self.model.get_template_vars(),
        {"n_model_calls": self.n_calls, "model_cost": self.cost},
        self.extra_template_vars,
        kwargs,
    )
    return recursive_merge(*sources)
|
|
78
54
|
|
|
79
|
-
def
|
|
80
|
-
|
|
55
|
+
def _render_template(self, template: str) -> str:
    """Render the Jinja2 `template` string with the variables from `get_template_vars()`.

    The template is built with `StrictUndefined` as its undefined type.
    """
    compiled = Template(template, undefined=StrictUndefined)
    variables = self.get_template_vars()
    return compiled.render(**variables)
|
|
57
|
+
|
|
58
|
+
def add_messages(self, *messages: dict) -> list[dict]:
    """Append `messages` to the message history and return them as a new list."""
    # Logged at debug level only; raise the logger's level to DEBUG to see the messages.
    self.logger.debug(messages)
    self.messages.extend(messages)
    appended = list(messages)
    return appended
|
|
62
|
+
|
|
63
|
+
def handle_uncaught_exception(self, e: Exception) -> list[dict]:
    """Record exception `e` as an "exit" message and return the added messages.

    The message's `extra` carries the exception class name as `exit_status`,
    an empty `submission`, the exception text and the formatted traceback of
    the exception currently being handled.
    """
    details = {
        "exit_status": type(e).__name__,
        "submission": "",
        "exception_str": str(e),
        "traceback": traceback.format_exc(),
    }
    exit_message = self.model.format_message(role="exit", content=str(e), extra=details)
    return self.add_messages(exit_message)
|
|
81
76
|
|
|
82
|
-
def run(self, task: str, **kwargs) ->
|
|
83
|
-
"""Run step() until agent is finished.
|
|
77
|
+
def run(self, task: str = "", **kwargs) -> dict:
|
|
78
|
+
"""Run step() until agent is finished. Returns dictionary with exit_status, submission keys."""
|
|
84
79
|
self.extra_template_vars |= {"task": task, **kwargs}
|
|
85
80
|
self.messages = []
|
|
86
|
-
self.
|
|
87
|
-
|
|
81
|
+
self.add_messages(
|
|
82
|
+
self.model.format_message(role="system", content=self._render_template(self.config.system_template)),
|
|
83
|
+
self.model.format_message(role="user", content=self._render_template(self.config.instance_template)),
|
|
84
|
+
)
|
|
88
85
|
while True:
|
|
89
86
|
try:
|
|
90
87
|
self.step()
|
|
91
|
-
except
|
|
92
|
-
self.
|
|
93
|
-
except
|
|
94
|
-
self.
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
88
|
+
except InterruptAgentFlow as e:
|
|
89
|
+
self.add_messages(*e.messages)
|
|
90
|
+
except Exception as e:
|
|
91
|
+
self.handle_uncaught_exception(e)
|
|
92
|
+
raise
|
|
93
|
+
finally:
|
|
94
|
+
self.save(self.config.output_path)
|
|
95
|
+
if self.messages[-1].get("role") == "exit":
|
|
96
|
+
break
|
|
97
|
+
return self.messages[-1].get("extra", {})
|
|
98
|
+
|
|
99
|
+
def step(self) -> list[dict]:
    """Perform one agent step: query the LM, then execute the actions of its reply."""
    execute = self.execute_actions
    model_message = self.query()
    return execute(model_message)
|
|
100
102
|
|
|
101
103
|
def query(self) -> dict:
|
|
102
|
-
"""Query the model and return
|
|
103
|
-
if 0 < self.config.step_limit <= self.
|
|
104
|
-
raise LimitsExceeded(
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
"""Execute the action and return the observation."""
|
|
111
|
-
output = self.execute_action(self.parse_action(response))
|
|
112
|
-
observation = self.render_template(self.config.action_observation_template, output=output)
|
|
113
|
-
self.add_message("user", observation)
|
|
114
|
-
return output
|
|
115
|
-
|
|
116
|
-
def parse_action(self, response: dict) -> dict:
|
|
117
|
-
"""Parse the action from the message. Returns the action."""
|
|
118
|
-
actions = re.findall(self.config.action_regex, response["content"], re.DOTALL)
|
|
119
|
-
if len(actions) == 1:
|
|
120
|
-
return {"action": actions[0].strip(), **response}
|
|
121
|
-
raise FormatError(self.render_template(self.config.format_error_template, actions=actions))
|
|
122
|
-
|
|
123
|
-
def execute_action(self, action: dict) -> dict:
|
|
124
|
-
try:
|
|
125
|
-
output = self.env.execute(action["action"])
|
|
126
|
-
except (TimeoutError, subprocess.TimeoutExpired) as e:
|
|
127
|
-
output = e.output.decode("utf-8", errors="replace") if getattr(e, "output", None) else ""
|
|
128
|
-
raise ExecutionTimeoutError(
|
|
129
|
-
self.render_template(self.config.timeout_template, action=action, output=output)
|
|
104
|
+
"""Query the model and return model messages. Override to add hooks."""
|
|
105
|
+
if 0 < self.config.step_limit <= self.n_calls or 0 < self.config.cost_limit <= self.cost:
|
|
106
|
+
raise LimitsExceeded(
|
|
107
|
+
{
|
|
108
|
+
"role": "exit",
|
|
109
|
+
"content": "LimitsExceeded",
|
|
110
|
+
"extra": {"exit_status": "LimitsExceeded", "submission": ""},
|
|
111
|
+
}
|
|
130
112
|
)
|
|
131
|
-
self.
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
113
|
+
self.n_calls += 1
|
|
114
|
+
message = self.model.query(self.messages)
|
|
115
|
+
self.cost += message.get("extra", {}).get("cost", 0.0)
|
|
116
|
+
self.add_messages(message)
|
|
117
|
+
return message
|
|
118
|
+
|
|
119
|
+
def execute_actions(self, message: dict) -> list[dict]:
    """Run the actions found under message["extra"]["actions"] in the environment.

    The collected outputs are turned into observation messages by the model;
    those messages are added to the history and returned.
    """
    outputs = []
    for action in message.get("extra", {}).get("actions", []):
        outputs.append(self.env.execute(action))
    observations = self.model.format_observation_messages(message, outputs, self.get_template_vars())
    return self.add_messages(*observations)
|
|
123
|
+
|
|
124
|
+
def serialize(self, *extra_dicts) -> dict:
    """Build a json-compatible nested dictionary describing the agent state.

    The agent's own data (stats, config, version, exit status, submission,
    messages, trajectory format) is merged via `recursive_merge` with the
    serialized model, the serialized environment and any `extra_dicts`.
    """
    # Exit status and submission are read from the `extra` of the most recent message, if any.
    last_extra = (self.messages[-1] if self.messages else {}).get("extra", {})
    agent_cls = self.__class__
    info = {
        "model_stats": {"instance_cost": self.cost, "api_calls": self.n_calls},
        "config": {
            "agent": self.config.model_dump(mode="json"),
            "agent_type": f"{agent_cls.__module__}.{agent_cls.__name__}",
        },
        "mini_version": __version__,
        "exit_status": last_extra.get("exit_status", ""),
        "submission": last_extra.get("submission", ""),
    }
    agent_data = {
        "info": info,
        "messages": self.messages,
        "trajectory_format": "mini-swe-agent-1.1",
    }
    return recursive_merge(agent_data, self.model.serialize(), self.env.serialize(), *extra_dicts)
|
|
146
|
+
|
|
147
|
+
def save(self, path: Path | None, *extra_dicts) -> dict:
|
|
148
|
+
"""Save the trajectory of the agent to a file if path is given. Returns full serialized data.
|
|
149
|
+
You can pass additional dictionaries with extra data to be (recursively) merged into the output data.
|
|
150
|
+
"""
|
|
151
|
+
data = self.serialize(*extra_dicts)
|
|
152
|
+
if path:
|
|
153
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
154
|
+
path.write_text(json.dumps(data, indent=2))
|
|
155
|
+
return data
|
|
@@ -7,31 +7,45 @@ There are three modes:
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
import re
|
|
10
|
-
from
|
|
11
|
-
from typing import Literal
|
|
10
|
+
from typing import Literal, NoReturn
|
|
12
11
|
|
|
12
|
+
from prompt_toolkit.formatted_text import HTML
|
|
13
13
|
from prompt_toolkit.history import FileHistory
|
|
14
14
|
from prompt_toolkit.shortcuts import PromptSession
|
|
15
15
|
from rich.console import Console
|
|
16
16
|
from rich.rule import Rule
|
|
17
17
|
|
|
18
18
|
from minisweagent import global_config_dir
|
|
19
|
-
from minisweagent.agents.default import AgentConfig, DefaultAgent
|
|
19
|
+
from minisweagent.agents.default import AgentConfig, DefaultAgent
|
|
20
|
+
from minisweagent.exceptions import LimitsExceeded, Submitted, UserInterruption
|
|
21
|
+
from minisweagent.models.utils.content_string import get_content_string
|
|
20
22
|
|
|
21
23
|
console = Console(highlight=False)
|
|
22
|
-
|
|
24
|
+
_history = FileHistory(global_config_dir / "interactive_history.txt")
|
|
25
|
+
_prompt_session = PromptSession(history=_history)
|
|
26
|
+
_multiline_prompt_session = PromptSession(history=_history, multiline=True)
|
|
23
27
|
|
|
24
28
|
|
|
25
|
-
@dataclass
|
|
26
29
|
class InteractiveAgentConfig(AgentConfig):
|
|
27
30
|
mode: Literal["human", "confirm", "yolo"] = "confirm"
|
|
28
31
|
"""Whether to confirm actions."""
|
|
29
|
-
whitelist_actions: list[str] =
|
|
32
|
+
whitelist_actions: list[str] = []
|
|
30
33
|
"""Never confirm actions that match these regular expressions."""
|
|
31
34
|
confirm_exit: bool = True
|
|
32
35
|
"""If the agent wants to finish, do we ask for confirmation from user?"""
|
|
33
36
|
|
|
34
37
|
|
|
38
|
+
def _multiline_prompt() -> str:
    """Ask for input through the module-level multiline prompt session and return it.

    A bottom toolbar lists the key bindings for submitting and for history navigation/search.
    """
    toolbar = HTML(
        "Submit message: <b fg='yellow' bg='black'>Esc, then Enter</b> | "
        "Navigate history: <b fg='yellow' bg='black'>Arrow Up/Down</b> | "
        "Search history: <b fg='yellow' bg='black'>Ctrl+R</b>"
    )
    return _multiline_prompt_session.prompt("", bottom_toolbar=toolbar)
|
|
47
|
+
|
|
48
|
+
|
|
35
49
|
class InteractiveAgent(DefaultAgent):
|
|
36
50
|
_MODE_COMMANDS_MAPPING = {"/u": "human", "/c": "confirm", "/y": "yolo"}
|
|
37
51
|
|
|
@@ -39,28 +53,34 @@ class InteractiveAgent(DefaultAgent):
|
|
|
39
53
|
super().__init__(*args, config_class=config_class, **kwargs)
|
|
40
54
|
self.cost_last_confirmed = 0.0
|
|
41
55
|
|
|
42
|
-
def
|
|
56
|
+
def add_messages(self, *messages: dict) -> list[dict]:
|
|
43
57
|
# Extend supermethod to print messages
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
58
|
+
for msg in messages:
|
|
59
|
+
role, content = msg.get("role") or msg.get("type", "unknown"), get_content_string(msg)
|
|
60
|
+
if role == "assistant":
|
|
61
|
+
console.print(
|
|
62
|
+
f"\n[red][bold]mini-swe-agent[/bold] (step [bold]{self.n_calls}[/bold], [bold]${self.cost:.2f}[/bold]):[/red]\n",
|
|
63
|
+
end="",
|
|
64
|
+
highlight=False,
|
|
65
|
+
)
|
|
66
|
+
else:
|
|
67
|
+
console.print(f"\n[bold green]{role.capitalize()}[/bold green]:\n", end="", highlight=False)
|
|
68
|
+
console.print(content, highlight=False, markup=False)
|
|
69
|
+
return super().add_messages(*messages)
|
|
54
70
|
|
|
55
71
|
def query(self) -> dict:
|
|
56
72
|
# Extend supermethod to handle human mode
|
|
57
73
|
if self.config.mode == "human":
|
|
58
|
-
match command := self.
|
|
59
|
-
case "/y" | "/c":
|
|
74
|
+
match command := self._prompt_and_handle_slash_commands("[bold yellow]>[/bold yellow] "):
|
|
75
|
+
case "/y" | "/c":
|
|
60
76
|
pass
|
|
61
77
|
case _:
|
|
62
|
-
msg = {
|
|
63
|
-
|
|
78
|
+
msg = {
|
|
79
|
+
"role": "user",
|
|
80
|
+
"content": f"User command: \n```bash\n{command}\n```",
|
|
81
|
+
"extra": {"actions": [{"command": command}]},
|
|
82
|
+
}
|
|
83
|
+
self.add_messages(msg)
|
|
64
84
|
return msg
|
|
65
85
|
try:
|
|
66
86
|
with console.status("Waiting for the LM to respond..."):
|
|
@@ -68,86 +88,127 @@ class InteractiveAgent(DefaultAgent):
|
|
|
68
88
|
except LimitsExceeded:
|
|
69
89
|
console.print(
|
|
70
90
|
f"Limits exceeded. Limits: {self.config.step_limit} steps, ${self.config.cost_limit}.\n"
|
|
71
|
-
f"Current spend: {self.
|
|
91
|
+
f"Current spend: {self.n_calls} steps, ${self.cost:.2f}."
|
|
72
92
|
)
|
|
73
93
|
self.config.step_limit = int(input("New step limit: "))
|
|
74
94
|
self.config.cost_limit = float(input("New cost limit: "))
|
|
75
95
|
return super().query()
|
|
76
96
|
|
|
77
|
-
def step(self) -> dict:
|
|
97
|
+
def step(self) -> list[dict]:
|
|
78
98
|
# Override the step method to handle user interruption
|
|
79
99
|
try:
|
|
80
100
|
console.print(Rule())
|
|
81
101
|
return super().step()
|
|
82
102
|
except KeyboardInterrupt:
|
|
83
|
-
|
|
84
|
-
interruption_message = self._prompt_and_handle_special(
|
|
103
|
+
interruption_message = self._prompt_and_handle_slash_commands(
|
|
85
104
|
"\n\n[bold yellow]Interrupted.[/bold yellow] "
|
|
86
105
|
"[green]Type a comment/command[/green] (/h for available commands)"
|
|
87
106
|
"\n[bold yellow]>[/bold yellow] "
|
|
88
107
|
).strip()
|
|
89
108
|
if not interruption_message or interruption_message in self._MODE_COMMANDS_MAPPING:
|
|
90
109
|
interruption_message = "Temporary interruption caught."
|
|
91
|
-
raise
|
|
110
|
+
raise UserInterruption(
|
|
111
|
+
{
|
|
112
|
+
"role": "user",
|
|
113
|
+
"content": f"Interrupted by user: {interruption_message}",
|
|
114
|
+
"extra": {"interrupt_type": "UserInterruption"},
|
|
115
|
+
}
|
|
116
|
+
)
|
|
92
117
|
|
|
93
|
-
def
|
|
94
|
-
# Override
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
118
|
+
def execute_actions(self, message: dict) -> list[dict]:
    """Ask for confirmation, run the message's actions, then add the observation messages.

    Overrides the base implementation. The observation messages are added in
    a `finally` clause, so outputs gathered before an interruption or a
    submission are still recorded.
    """
    pending = message.get("extra", {}).get("actions", [])
    commands = [item["command"] for item in pending]
    outputs = []
    try:
        self._ask_confirmation_or_interrupt(commands)
        for item in pending:
            outputs.append(self.env.execute(item))
    except Submitted as e:
        # The agent wants to finish; the helper handles confirm_exit and always raises.
        self._check_for_new_task_or_submit(e)
    finally:
        observations = self.model.format_observation_messages(message, outputs, self.get_template_vars())
        result = self.add_messages(*observations)
    return result
|
|
134
|
+
|
|
135
|
+
def _add_observation_messages(self, message: dict, outputs: list[dict]) -> list[dict]:
|
|
136
|
+
return self.add_messages(*self.model.format_observation_messages(message, outputs, self.get_template_vars()))
|
|
137
|
+
|
|
138
|
+
def _check_for_new_task_or_submit(self, e: Submitted) -> NoReturn:
    """Decide between finishing and continuing with a new task; always raises.

    With `confirm_exit` enabled the user is prompted (multiline). A non-empty
    answer raises `UserInterruption` carrying the new task as a user message;
    otherwise the original `Submitted` exception `e` is re-raised.
    """
    if not self.config.confirm_exit:
        raise e
    message = (
        "[bold yellow]Agent wants to finish.[/bold yellow] "
        "[bold green]Type new task[/bold green] or [red][bold]Esc, then enter[/bold] to quit.[/red]\n"
        "[bold yellow]>[/bold yellow] "
    )
    new_task = self._prompt_and_handle_slash_commands(message, _multiline=True).strip()
    if not new_task:
        raise e
    raise UserInterruption(
        {
            "role": "user",
            "content": f"The user added a new task: {new_task}",
            "extra": {"interrupt_type": "UserNewTask"},
        }
    )
|
|
98
155
|
|
|
99
|
-
def
|
|
156
|
+
def _should_ask_confirmation(self, action: str) -> bool:
|
|
100
157
|
return self.config.mode == "confirm" and not any(re.match(r, action) for r in self.config.whitelist_actions)
|
|
101
158
|
|
|
102
|
-
def
|
|
159
|
+
def _ask_confirmation_or_interrupt(self, commands: list[str]) -> None:
|
|
160
|
+
commands_needing_confirmation = [c for c in commands if self._should_ask_confirmation(c)]
|
|
161
|
+
if not commands_needing_confirmation:
|
|
162
|
+
return
|
|
163
|
+
n = len(commands_needing_confirmation)
|
|
103
164
|
prompt = (
|
|
104
|
-
"[bold yellow]Execute?[/
|
|
105
|
-
"
|
|
165
|
+
f"[bold yellow]Execute {n} action(s)?[/] [green][bold]Enter[/] to confirm[/], "
|
|
166
|
+
"[red]type [bold]comment[/] to reject[/], or [blue][bold]/h[/] to show available commands[/]\n"
|
|
106
167
|
"[bold yellow]>[/bold yellow] "
|
|
107
168
|
)
|
|
108
|
-
match user_input := self.
|
|
169
|
+
match user_input := self._prompt_and_handle_slash_commands(prompt).strip():
|
|
109
170
|
case "" | "/y":
|
|
110
171
|
pass # confirmed, do nothing
|
|
111
172
|
case "/u": # Skip execution action and get back to query
|
|
112
|
-
raise
|
|
173
|
+
raise UserInterruption(
|
|
174
|
+
{
|
|
175
|
+
"role": "user",
|
|
176
|
+
"content": "Commands not executed. Switching to human mode",
|
|
177
|
+
"extra": {"interrupt_type": "UserRejection"},
|
|
178
|
+
}
|
|
179
|
+
)
|
|
113
180
|
case _:
|
|
114
|
-
raise
|
|
115
|
-
|
|
181
|
+
raise UserInterruption(
|
|
182
|
+
{
|
|
183
|
+
"role": "user",
|
|
184
|
+
"content": f"Commands not executed. The user rejected your commands with the following message: {user_input}",
|
|
185
|
+
"extra": {"interrupt_type": "UserRejection"},
|
|
186
|
+
}
|
|
116
187
|
)
|
|
117
188
|
|
|
118
|
-
def
|
|
189
|
+
def _prompt_and_handle_slash_commands(self, prompt: str, *, _multiline: bool = False) -> str:
|
|
119
190
|
"""Prompts the user, takes care of /h (followed by requery) and sets the mode. Returns the user input."""
|
|
120
191
|
console.print(prompt, end="")
|
|
121
|
-
|
|
192
|
+
if _multiline:
|
|
193
|
+
return _multiline_prompt()
|
|
194
|
+
user_input = _prompt_session.prompt("")
|
|
195
|
+
if user_input == "/m":
|
|
196
|
+
return self._prompt_and_handle_slash_commands(prompt, _multiline=True)
|
|
122
197
|
if user_input == "/h":
|
|
123
198
|
console.print(
|
|
124
199
|
f"Current mode: [bold green]{self.config.mode}[/bold green]\n"
|
|
125
200
|
f"[bold green]/y[/bold green] to switch to [bold yellow]yolo[/bold yellow] mode (execute LM commands without confirmation)\n"
|
|
126
201
|
f"[bold green]/c[/bold green] to switch to [bold yellow]confirmation[/bold yellow] mode (ask for confirmation before executing LM commands)\n"
|
|
127
202
|
f"[bold green]/u[/bold green] to switch to [bold yellow]human[/bold yellow] mode (execute commands issued by the user)\n"
|
|
203
|
+
f"[bold green]/m[/bold green] to enter multiline comment",
|
|
128
204
|
)
|
|
129
|
-
return self.
|
|
205
|
+
return self._prompt_and_handle_slash_commands(prompt)
|
|
130
206
|
if user_input in self._MODE_COMMANDS_MAPPING:
|
|
131
207
|
if self.config.mode == self._MODE_COMMANDS_MAPPING[user_input]:
|
|
132
|
-
return self.
|
|
208
|
+
return self._prompt_and_handle_slash_commands(
|
|
133
209
|
f"[bold red]Already in {self.config.mode} mode.[/bold red]\n{prompt}"
|
|
134
210
|
)
|
|
135
211
|
self.config.mode = self._MODE_COMMANDS_MAPPING[user_input]
|
|
136
212
|
console.print(f"Switched to [bold green]{self.config.mode}[/bold green] mode.")
|
|
137
213
|
return user_input
|
|
138
214
|
return user_input
|
|
139
|
-
|
|
140
|
-
def has_finished(self, output: dict[str, str]):
|
|
141
|
-
try:
|
|
142
|
-
return super().has_finished(output)
|
|
143
|
-
except Submitted as e:
|
|
144
|
-
if self.config.confirm_exit:
|
|
145
|
-
console.print(
|
|
146
|
-
"[bold green]Agent wants to finish.[/bold green] "
|
|
147
|
-
"[green]Type a comment to give it a new task or press enter to quit.\n"
|
|
148
|
-
"[bold yellow]>[/bold yellow] ",
|
|
149
|
-
end="",
|
|
150
|
-
)
|
|
151
|
-
if new_task := self._prompt_and_handle_special("").strip():
|
|
152
|
-
raise NonTerminatingException(f"The user added a new task: {new_task}")
|
|
153
|
-
raise e
|
minisweagent/config/README.md
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
# Configs
|
|
2
2
|
|
|
3
|
-
* `mini.yaml` - Default config for `mini`/`agents/interactive.py`
|
|
3
|
+
* `mini.yaml` - Default config for `mini`/`agents/interactive.py` agent.
|
|
4
4
|
* `default.yaml` - Default config for the `default.py` agent.
|
|
5
|
-
* `github_issue.yaml` - Config for the `run/github_issue.py` entry point.
|
|
6
5
|
|
|
7
|
-
##
|
|
6
|
+
## Benchmarks
|
|
8
7
|
|
|
9
|
-
* `
|
|
8
|
+
* `benchmarks/swebench.yaml` - Config for the `run/benchmarks/swebench.py` entry point.
|
minisweagent/config/__init__.py
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
"""Configuration files and utilities for mini-SWE-agent."""
|
|
2
2
|
|
|
3
|
+
import json
|
|
3
4
|
import os
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
|
|
7
|
+
import yaml
|
|
8
|
+
|
|
6
9
|
builtin_config_dir = Path(__file__).parent
|
|
7
10
|
|
|
8
11
|
|
|
@@ -16,6 +19,7 @@ def get_config_path(config_spec: str | Path) -> Path:
|
|
|
16
19
|
Path(os.getenv("MSWEA_CONFIG_DIR", ".")) / config_spec,
|
|
17
20
|
builtin_config_dir / config_spec,
|
|
18
21
|
builtin_config_dir / "extra" / config_spec,
|
|
22
|
+
builtin_config_dir / "benchmarks" / config_spec,
|
|
19
23
|
]
|
|
20
24
|
for candidate in candidates:
|
|
21
25
|
if candidate.exists():
|
|
@@ -24,4 +28,35 @@ def get_config_path(config_spec: str | Path) -> Path:
|
|
|
24
28
|
raise FileNotFoundError(f"Could not find config file for {config_spec} (tried: {candidates})")
|
|
25
29
|
|
|
26
30
|
|
|
27
|
-
|
|
31
|
+
def _key_value_spec_to_nested_dict(config_spec: str) -> dict:
|
|
32
|
+
"""Interpret key-value specs from the command line.
|
|
33
|
+
|
|
34
|
+
Example:
|
|
35
|
+
|
|
36
|
+
"model.model_name=anthropic/claude-sonnet-4-5-20250929" ->
|
|
37
|
+
{"model": {"model_name": "anthropic/claude-sonnet-4-5-20250929"}}
|
|
38
|
+
"""
|
|
39
|
+
key, value = config_spec.split("=", 1)
|
|
40
|
+
try:
|
|
41
|
+
value = json.loads(value)
|
|
42
|
+
except json.JSONDecodeError:
|
|
43
|
+
pass
|
|
44
|
+
keys = key.split(".")
|
|
45
|
+
result = {}
|
|
46
|
+
current = result
|
|
47
|
+
for k in keys[:-1]:
|
|
48
|
+
current[k] = {}
|
|
49
|
+
current = current[k]
|
|
50
|
+
current[keys[-1]] = value
|
|
51
|
+
return result
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_config_from_spec(config_spec: str | Path) -> dict:
    """Load a config dictionary from a config spec.

    A string containing "=" is treated as a `key.path=value` override and
    converted with `_key_value_spec_to_nested_dict`. Anything else is resolved
    to a file with `get_config_path` and parsed with `yaml.safe_load`.

    Returns:
        The parsed config; an empty dict when the YAML file is empty.

    Raises:
        FileNotFoundError: Propagated from `get_config_path` when no candidate file exists.
    """
    if isinstance(config_spec, str) and "=" in config_spec:
        return _key_value_spec_to_nested_dict(config_spec)
    path = get_config_path(config_spec)
    # Read as UTF-8 explicitly so parsing does not depend on the platform's locale encoding.
    loaded = yaml.safe_load(path.read_text(encoding="utf-8"))
    # safe_load yields None for an empty document; keep the annotated dict return type.
    return {} if loaded is None else loaded
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
__all__ = ["builtin_config_dir", "get_config_path", "get_config_from_spec", "_key_value_spec_to_nested_dict"]
|