mini-swe-agent 1.17.5__py3-none-any.whl → 2.0.0a1__py3-none-any.whl

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (73)
  1. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/METADATA +36 -52
  2. mini_swe_agent-2.0.0a1.dist-info/RECORD +70 -0
  3. mini_swe_agent-2.0.0a1.dist-info/entry_points.txt +5 -0
  4. minisweagent/__init__.py +19 -26
  5. minisweagent/agents/default.py +128 -113
  6. minisweagent/agents/interactive.py +119 -58
  7. minisweagent/config/README.md +3 -4
  8. minisweagent/config/__init__.py +36 -1
  9. minisweagent/config/benchmarks/swebench.yaml +156 -0
  10. minisweagent/config/{extra/swebench.yaml → benchmarks/swebench_backticks.yaml} +69 -64
  11. minisweagent/config/benchmarks/swebench_modal.yaml +47 -0
  12. minisweagent/config/{extra → benchmarks}/swebench_xml.yaml +73 -70
  13. minisweagent/config/default.yaml +24 -21
  14. minisweagent/config/inspector.tcss +42 -0
  15. minisweagent/config/mini.yaml +53 -71
  16. minisweagent/config/{github_issue.yaml → mini_textbased.yaml} +43 -29
  17. minisweagent/environments/__init__.py +1 -0
  18. minisweagent/environments/docker.py +67 -20
  19. minisweagent/environments/extra/bubblewrap.py +86 -47
  20. minisweagent/environments/extra/swerex_docker.py +53 -20
  21. minisweagent/environments/extra/swerex_modal.py +90 -0
  22. minisweagent/environments/local.py +62 -21
  23. minisweagent/environments/singularity.py +59 -18
  24. minisweagent/exceptions.py +22 -0
  25. minisweagent/models/__init__.py +6 -7
  26. minisweagent/models/extra/roulette.py +20 -17
  27. minisweagent/models/litellm_model.py +90 -44
  28. minisweagent/models/litellm_response_model.py +80 -0
  29. minisweagent/models/litellm_textbased_model.py +45 -0
  30. minisweagent/models/openrouter_model.py +87 -45
  31. minisweagent/models/openrouter_response_model.py +123 -0
  32. minisweagent/models/openrouter_textbased_model.py +76 -0
  33. minisweagent/models/portkey_model.py +84 -42
  34. minisweagent/models/portkey_response_model.py +163 -0
  35. minisweagent/models/requesty_model.py +91 -41
  36. minisweagent/models/test_models.py +246 -19
  37. minisweagent/models/utils/actions_text.py +60 -0
  38. minisweagent/models/utils/actions_toolcall.py +102 -0
  39. minisweagent/models/utils/actions_toolcall_response.py +110 -0
  40. minisweagent/models/utils/anthropic_utils.py +28 -0
  41. minisweagent/models/utils/cache_control.py +15 -2
  42. minisweagent/models/utils/content_string.py +74 -0
  43. minisweagent/models/utils/openai_multimodal.py +50 -0
  44. minisweagent/models/utils/retry.py +25 -0
  45. minisweagent/run/benchmarks/__init__.py +1 -0
  46. minisweagent/run/{extra → benchmarks}/swebench.py +56 -35
  47. minisweagent/run/{extra → benchmarks}/swebench_single.py +36 -26
  48. minisweagent/run/{extra → benchmarks}/utils/batch_progress.py +1 -1
  49. minisweagent/run/hello_world.py +6 -0
  50. minisweagent/run/mini.py +54 -63
  51. minisweagent/run/utilities/__init__.py +1 -0
  52. minisweagent/run/{extra → utilities}/config.py +2 -0
  53. minisweagent/run/{inspector.py → utilities/inspector.py} +90 -11
  54. minisweagent/run/{mini_extra.py → utilities/mini_extra.py} +9 -5
  55. minisweagent/utils/serialize.py +26 -0
  56. mini_swe_agent-1.17.5.dist-info/RECORD +0 -61
  57. mini_swe_agent-1.17.5.dist-info/entry_points.txt +0 -5
  58. minisweagent/agents/interactive_textual.py +0 -450
  59. minisweagent/config/extra/swebench_roulette.yaml +0 -233
  60. minisweagent/config/mini.tcss +0 -86
  61. minisweagent/models/anthropic.py +0 -35
  62. minisweagent/models/litellm_response_api_model.py +0 -82
  63. minisweagent/models/portkey_response_api_model.py +0 -75
  64. minisweagent/models/utils/key_per_thread.py +0 -20
  65. minisweagent/models/utils/openai_utils.py +0 -41
  66. minisweagent/run/github_issue.py +0 -87
  67. minisweagent/run/utils/__init__.py +0 -0
  68. minisweagent/run/utils/save.py +0 -78
  69. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/WHEEL +0 -0
  70. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/licenses/LICENSE.md +0 -0
  71. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/top_level.txt +0 -0
  72. /minisweagent/config/{extra → benchmarks}/__init__.py +0 -0
  73. /minisweagent/run/{extra → benchmarks}/utils/__init__.py +0 -0
@@ -1,140 +1,155 @@
1
- """Basic agent class. See https://mini-swe-agent.com/latest/advanced/control_flow/ for visual explanation."""
1
+ """Basic agent class. See https://mini-swe-agent.com/latest/advanced/control_flow/ for visual explanation
2
+ or https://minimal-agent.com for a tutorial on the basic building principles.
3
+ """
2
4
 
3
- import re
4
- import subprocess
5
- import time
6
- from dataclasses import asdict, dataclass
5
+ import json
6
+ import logging
7
+ import traceback
8
+ from pathlib import Path
7
9
 
8
10
  from jinja2 import StrictUndefined, Template
11
+ from pydantic import BaseModel
9
12
 
10
- from minisweagent import Environment, Model
11
-
12
-
13
- @dataclass
14
- class AgentConfig:
15
- # The default settings are the bare minimum to run the agent. Take a look at the config files for improved settings.
16
- system_template: str = "You are a helpful assistant that can do anything."
17
- instance_template: str = (
18
- "Your task: {{task}}. Please reply with a single shell command in triple backticks. "
19
- "To finish, the first line of the output of the shell command must be 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'."
20
- )
21
- timeout_template: str = (
22
- "The last command <command>{{action['action']}}</command> timed out and has been killed.\n"
23
- "The output of the command was:\n"
24
- "{% if output | length < 10000 -%}\n"
25
- "<output>\n{{output}}\n</output>\n"
26
- "{%- else -%}\n"
27
- "<warning>Output was too long and has been truncated.</warning>\n"
28
- "<output_head>\n{{ output[:5000] }}\n</output_head>\n"
29
- "<elided_chars>{{ output | length - 10000 }} characters elided</elided_chars>\n"
30
- "<output_tail>\n{{ output[-5000:] }}\n</output_tail>\n"
31
- "{%- endif %}\n"
32
- "Please try another command and make sure to avoid those requiring interactive input."
33
- )
34
- format_error_template: str = "Please always provide EXACTLY ONE action in triple backticks."
35
- action_observation_template: str = "Observation: {{output}}"
36
- action_regex: str = r"```bash\s*\n(.*?)\n```"
37
- step_limit: int = 0
38
- cost_limit: float = 3.0
39
-
40
-
41
- class NonTerminatingException(Exception):
42
- """Raised for conditions that can be handled by the agent."""
43
-
44
-
45
- class FormatError(NonTerminatingException):
46
- """Raised when the LM's output is not in the expected format."""
47
-
48
-
49
- class ExecutionTimeoutError(NonTerminatingException):
50
- """Raised when the action execution timed out."""
51
-
13
+ from minisweagent import Environment, Model, __version__
14
+ from minisweagent.exceptions import InterruptAgentFlow, LimitsExceeded
15
+ from minisweagent.utils.serialize import recursive_merge
52
16
 
53
- class TerminatingException(Exception):
54
- """Raised for conditions that terminate the agent."""
55
17
 
18
+ class AgentConfig(BaseModel):
19
+ """Check the config files in minisweagent/config for example settings."""
56
20
 
57
- class Submitted(TerminatingException):
58
- """Raised when the LM declares that the agent has finished its task."""
59
-
60
-
61
- class LimitsExceeded(TerminatingException):
62
- """Raised when the agent has reached its cost or step limit."""
21
+ system_template: str
22
+ """Template for the system message (the first message)."""
23
+ instance_template: str
24
+ """Template for the first user message specifying the task (the second message overall)."""
25
+ step_limit: int = 0
26
+ """Maximum number of steps the agent can take."""
27
+ cost_limit: float = 3.0
28
+ """Stop agent after exceeding (!) this cost."""
29
+ output_path: Path | None = None
30
+ """Save the trajectory to this path."""
63
31
 
64
32
 
65
33
  class DefaultAgent:
66
34
  def __init__(self, model: Model, env: Environment, *, config_class: type = AgentConfig, **kwargs):
35
+ """See the `AgentConfig` class for permitted keyword arguments."""
67
36
  self.config = config_class(**kwargs)
68
37
  self.messages: list[dict] = []
69
38
  self.model = model
70
39
  self.env = env
71
40
  self.extra_template_vars = {}
72
-
73
- def render_template(self, template: str, **kwargs) -> str:
74
- template_vars = asdict(self.config) | self.env.get_template_vars() | self.model.get_template_vars()
75
- return Template(template, undefined=StrictUndefined).render(
76
- **kwargs, **template_vars, **self.extra_template_vars
41
+ self.logger = logging.getLogger("agent")
42
+ self.cost = 0.0
43
+ self.n_calls = 0
44
+
45
+ def get_template_vars(self, **kwargs) -> dict:
46
+ return recursive_merge(
47
+ self.config.model_dump(),
48
+ self.env.get_template_vars(),
49
+ self.model.get_template_vars(),
50
+ {"n_model_calls": self.n_calls, "model_cost": self.cost},
51
+ self.extra_template_vars,
52
+ kwargs,
77
53
  )
78
54
 
79
- def add_message(self, role: str, content: str, **kwargs):
80
- self.messages.append({"role": role, "content": content, "timestamp": time.time(), **kwargs})
55
+ def _render_template(self, template: str) -> str:
56
+ return Template(template, undefined=StrictUndefined).render(**self.get_template_vars())
57
+
58
+ def add_messages(self, *messages: dict) -> list[dict]:
59
+ self.logger.debug(messages) # set log level to debug to see
60
+ self.messages.extend(messages)
61
+ return list(messages)
62
+
63
+ def handle_uncaught_exception(self, e: Exception) -> list[dict]:
64
+ return self.add_messages(
65
+ self.model.format_message(
66
+ role="exit",
67
+ content=str(e),
68
+ extra={
69
+ "exit_status": type(e).__name__,
70
+ "submission": "",
71
+ "exception_str": str(e),
72
+ "traceback": traceback.format_exc(),
73
+ },
74
+ )
75
+ )
81
76
 
82
- def run(self, task: str, **kwargs) -> tuple[str, str]:
83
- """Run step() until agent is finished. Return exit status & message"""
77
+ def run(self, task: str = "", **kwargs) -> dict:
78
+ """Run step() until agent is finished. Returns dictionary with exit_status, submission keys."""
84
79
  self.extra_template_vars |= {"task": task, **kwargs}
85
80
  self.messages = []
86
- self.add_message("system", self.render_template(self.config.system_template))
87
- self.add_message("user", self.render_template(self.config.instance_template))
81
+ self.add_messages(
82
+ self.model.format_message(role="system", content=self._render_template(self.config.system_template)),
83
+ self.model.format_message(role="user", content=self._render_template(self.config.instance_template)),
84
+ )
88
85
  while True:
89
86
  try:
90
87
  self.step()
91
- except NonTerminatingException as e:
92
- self.add_message("user", str(e))
93
- except TerminatingException as e:
94
- self.add_message("user", str(e))
95
- return type(e).__name__, str(e)
96
-
97
- def step(self) -> dict:
98
- """Query the LM, execute the action, return the observation."""
99
- return self.get_observation(self.query())
88
+ except InterruptAgentFlow as e:
89
+ self.add_messages(*e.messages)
90
+ except Exception as e:
91
+ self.handle_uncaught_exception(e)
92
+ raise
93
+ finally:
94
+ self.save(self.config.output_path)
95
+ if self.messages[-1].get("role") == "exit":
96
+ break
97
+ return self.messages[-1].get("extra", {})
98
+
99
+ def step(self) -> list[dict]:
100
+ """Query the LM, execute actions."""
101
+ return self.execute_actions(self.query())
100
102
 
101
103
  def query(self) -> dict:
102
- """Query the model and return the response."""
103
- if 0 < self.config.step_limit <= self.model.n_calls or 0 < self.config.cost_limit <= self.model.cost:
104
- raise LimitsExceeded()
105
- response = self.model.query(self.messages)
106
- self.add_message("assistant", **response)
107
- return response
108
-
109
- def get_observation(self, response: dict) -> dict:
110
- """Execute the action and return the observation."""
111
- output = self.execute_action(self.parse_action(response))
112
- observation = self.render_template(self.config.action_observation_template, output=output)
113
- self.add_message("user", observation)
114
- return output
115
-
116
- def parse_action(self, response: dict) -> dict:
117
- """Parse the action from the message. Returns the action."""
118
- actions = re.findall(self.config.action_regex, response["content"], re.DOTALL)
119
- if len(actions) == 1:
120
- return {"action": actions[0].strip(), **response}
121
- raise FormatError(self.render_template(self.config.format_error_template, actions=actions))
122
-
123
- def execute_action(self, action: dict) -> dict:
124
- try:
125
- output = self.env.execute(action["action"])
126
- except (TimeoutError, subprocess.TimeoutExpired) as e:
127
- output = e.output.decode("utf-8", errors="replace") if getattr(e, "output", None) else ""
128
- raise ExecutionTimeoutError(
129
- self.render_template(self.config.timeout_template, action=action, output=output)
104
+ """Query the model and return model messages. Override to add hooks."""
105
+ if 0 < self.config.step_limit <= self.n_calls or 0 < self.config.cost_limit <= self.cost:
106
+ raise LimitsExceeded(
107
+ {
108
+ "role": "exit",
109
+ "content": "LimitsExceeded",
110
+ "extra": {"exit_status": "LimitsExceeded", "submission": ""},
111
+ }
130
112
  )
131
- self.has_finished(output)
132
- return output | {"action": action["action"]}
133
-
134
- def has_finished(self, output: dict[str, str]):
135
- """Raises Submitted exception with final output if the agent has finished its task."""
136
- lines = output.get("output", "").lstrip().splitlines(keepends=True)
137
- if lines and lines[0].strip() in ["MINI_SWE_AGENT_FINAL_OUTPUT", "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT"]:
138
- if output.get("returncode", 0) != 0:
139
- return # Command failed - let agent see error and retry
140
- raise Submitted("".join(lines[1:]))
113
+ self.n_calls += 1
114
+ message = self.model.query(self.messages)
115
+ self.cost += message.get("extra", {}).get("cost", 0.0)
116
+ self.add_messages(message)
117
+ return message
118
+
119
+ def execute_actions(self, message: dict) -> list[dict]:
120
+ """Execute actions in message, add observation messages, return them."""
121
+ outputs = [self.env.execute(action) for action in message.get("extra", {}).get("actions", [])]
122
+ return self.add_messages(*self.model.format_observation_messages(message, outputs, self.get_template_vars()))
123
+
124
+ def serialize(self, *extra_dicts) -> dict:
125
+ """Serialize agent state to a json-compatible nested dictionary for saving."""
126
+ last_message = self.messages[-1] if self.messages else {}
127
+ last_extra = last_message.get("extra", {})
128
+ agent_data = {
129
+ "info": {
130
+ "model_stats": {
131
+ "instance_cost": self.cost,
132
+ "api_calls": self.n_calls,
133
+ },
134
+ "config": {
135
+ "agent": self.config.model_dump(mode="json"),
136
+ "agent_type": f"{self.__class__.__module__}.{self.__class__.__name__}",
137
+ },
138
+ "mini_version": __version__,
139
+ "exit_status": last_extra.get("exit_status", ""),
140
+ "submission": last_extra.get("submission", ""),
141
+ },
142
+ "messages": self.messages,
143
+ "trajectory_format": "mini-swe-agent-1.1",
144
+ }
145
+ return recursive_merge(agent_data, self.model.serialize(), self.env.serialize(), *extra_dicts)
146
+
147
+ def save(self, path: Path | None, *extra_dicts) -> dict:
148
+ """Save the trajectory of the agent to a file if path is given. Returns full serialized data.
149
+ You can pass additional dictionaries with extra data to be (recursively) merged into the output data.
150
+ """
151
+ data = self.serialize(*extra_dicts)
152
+ if path:
153
+ path.parent.mkdir(parents=True, exist_ok=True)
154
+ path.write_text(json.dumps(data, indent=2))
155
+ return data
@@ -7,31 +7,45 @@ There are three modes:
7
7
  """
8
8
 
9
9
  import re
10
- from dataclasses import dataclass, field
11
- from typing import Literal
10
+ from typing import Literal, NoReturn
12
11
 
12
+ from prompt_toolkit.formatted_text import HTML
13
13
  from prompt_toolkit.history import FileHistory
14
14
  from prompt_toolkit.shortcuts import PromptSession
15
15
  from rich.console import Console
16
16
  from rich.rule import Rule
17
17
 
18
18
  from minisweagent import global_config_dir
19
- from minisweagent.agents.default import AgentConfig, DefaultAgent, LimitsExceeded, NonTerminatingException, Submitted
19
+ from minisweagent.agents.default import AgentConfig, DefaultAgent
20
+ from minisweagent.exceptions import LimitsExceeded, Submitted, UserInterruption
21
+ from minisweagent.models.utils.content_string import get_content_string
20
22
 
21
23
  console = Console(highlight=False)
22
- prompt_session = PromptSession(history=FileHistory(global_config_dir / "interactive_history.txt"))
24
+ _history = FileHistory(global_config_dir / "interactive_history.txt")
25
+ _prompt_session = PromptSession(history=_history)
26
+ _multiline_prompt_session = PromptSession(history=_history, multiline=True)
23
27
 
24
28
 
25
- @dataclass
26
29
  class InteractiveAgentConfig(AgentConfig):
27
30
  mode: Literal["human", "confirm", "yolo"] = "confirm"
28
31
  """Whether to confirm actions."""
29
- whitelist_actions: list[str] = field(default_factory=list)
32
+ whitelist_actions: list[str] = []
30
33
  """Never confirm actions that match these regular expressions."""
31
34
  confirm_exit: bool = True
32
35
  """If the agent wants to finish, do we ask for confirmation from user?"""
33
36
 
34
37
 
38
+ def _multiline_prompt() -> str:
39
+ return _multiline_prompt_session.prompt(
40
+ "",
41
+ bottom_toolbar=HTML(
42
+ "Submit message: <b fg='yellow' bg='black'>Esc, then Enter</b> | "
43
+ "Navigate history: <b fg='yellow' bg='black'>Arrow Up/Down</b> | "
44
+ "Search history: <b fg='yellow' bg='black'>Ctrl+R</b>"
45
+ ),
46
+ )
47
+
48
+
35
49
  class InteractiveAgent(DefaultAgent):
36
50
  _MODE_COMMANDS_MAPPING = {"/u": "human", "/c": "confirm", "/y": "yolo"}
37
51
 
@@ -39,28 +53,34 @@ class InteractiveAgent(DefaultAgent):
39
53
  super().__init__(*args, config_class=config_class, **kwargs)
40
54
  self.cost_last_confirmed = 0.0
41
55
 
42
- def add_message(self, role: str, content: str, **kwargs):
56
+ def add_messages(self, *messages: dict) -> list[dict]:
43
57
  # Extend supermethod to print messages
44
- super().add_message(role, content, **kwargs)
45
- if role == "assistant":
46
- console.print(
47
- f"\n[red][bold]mini-swe-agent[/bold] (step [bold]{self.model.n_calls}[/bold], [bold]${self.model.cost:.2f}[/bold]):[/red]\n",
48
- end="",
49
- highlight=False,
50
- )
51
- else:
52
- console.print(f"\n[bold green]{role.capitalize()}[/bold green]:\n", end="", highlight=False)
53
- console.print(content, highlight=False, markup=False)
58
+ for msg in messages:
59
+ role, content = msg.get("role") or msg.get("type", "unknown"), get_content_string(msg)
60
+ if role == "assistant":
61
+ console.print(
62
+ f"\n[red][bold]mini-swe-agent[/bold] (step [bold]{self.n_calls}[/bold], [bold]${self.cost:.2f}[/bold]):[/red]\n",
63
+ end="",
64
+ highlight=False,
65
+ )
66
+ else:
67
+ console.print(f"\n[bold green]{role.capitalize()}[/bold green]:\n", end="", highlight=False)
68
+ console.print(content, highlight=False, markup=False)
69
+ return super().add_messages(*messages)
54
70
 
55
71
  def query(self) -> dict:
56
72
  # Extend supermethod to handle human mode
57
73
  if self.config.mode == "human":
58
- match command := self._prompt_and_handle_special("[bold yellow]>[/bold yellow] "):
59
- case "/y" | "/c": # Just go to the super query, which queries the LM for the next action
74
+ match command := self._prompt_and_handle_slash_commands("[bold yellow]>[/bold yellow] "):
75
+ case "/y" | "/c":
60
76
  pass
61
77
  case _:
62
- msg = {"content": f"\n```bash\n{command}\n```"}
63
- self.add_message("assistant", msg["content"])
78
+ msg = {
79
+ "role": "user",
80
+ "content": f"User command: \n```bash\n{command}\n```",
81
+ "extra": {"actions": [{"command": command}]},
82
+ }
83
+ self.add_messages(msg)
64
84
  return msg
65
85
  try:
66
86
  with console.status("Waiting for the LM to respond..."):
@@ -68,86 +88,127 @@ class InteractiveAgent(DefaultAgent):
68
88
  except LimitsExceeded:
69
89
  console.print(
70
90
  f"Limits exceeded. Limits: {self.config.step_limit} steps, ${self.config.cost_limit}.\n"
71
- f"Current spend: {self.model.n_calls} steps, ${self.model.cost:.2f}."
91
+ f"Current spend: {self.n_calls} steps, ${self.cost:.2f}."
72
92
  )
73
93
  self.config.step_limit = int(input("New step limit: "))
74
94
  self.config.cost_limit = float(input("New cost limit: "))
75
95
  return super().query()
76
96
 
77
- def step(self) -> dict:
97
+ def step(self) -> list[dict]:
78
98
  # Override the step method to handle user interruption
79
99
  try:
80
100
  console.print(Rule())
81
101
  return super().step()
82
102
  except KeyboardInterrupt:
83
- # We always add a message about the interrupt and then just proceed to the next step
84
- interruption_message = self._prompt_and_handle_special(
103
+ interruption_message = self._prompt_and_handle_slash_commands(
85
104
  "\n\n[bold yellow]Interrupted.[/bold yellow] "
86
105
  "[green]Type a comment/command[/green] (/h for available commands)"
87
106
  "\n[bold yellow]>[/bold yellow] "
88
107
  ).strip()
89
108
  if not interruption_message or interruption_message in self._MODE_COMMANDS_MAPPING:
90
109
  interruption_message = "Temporary interruption caught."
91
- raise NonTerminatingException(f"Interrupted by user: {interruption_message}")
110
+ raise UserInterruption(
111
+ {
112
+ "role": "user",
113
+ "content": f"Interrupted by user: {interruption_message}",
114
+ "extra": {"interrupt_type": "UserInterruption"},
115
+ }
116
+ )
92
117
 
93
- def execute_action(self, action: dict) -> dict:
94
- # Override the execute_action method to handle user confirmation
95
- if self.should_ask_confirmation(action["action"]):
96
- self.ask_confirmation()
97
- return super().execute_action(action)
118
+ def execute_actions(self, message: dict) -> list[dict]:
119
+ # Override to handle user confirmation and confirm_exit, with try/finally to preserve partial outputs
120
+ actions = message.get("extra", {}).get("actions", [])
121
+ commands = [action["command"] for action in actions]
122
+ outputs = []
123
+ try:
124
+ self._ask_confirmation_or_interrupt(commands)
125
+ for action in actions:
126
+ outputs.append(self.env.execute(action))
127
+ except Submitted as e:
128
+ self._check_for_new_task_or_submit(e)
129
+ finally:
130
+ result = self.add_messages(
131
+ *self.model.format_observation_messages(message, outputs, self.get_template_vars())
132
+ )
133
+ return result
134
+
135
+ def _add_observation_messages(self, message: dict, outputs: list[dict]) -> list[dict]:
136
+ return self.add_messages(*self.model.format_observation_messages(message, outputs, self.get_template_vars()))
137
+
138
+ def _check_for_new_task_or_submit(self, e: Submitted) -> NoReturn:
139
+ """Check if user wants to add a new task or submit."""
140
+ if self.config.confirm_exit:
141
+ message = (
142
+ "[bold yellow]Agent wants to finish.[/bold yellow] "
143
+ "[bold green]Type new task[/bold green] or [red][bold]Esc, then enter[/bold] to quit.[/red]\n"
144
+ "[bold yellow]>[/bold yellow] "
145
+ )
146
+ if new_task := self._prompt_and_handle_slash_commands(message, _multiline=True).strip():
147
+ raise UserInterruption(
148
+ {
149
+ "role": "user",
150
+ "content": f"The user added a new task: {new_task}",
151
+ "extra": {"interrupt_type": "UserNewTask"},
152
+ }
153
+ )
154
+ raise e
98
155
 
99
- def should_ask_confirmation(self, action: str) -> bool:
156
+ def _should_ask_confirmation(self, action: str) -> bool:
100
157
  return self.config.mode == "confirm" and not any(re.match(r, action) for r in self.config.whitelist_actions)
101
158
 
102
- def ask_confirmation(self) -> None:
159
+ def _ask_confirmation_or_interrupt(self, commands: list[str]) -> None:
160
+ commands_needing_confirmation = [c for c in commands if self._should_ask_confirmation(c)]
161
+ if not commands_needing_confirmation:
162
+ return
163
+ n = len(commands_needing_confirmation)
103
164
  prompt = (
104
- "[bold yellow]Execute?[/bold yellow] [green][bold]Enter[/bold] to confirm[/green], "
105
- "or [green]Type a comment/command[/green] (/h for available commands)\n"
165
+ f"[bold yellow]Execute {n} action(s)?[/] [green][bold]Enter[/] to confirm[/], "
166
+ "[red]type [bold]comment[/] to reject[/], or [blue][bold]/h[/] to show available commands[/]\n"
106
167
  "[bold yellow]>[/bold yellow] "
107
168
  )
108
- match user_input := self._prompt_and_handle_special(prompt).strip():
169
+ match user_input := self._prompt_and_handle_slash_commands(prompt).strip():
109
170
  case "" | "/y":
110
171
  pass # confirmed, do nothing
111
172
  case "/u": # Skip execution action and get back to query
112
- raise NonTerminatingException("Command not executed. Switching to human mode")
173
+ raise UserInterruption(
174
+ {
175
+ "role": "user",
176
+ "content": "Commands not executed. Switching to human mode",
177
+ "extra": {"interrupt_type": "UserRejection"},
178
+ }
179
+ )
113
180
  case _:
114
- raise NonTerminatingException(
115
- f"Command not executed. The user rejected your command with the following message: {user_input}"
181
+ raise UserInterruption(
182
+ {
183
+ "role": "user",
184
+ "content": f"Commands not executed. The user rejected your commands with the following message: {user_input}",
185
+ "extra": {"interrupt_type": "UserRejection"},
186
+ }
116
187
  )
117
188
 
118
- def _prompt_and_handle_special(self, prompt: str) -> str:
189
+ def _prompt_and_handle_slash_commands(self, prompt: str, *, _multiline: bool = False) -> str:
119
190
  """Prompts the user, takes care of /h (followed by requery) and sets the mode. Returns the user input."""
120
191
  console.print(prompt, end="")
121
- user_input = prompt_session.prompt("")
192
+ if _multiline:
193
+ return _multiline_prompt()
194
+ user_input = _prompt_session.prompt("")
195
+ if user_input == "/m":
196
+ return self._prompt_and_handle_slash_commands(prompt, _multiline=True)
122
197
  if user_input == "/h":
123
198
  console.print(
124
199
  f"Current mode: [bold green]{self.config.mode}[/bold green]\n"
125
200
  f"[bold green]/y[/bold green] to switch to [bold yellow]yolo[/bold yellow] mode (execute LM commands without confirmation)\n"
126
201
  f"[bold green]/c[/bold green] to switch to [bold yellow]confirmation[/bold yellow] mode (ask for confirmation before executing LM commands)\n"
127
202
  f"[bold green]/u[/bold green] to switch to [bold yellow]human[/bold yellow] mode (execute commands issued by the user)\n"
203
+ f"[bold green]/m[/bold green] to enter multiline comment",
128
204
  )
129
- return self._prompt_and_handle_special(prompt)
205
+ return self._prompt_and_handle_slash_commands(prompt)
130
206
  if user_input in self._MODE_COMMANDS_MAPPING:
131
207
  if self.config.mode == self._MODE_COMMANDS_MAPPING[user_input]:
132
- return self._prompt_and_handle_special(
208
+ return self._prompt_and_handle_slash_commands(
133
209
  f"[bold red]Already in {self.config.mode} mode.[/bold red]\n{prompt}"
134
210
  )
135
211
  self.config.mode = self._MODE_COMMANDS_MAPPING[user_input]
136
212
  console.print(f"Switched to [bold green]{self.config.mode}[/bold green] mode.")
137
213
  return user_input
138
214
  return user_input
139
-
140
- def has_finished(self, output: dict[str, str]):
141
- try:
142
- return super().has_finished(output)
143
- except Submitted as e:
144
- if self.config.confirm_exit:
145
- console.print(
146
- "[bold green]Agent wants to finish.[/bold green] "
147
- "[green]Type a comment to give it a new task or press enter to quit.\n"
148
- "[bold yellow]>[/bold yellow] ",
149
- end="",
150
- )
151
- if new_task := self._prompt_and_handle_special("").strip():
152
- raise NonTerminatingException(f"The user added a new task: {new_task}")
153
- raise e
@@ -1,9 +1,8 @@
1
1
  # Configs
2
2
 
3
- * `mini.yaml` - Default config for `mini`/`agents/interactive.py` or `mini -v`/`agents/interactive_textual.py` agent.
3
+ * `mini.yaml` - Default config for `mini`/`agents/interactive.py` agent.
4
4
  * `default.yaml` - Default config for the `default.py` agent.
5
- * `github_issue.yaml` - Config for the `run/github_issue.py` entry point.
6
5
 
7
- ## Extras
6
+ ## Benchmarks
8
7
 
9
- * `extra/swebench.yaml` - Config for the `run/extra/swebench.py` entry point.
8
+ * `benchmarks/swebench.yaml` - Config for the `run/benchmarks/swebench.py` entry point.
@@ -1,8 +1,11 @@
1
1
  """Configuration files and utilities for mini-SWE-agent."""
2
2
 
3
+ import json
3
4
  import os
4
5
  from pathlib import Path
5
6
 
7
+ import yaml
8
+
6
9
  builtin_config_dir = Path(__file__).parent
7
10
 
8
11
 
@@ -16,6 +19,7 @@ def get_config_path(config_spec: str | Path) -> Path:
16
19
  Path(os.getenv("MSWEA_CONFIG_DIR", ".")) / config_spec,
17
20
  builtin_config_dir / config_spec,
18
21
  builtin_config_dir / "extra" / config_spec,
22
+ builtin_config_dir / "benchmarks" / config_spec,
19
23
  ]
20
24
  for candidate in candidates:
21
25
  if candidate.exists():
@@ -24,4 +28,35 @@ def get_config_path(config_spec: str | Path) -> Path:
24
28
  raise FileNotFoundError(f"Could not find config file for {config_spec} (tried: {candidates})")
25
29
 
26
30
 
27
- __all__ = ["builtin_config_dir", "get_config_path"]
31
+ def _key_value_spec_to_nested_dict(config_spec: str) -> dict:
32
+ """Interpret key-value specs from the command line.
33
+
34
+ Example:
35
+
36
+ "model.model_name=anthropic/claude-sonnet-4-5-20250929" ->
37
+ {"model": {"model_name": "anthropic/claude-sonnet-4-5-20250929"}}
38
+ """
39
+ key, value = config_spec.split("=", 1)
40
+ try:
41
+ value = json.loads(value)
42
+ except json.JSONDecodeError:
43
+ pass
44
+ keys = key.split(".")
45
+ result = {}
46
+ current = result
47
+ for k in keys[:-1]:
48
+ current[k] = {}
49
+ current = current[k]
50
+ current[keys[-1]] = value
51
+ return result
52
+
53
+
54
+ def get_config_from_spec(config_spec: str | Path) -> dict:
55
+ """Get a config from a config spec."""
56
+ if isinstance(config_spec, str) and "=" in config_spec:
57
+ return _key_value_spec_to_nested_dict(config_spec)
58
+ path = get_config_path(config_spec)
59
+ return yaml.safe_load(path.read_text())
60
+
61
+
62
+ __all__ = ["builtin_config_dir", "get_config_path", "get_config_from_spec", "_key_value_spec_to_nested_dict"]