mini-swe-agent 1.17.5__py3-none-any.whl → 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/METADATA +36 -52
  2. mini_swe_agent-2.0.0a1.dist-info/RECORD +70 -0
  3. mini_swe_agent-2.0.0a1.dist-info/entry_points.txt +5 -0
  4. minisweagent/__init__.py +19 -26
  5. minisweagent/agents/default.py +128 -113
  6. minisweagent/agents/interactive.py +119 -58
  7. minisweagent/config/README.md +3 -4
  8. minisweagent/config/__init__.py +36 -1
  9. minisweagent/config/benchmarks/swebench.yaml +156 -0
  10. minisweagent/config/{extra/swebench.yaml → benchmarks/swebench_backticks.yaml} +69 -64
  11. minisweagent/config/benchmarks/swebench_modal.yaml +47 -0
  12. minisweagent/config/{extra → benchmarks}/swebench_xml.yaml +73 -70
  13. minisweagent/config/default.yaml +24 -21
  14. minisweagent/config/inspector.tcss +42 -0
  15. minisweagent/config/mini.yaml +53 -71
  16. minisweagent/config/{github_issue.yaml → mini_textbased.yaml} +43 -29
  17. minisweagent/environments/__init__.py +1 -0
  18. minisweagent/environments/docker.py +67 -20
  19. minisweagent/environments/extra/bubblewrap.py +86 -47
  20. minisweagent/environments/extra/swerex_docker.py +53 -20
  21. minisweagent/environments/extra/swerex_modal.py +90 -0
  22. minisweagent/environments/local.py +62 -21
  23. minisweagent/environments/singularity.py +59 -18
  24. minisweagent/exceptions.py +22 -0
  25. minisweagent/models/__init__.py +6 -7
  26. minisweagent/models/extra/roulette.py +20 -17
  27. minisweagent/models/litellm_model.py +90 -44
  28. minisweagent/models/litellm_response_model.py +80 -0
  29. minisweagent/models/litellm_textbased_model.py +45 -0
  30. minisweagent/models/openrouter_model.py +87 -45
  31. minisweagent/models/openrouter_response_model.py +123 -0
  32. minisweagent/models/openrouter_textbased_model.py +76 -0
  33. minisweagent/models/portkey_model.py +84 -42
  34. minisweagent/models/portkey_response_model.py +163 -0
  35. minisweagent/models/requesty_model.py +91 -41
  36. minisweagent/models/test_models.py +246 -19
  37. minisweagent/models/utils/actions_text.py +60 -0
  38. minisweagent/models/utils/actions_toolcall.py +102 -0
  39. minisweagent/models/utils/actions_toolcall_response.py +110 -0
  40. minisweagent/models/utils/anthropic_utils.py +28 -0
  41. minisweagent/models/utils/cache_control.py +15 -2
  42. minisweagent/models/utils/content_string.py +74 -0
  43. minisweagent/models/utils/openai_multimodal.py +50 -0
  44. minisweagent/models/utils/retry.py +25 -0
  45. minisweagent/run/benchmarks/__init__.py +1 -0
  46. minisweagent/run/{extra → benchmarks}/swebench.py +56 -35
  47. minisweagent/run/{extra → benchmarks}/swebench_single.py +36 -26
  48. minisweagent/run/{extra → benchmarks}/utils/batch_progress.py +1 -1
  49. minisweagent/run/hello_world.py +6 -0
  50. minisweagent/run/mini.py +54 -63
  51. minisweagent/run/utilities/__init__.py +1 -0
  52. minisweagent/run/{extra → utilities}/config.py +2 -0
  53. minisweagent/run/{inspector.py → utilities/inspector.py} +90 -11
  54. minisweagent/run/{mini_extra.py → utilities/mini_extra.py} +9 -5
  55. minisweagent/utils/serialize.py +26 -0
  56. mini_swe_agent-1.17.5.dist-info/RECORD +0 -61
  57. mini_swe_agent-1.17.5.dist-info/entry_points.txt +0 -5
  58. minisweagent/agents/interactive_textual.py +0 -450
  59. minisweagent/config/extra/swebench_roulette.yaml +0 -233
  60. minisweagent/config/mini.tcss +0 -86
  61. minisweagent/models/anthropic.py +0 -35
  62. minisweagent/models/litellm_response_api_model.py +0 -82
  63. minisweagent/models/portkey_response_api_model.py +0 -75
  64. minisweagent/models/utils/key_per_thread.py +0 -20
  65. minisweagent/models/utils/openai_utils.py +0 -41
  66. minisweagent/run/github_issue.py +0 -87
  67. minisweagent/run/utils/__init__.py +0 -0
  68. minisweagent/run/utils/save.py +0 -78
  69. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/WHEEL +0 -0
  70. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/licenses/LICENSE.md +0 -0
  71. {mini_swe_agent-1.17.5.dist-info → mini_swe_agent-2.0.0a1.dist-info}/top_level.txt +0 -0
  72. /minisweagent/config/{extra → benchmarks}/__init__.py +0 -0
  73. /minisweagent/run/{extra → benchmarks}/utils/__init__.py +0 -0
minisweagent/agents/interactive_textual.py (file deleted)
@@ -1,450 +0,0 @@
1
- """
2
- Extension of the `default.py` agent that uses Textual for an interactive TUI.
3
- For a simpler version of an interactive UI that does not require threading and more, see `interactive.py`.
4
- """
5
-
6
- import logging
7
- import os
8
- import re
9
- import threading
10
- import time
11
- import traceback
12
- from collections.abc import Iterable
13
- from dataclasses import dataclass, field
14
- from pathlib import Path
15
- from typing import Literal
16
-
17
- from rich.spinner import Spinner
18
- from rich.text import Text
19
- from textual.app import App, ComposeResult, SystemCommand
20
- from textual.binding import Binding
21
- from textual.containers import Container, Vertical, VerticalScroll
22
- from textual.css.query import NoMatches
23
- from textual.events import Key
24
- from textual.screen import Screen
25
- from textual.widgets import Footer, Header, Input, Static, TextArea
26
-
27
- from minisweagent.agents.default import AgentConfig, DefaultAgent, NonTerminatingException, Submitted
28
-
29
-
30
- @dataclass
31
- class TextualAgentConfig(AgentConfig):
32
- mode: Literal["confirm", "yolo"] = "confirm"
33
- """Mode for action execution: 'confirm' requires user confirmation, 'yolo' executes immediately."""
34
- whitelist_actions: list[str] = field(default_factory=list)
35
- """Never confirm actions that match these regular expressions."""
36
- confirm_exit: bool = True
37
- """If the agent wants to finish, do we ask for confirmation from user?"""
38
-
39
-
40
- class _TextualAgent(DefaultAgent):
41
- def __init__(self, app: "TextualAgent", *args, **kwargs):
42
- """Connects the DefaultAgent to the TextualApp."""
43
- self.app = app
44
- super().__init__(*args, config_class=TextualAgentConfig, **kwargs)
45
- self._current_action_from_human = False
46
-
47
- def add_message(self, role: str, content: str, **kwargs):
48
- super().add_message(role, content, **kwargs)
49
- if self.app.agent_state != "UNINITIALIZED":
50
- self.app.call_from_thread(self.app.on_message_added)
51
-
52
- def query(self) -> dict:
53
- if self.config.mode == "human":
54
- human_input = self.app.input_container.request_input("Enter your command:")
55
- self._current_action_from_human = True
56
- msg = {"content": f"\n```bash\n{human_input}\n```"}
57
- self.add_message("assistant", msg["content"])
58
- return msg
59
- self._current_action_from_human = False
60
- return super().query()
61
-
62
- def run(self, task: str, **kwargs) -> tuple[str, str]:
63
- try:
64
- exit_status, result = super().run(task, **kwargs)
65
- except Exception as e:
66
- result = str(e)
67
- self.app.call_from_thread(self.app.action_quit)
68
- print(traceback.format_exc())
69
- return "ERROR", result
70
- else:
71
- self.app.call_from_thread(self.app.on_agent_finished, exit_status, result)
72
- self.app.call_from_thread(self.app.action_quit)
73
- return exit_status, result
74
-
75
- def execute_action(self, action: dict) -> dict:
76
- if self.config.mode == "human" and not self._current_action_from_human: # threading, grrrrr
77
- raise NonTerminatingException("Command not executed because user switched to manual mode.")
78
- if (
79
- self.config.mode == "confirm"
80
- and action["action"].strip()
81
- and not any(re.match(r, action["action"]) for r in self.config.whitelist_actions)
82
- ):
83
- result = self.app.input_container.request_input("Press ENTER to confirm or provide rejection reason")
84
- if result: # Non-empty string means rejection
85
- raise NonTerminatingException(f"Command not executed: {result}")
86
- return super().execute_action(action)
87
-
88
- def has_finished(self, output: dict[str, str]):
89
- try:
90
- return super().has_finished(output)
91
- except Submitted as e:
92
- if self.config.confirm_exit:
93
- if new_task := self.app.input_container.request_input(
94
- "[bold green]Agent wants to finish.[/bold green] "
95
- "[green]Type a comment to give it a new task or press enter to quit.\n"
96
- ).strip():
97
- raise NonTerminatingException(f"The user added a new task: {new_task}")
98
- raise e
99
-
100
-
101
- class AddLogEmitCallback(logging.Handler):
102
- def __init__(self, callback):
103
- """Custom log handler that forwards messages via callback."""
104
- super().__init__()
105
- self.callback = callback
106
-
107
- def emit(self, record: logging.LogRecord):
108
- self.callback(record) # type: ignore[attr-defined]
109
-
110
-
111
- def _messages_to_steps(messages: list[dict]) -> list[list[dict]]:
112
- """Group messages into "pages" as shown by the UI."""
113
- steps = []
114
- current_step = []
115
- for message in messages:
116
- current_step.append(message)
117
- if message["role"] == "user":
118
- steps.append(current_step)
119
- current_step = []
120
- if current_step:
121
- steps.append(current_step)
122
- return steps
123
-
124
-
125
- class SmartInputContainer(Container):
126
- def __init__(self, app: "TextualAgent"):
127
- """Smart input container supporting single-line and multi-line input modes."""
128
- super().__init__(classes="smart-input-container")
129
- self._app = app
130
- self._multiline_mode = False
131
- self.can_focus = True
132
- self.display = False
133
-
134
- self.pending_prompt: str | None = None
135
- self._input_event = threading.Event()
136
- self._input_result: str | None = None
137
-
138
- self._header_display = Static(id="input-header-display", classes="message-header input-request-header")
139
- self._hint_text = Static(classes="hint-text")
140
- self._single_input = Input(placeholder="Type your input...")
141
- self._multi_input = TextArea(show_line_numbers=False, classes="multi-input")
142
- self._input_elements_container = Vertical(
143
- self._header_display,
144
- self._hint_text,
145
- self._single_input,
146
- self._multi_input,
147
- classes="message-container",
148
- )
149
-
150
- def compose(self) -> ComposeResult:
151
- yield self._input_elements_container
152
-
153
- def on_mount(self) -> None:
154
- """Initialize the widget state."""
155
- self._multi_input.display = False
156
- self._update_mode_display()
157
-
158
- def on_focus(self) -> None:
159
- """Called when the container gains focus."""
160
- if self._multiline_mode:
161
- self._multi_input.focus()
162
- else:
163
- self._single_input.focus()
164
-
165
- def request_input(self, prompt: str) -> str:
166
- """Request input from user. Returns input text (empty string if confirmed without reason)."""
167
- self._input_event.clear()
168
- self._input_result = None
169
- self.pending_prompt = prompt
170
- self._header_display.update(prompt)
171
- self._update_mode_display()
172
- self._app.call_from_thread(self._app.update_content)
173
- self._input_event.wait()
174
- return self._input_result or ""
175
-
176
- def _complete_input(self, input_text: str):
177
- """Internal method to complete the input process."""
178
- self._input_result = input_text
179
- self.pending_prompt = None
180
- self.display = False
181
- self._single_input.value = ""
182
- self._multi_input.text = ""
183
- self._multiline_mode = False
184
- self._update_mode_display()
185
- self._app.agent_state = "RUNNING"
186
- self._app.update_content()
187
- # Reset scroll position to bottom since input container disappearing changes layout
188
- # somehow scroll_to doesn't work.
189
- self._app._vscroll.scroll_y = 0
190
- self._input_event.set()
191
-
192
- def action_toggle_mode(self) -> None:
193
- """Switch from single-line to multi-line mode (one-way only)."""
194
- if self.pending_prompt is None or self._multiline_mode:
195
- return
196
-
197
- self._multiline_mode = True
198
- self._update_mode_display()
199
- self.on_focus()
200
-
201
- def _update_mode_display(self) -> None:
202
- """Update the display based on current mode."""
203
- if self._multiline_mode:
204
- self._multi_input.text = self._single_input.value
205
- self._single_input.display = False
206
- self._multi_input.display = True
207
- self._hint_text.update(
208
- "[reverse][bold][$accent] Ctrl+D [/][/][/] to submit, [reverse][bold][$accent] Tab [/][/][/] to switch focus with other controls"
209
- )
210
- else:
211
- self._hint_text.update(
212
- "[reverse][bold][$accent] Enter [/][/][/] to submit, [reverse][bold][$accent] Ctrl+T [/][/][/] to switch to multi-line input, [reverse][bold][$accent] Tab [/][/][/] to switch focus with other controls",
213
- )
214
- self._multi_input.display = False
215
- self._single_input.display = True
216
-
217
- def on_input_submitted(self, event: Input.Submitted) -> None:
218
- """Handle single-line input submission."""
219
- if not self._multiline_mode:
220
- text = event.input.value.strip()
221
- self._complete_input(text)
222
-
223
- def on_key(self, event: Key) -> None:
224
- """Handle key events."""
225
- if event.key == "ctrl+t" and not self._multiline_mode:
226
- event.prevent_default()
227
- self.action_toggle_mode()
228
- return
229
-
230
- if self._multiline_mode and event.key == "ctrl+d":
231
- event.prevent_default()
232
- self._complete_input(self._multi_input.text.strip())
233
- return
234
-
235
- if event.key == "escape":
236
- event.prevent_default()
237
- self.can_focus = False
238
- self._app.set_focus(None)
239
- return
240
-
241
-
242
- class TextualAgent(App):
243
- BINDINGS = [
244
- Binding("right,l", "next_step", "Step++", tooltip="Show next step of the agent"),
245
- Binding("left,h", "previous_step", "Step--", tooltip="Show previous step of the agent"),
246
- Binding("0", "first_step", "Step=0", tooltip="Show first step of the agent", show=False),
247
- Binding("$", "last_step", "Step=-1", tooltip="Show last step of the agent", show=False),
248
- Binding("j,down", "scroll_down", "Scroll down", show=False),
249
- Binding("k,up", "scroll_up", "Scroll up", show=False),
250
- Binding("q,ctrl+q", "quit", "Quit", tooltip="Quit the agent"),
251
- Binding("y,ctrl+y", "yolo", "YOLO mode", tooltip="Switch to YOLO Mode (LM actions will execute immediately)"),
252
- Binding(
253
- "c",
254
- "confirm",
255
- "CONFIRM mode",
256
- tooltip="Switch to Confirm Mode (LM proposes commands and you confirm/reject them)",
257
- ),
258
- Binding("u,ctrl+u", "human", "HUMAN mode", tooltip="Switch to Human Mode (you can now type commands directly)"),
259
- Binding("f1,question_mark", "toggle_help_panel", "Help", tooltip="Show help"),
260
- ]
261
-
262
- def __init__(self, model, env, **kwargs):
263
- css_path = os.environ.get("MSWEA_MINI_STYLE_PATH", str(Path(__file__).parent.parent / "config" / "mini.tcss"))
264
- self.__class__.CSS = Path(css_path).read_text()
265
- super().__init__()
266
- self.agent_state = "UNINITIALIZED"
267
- self.agent = _TextualAgent(self, model=model, env=env, **kwargs)
268
- self._i_step = 0
269
- self.n_steps = 1
270
- self.input_container = SmartInputContainer(self)
271
- self.log_handler = AddLogEmitCallback(lambda record: self.call_from_thread(self.on_log_message_emitted, record))
272
- logging.getLogger().addHandler(self.log_handler)
273
- self._spinner = Spinner("dots")
274
- self.exit_status: str = "ExitStatusUnset"
275
- self.result: str = ""
276
-
277
- self._vscroll = VerticalScroll()
278
-
279
- def run(self, task: str, **kwargs) -> tuple[str, str]:
280
- threading.Thread(target=lambda: self.agent.run(task, **kwargs), daemon=True).start()
281
- super().run()
282
- return self.exit_status, self.result
283
-
284
- # --- Basics ---
285
-
286
- @property
287
- def config(self):
288
- return self.agent.config
289
-
290
- @property
291
- def i_step(self) -> int:
292
- """Current step index."""
293
- return self._i_step
294
-
295
- @i_step.setter
296
- def i_step(self, value: int) -> None:
297
- """Set current step index, automatically clamping to valid bounds."""
298
- if value != self._i_step:
299
- self._i_step = max(0, min(value, self.n_steps - 1))
300
- self._vscroll.scroll_to(y=0, animate=False)
301
- self.update_content()
302
-
303
- def compose(self) -> ComposeResult:
304
- yield Header()
305
- with Container(id="main"):
306
- with self._vscroll:
307
- with Vertical(id="content"):
308
- pass
309
- yield self.input_container
310
- yield Footer()
311
-
312
- def on_mount(self) -> None:
313
- self.agent_state = "RUNNING"
314
- self.update_content()
315
- self.set_interval(1 / 8, self._update_headers)
316
-
317
- @property
318
- def messages(self) -> list[dict]:
319
- return self.agent.messages
320
-
321
- @property
322
- def model(self):
323
- return self.agent.model
324
-
325
- @property
326
- def env(self):
327
- return self.agent.env
328
-
329
- # --- Reacting to events ---
330
-
331
- def on_message_added(self) -> None:
332
- auto_follow = self.i_step == self.n_steps - 1 and self._vscroll.scroll_y <= 1
333
- self.n_steps = len(_messages_to_steps(self.agent.messages))
334
- self.update_content()
335
- if auto_follow:
336
- self.action_last_step()
337
-
338
- def on_log_message_emitted(self, record: logging.LogRecord) -> None:
339
- """Handle log messages of warning level or higher by showing them as notifications."""
340
- if record.levelno >= logging.WARNING:
341
- self.notify(f"[{record.levelname}] {record.getMessage()}", severity="warning")
342
-
343
- def on_unmount(self) -> None:
344
- """Clean up the log handler when the app shuts down."""
345
- if hasattr(self, "log_handler"):
346
- logging.getLogger().removeHandler(self.log_handler)
347
-
348
- def on_agent_finished(self, exit_status: str, result: str):
349
- self.agent_state = "STOPPED"
350
- self.notify(f"Agent finished with status: {exit_status}")
351
- self.exit_status = exit_status
352
- self.result = result
353
- self.update_content()
354
-
355
- # --- UI update logic ---
356
-
357
- def update_content(self) -> None:
358
- container = self.query_one("#content", Vertical)
359
- container.remove_children()
360
- items = _messages_to_steps(self.agent.messages)
361
-
362
- if not items:
363
- container.mount(Static("Waiting for agent to start..."))
364
- return
365
-
366
- for message in items[self.i_step]:
367
- if isinstance(message["content"], list):
368
- content_str = "\n".join([item["text"] for item in message["content"]])
369
- else:
370
- content_str = str(message["content"])
371
- message_container = Vertical(classes="message-container")
372
- container.mount(message_container)
373
- role = message["role"].replace("assistant", "mini-swe-agent")
374
- message_container.mount(Static(role.upper(), classes="message-header"))
375
- message_container.mount(Static(Text(content_str, no_wrap=False), classes="message-content"))
376
-
377
- if self.input_container.pending_prompt is not None:
378
- self.agent_state = "AWAITING_INPUT"
379
- self.input_container.display = self.input_container.pending_prompt is not None and self.i_step == len(items) - 1
380
- if self.input_container.display:
381
- self.input_container.on_focus()
382
-
383
- self._update_headers()
384
- self.refresh()
385
-
386
- def _update_headers(self) -> None:
387
- """Update just the title with current state and spinner if needed."""
388
- status_text = self.agent_state
389
- if self.agent_state == "RUNNING":
390
- spinner_frame = str(self._spinner.render(time.time())).strip()
391
- status_text = f"{self.agent_state} {spinner_frame}"
392
- self.title = f"Step {self.i_step + 1}/{self.n_steps} - {status_text} - Cost: ${self.agent.model.cost:.2f}"
393
- try:
394
- self.query_one("Header").set_class(self.agent_state == "RUNNING", "running")
395
- except NoMatches: # might be called when shutting down
396
- pass
397
-
398
- # --- Other textual overrides ---
399
-
400
- def get_system_commands(self, screen: Screen) -> Iterable[SystemCommand]:
401
- # Add to palette
402
- yield from super().get_system_commands(screen)
403
- for binding in self.BINDINGS:
404
- description = f"{binding.description} (shortcut {' OR '.join(binding.key.split(','))})" # type: ignore[attr-defined]
405
- action_method = getattr(self, f"action_{binding.action}") # type: ignore[attr-defined]
406
- yield SystemCommand(description, binding.tooltip, action_method) # type: ignore[attr-defined]
407
-
408
- # --- Textual bindings ---
409
-
410
- def action_yolo(self):
411
- self.agent.config.mode = "yolo"
412
- if self.input_container.pending_prompt is not None:
413
- self.input_container._complete_input("") # accept
414
- self.notify("YOLO mode enabled - LM actions will execute immediately")
415
-
416
- def action_human(self):
417
- if self.agent.config.mode == "confirm" and self.input_container.pending_prompt is not None:
418
- self.input_container._complete_input("User switched to manual mode, this command will be ignored")
419
- self.agent.config.mode = "human"
420
- self.notify("Human mode enabled - you can now type commands directly")
421
-
422
- def action_confirm(self):
423
- if self.agent.config.mode == "human" and self.input_container.pending_prompt is not None:
424
- self.input_container._complete_input("") # just submit blank action
425
- self.agent.config.mode = "confirm"
426
- self.notify("Confirm mode enabled - LM proposes commands and you confirm/reject them")
427
-
428
- def action_next_step(self) -> None:
429
- self.i_step += 1
430
-
431
- def action_previous_step(self) -> None:
432
- self.i_step -= 1
433
-
434
- def action_first_step(self) -> None:
435
- self.i_step = 0
436
-
437
- def action_last_step(self) -> None:
438
- self.i_step = self.n_steps - 1
439
-
440
- def action_scroll_down(self) -> None:
441
- self._vscroll.scroll_to(y=self._vscroll.scroll_target_y + 15)
442
-
443
- def action_scroll_up(self) -> None:
444
- self._vscroll.scroll_to(y=self._vscroll.scroll_target_y - 15)
445
-
446
- def action_toggle_help_panel(self) -> None:
447
- if self.query("HelpPanel"):
448
- self.action_hide_help_panel()
449
- else:
450
- self.action_show_help_panel()
minisweagent/config/extra/swebench_roulette.yaml (file deleted)
@@ -1,233 +0,0 @@
1
- agent:
2
- system_template: |
3
- You are a helpful assistant that can interact multiple times with a computer shell to solve programming tasks.
4
- Your response must contain exactly ONE bash code block with ONE command (or commands connected with && or ||).
5
-
6
- Include a THOUGHT section before your command where you explain your reasoning process.
7
- Format your response as shown in <format_example>.
8
-
9
- <format_example>
10
- THOUGHT: Your reasoning and analysis here
11
-
12
- ```bash
13
- your_command_here
14
- ```
15
- </format_example>
16
-
17
- Failure to follow these rules will cause your response to be rejected.
18
- instance_template: |
19
- <pr_description>
20
- Consider the following PR description:
21
- {{task}}
22
- </pr_description>
23
-
24
- <instructions>
25
- # Task Instructions
26
-
27
- ## Overview
28
- You're a software engineer interacting continuously with a computer by submitting commands.
29
- You'll be helping implement necessary changes to meet requirements in the PR description.
30
- Your task is specifically to make changes to non-test files in the current directory in order to fix the issue described in the PR description in a way that is general and consistent with the codebase.
31
-
32
- IMPORTANT: This is an interactive process where you will think and issue ONE command, see its result, then think and issue your next command.
33
-
34
- For each response:
35
- 1. Include a THOUGHT section explaining your reasoning and what you're trying to accomplish
36
- 2. Provide exactly ONE bash command to execute
37
-
38
- ## Important Boundaries
39
- - MODIFY: Regular source code files in /testbed (this is the working directory for all your subsequent commands)
40
- - DO NOT MODIFY: Tests, configuration files (pyproject.toml, setup.cfg, etc.)
41
-
42
- ## Recommended Workflow
43
- 1. Analyze the codebase by finding and reading relevant files
44
- 2. Create a script to reproduce the issue
45
- 3. Edit the source code to resolve the issue
46
- 4. Verify your fix works by running your script again
47
- 5. Test edge cases to ensure your fix is robust
48
-
49
- ## Command Execution Rules
50
- You are operating in an environment where
51
- 1. You write a single command
52
- 2. The system executes that command in a subshell
53
- 3. You see the result
54
- 4. You write your next command
55
-
56
- Each response should include:
57
- 1. A **THOUGHT** section where you explain your reasoning and plan
58
- 2. A single bash code block with your command
59
-
60
- Format your responses like this:
61
-
62
- <format_example>
63
- THOUGHT: Here I explain my reasoning process, analysis of the current situation,
64
- and what I'm trying to accomplish with the command below.
65
-
66
- ```bash
67
- your_command_here
68
- ```
69
- </format_example>
70
-
71
- Commands must be specified in a single bash code block:
72
-
73
- ```bash
74
- your_command_here
75
- ```
76
-
77
- **CRITICAL REQUIREMENTS:**
78
- - Your response SHOULD include a THOUGHT section explaining your reasoning
79
- - Your response MUST include EXACTLY ONE bash code block
80
- - This bash block MUST contain EXACTLY ONE command (or a set of commands connected with && or ||)
81
- - If you include zero or multiple bash blocks, or no command at all, YOUR RESPONSE WILL FAIL
82
- - Do NOT try to run multiple independent commands in separate blocks in one response
83
- - Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
84
- - However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
85
-
86
- Example of a CORRECT response:
87
- <example_response>
88
- THOUGHT: I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
89
-
90
- ```bash
91
- ls -la
92
- ```
93
- </example_response>
94
-
95
- Example of an INCORRECT response:
96
- <example_response>
97
- THOUGHT: I need to examine the codebase and then look at a specific file. I'll run multiple commands to do this.
98
-
99
- ```bash
100
- ls -la
101
- ```
102
-
103
- Now I'll read the file:
104
-
105
- ```bash
106
- cat file.txt
107
- ```
108
- </example_response>
109
-
110
- If you need to run multiple commands, either:
111
- 1. Combine them in one block using && or ||
112
- ```bash
113
- command1 && command2 || echo "Error occurred"
114
- ```
115
-
116
- 2. Wait for the first command to complete, see its output, then issue the next command in your following response.
117
-
118
- ## Environment Details
119
- - You have a full Linux shell environment
120
- - Always use non-interactive flags (-y, -f) for commands
121
- - Avoid interactive tools like vi, nano, or any that require user input
122
- - If a command isn't available, you can install it
123
-
124
- ## Useful Command Examples
125
-
126
- ### Create a new file:
127
- ```bash
128
- cat <<'EOF' > newfile.py
129
- import numpy as np
130
- hello = "world"
131
- print(hello)
132
- EOF
133
- ```
134
-
135
- ### Edit files with sed:
136
- ```bash
137
- # Replace all occurrences
138
- sed -i 's/old_string/new_string/g' filename.py
139
-
140
- # Replace only first occurrence
141
- sed -i 's/old_string/new_string/' filename.py
142
-
143
- # Replace first occurrence on line 1
144
- sed -i '1s/old_string/new_string/' filename.py
145
-
146
- # Replace all occurrences in lines 1-10
147
- sed -i '1,10s/old_string/new_string/g' filename.py
148
- ```
149
-
150
- ### View file content:
151
- ```bash
152
- # View specific lines with numbers
153
- nl -ba filename.py | sed -n '10,20p'
154
- ```
155
-
156
- ### Any other command you want to run
157
- ```bash
158
- anything
159
- ```
160
-
161
- ## Submission
162
- When you've completed your work (reading, editing, testing), and cannot make further progress
163
- issue exactly the following command:
164
-
165
- ```bash
166
- echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT && git add -A && git diff --cached
167
- ```
168
-
169
- This command will submit your work.
170
- You cannot continue working (reading, editing, testing) in any way on this task after submitting.
171
- </instructions>
172
- action_observation_template: |
173
- <returncode>{{output.returncode}}</returncode>
174
- {% if output.output | length < 10000 -%}
175
- <output>
176
- {{ output.output -}}
177
- </output>
178
- {%- else -%}
179
- <warning>
180
- The output of your last command was too long.
181
- Please try a different command that produces less output.
182
- If you're looking at a file you can try use head, tail or sed to view a smaller number of lines selectively.
183
- If you're using grep or find and it produced too much output, you can use a more selective search pattern.
184
- If you really need to see something from the full command's output, you can redirect output to a file and then search in that file.
185
- </warning>
186
- {%- set elided_chars = output.output | length - 10000 -%}
187
- <output_head>
188
- {{ output.output[:5000] }}
189
- </output_head>
190
- <elided_chars>
191
- {{ elided_chars }} characters elided
192
- </elided_chars>
193
- <output_tail>
194
- {{ output.output[-5000:] }}
195
- </output_tail>
196
- {%- endif -%}
197
- format_error_template: |
198
- Please always provide EXACTLY ONE action in triple backticks, found {{actions|length}} actions.
199
-
200
- Please format your action in triple backticks as shown in <response_example>.
201
-
202
- <response_example>
203
- Here are some thoughts about why you want to perform the action.
204
-
205
- ```bash
206
- <action>
207
- ```
208
- </response_example>
209
-
210
- If you have completed your assignment, please consult the first message about how to
211
- submit your solution (you will not be able to continue working on this task after that).
212
- step_limit: 250
213
- cost_limit: 3.
214
-
215
- environment:
216
- cwd: "/testbed"
217
- timeout: 60
218
- env:
219
- PAGER: cat
220
- MANPAGER: cat
221
- LESS: -R
222
- PIP_PROGRESS_BAR: 'off'
223
- TQDM_DISABLE: '1'
224
- environment_class: docker
225
-
226
- model:
227
- model_name: "roulette"
228
- model_class: "minisweagent.models.extra.roulette.RouletteModel"
229
- model_kwargs:
230
- - model_name: "anthropic/claude-sonnet-4-5-20250929"
231
- model_kwargs:
232
- temperature: 0.
233
- - model_name: "gpt-5"