mini-swe-agent 1.8.0__py3-none-any.whl → 1.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mini-swe-agent
3
- Version: 1.8.0
3
+ Version: 1.9.0
4
4
  Summary: Nano SWE Agent - A simple AI software engineering agent
5
5
  Author-email: Kilian Lieret <kilian.lieret@posteo.de>, "Carlos E. Jimenez" <carlosej@princeton.edu>
6
6
  License: MIT License
@@ -50,7 +50,7 @@ Requires-Dist: textual
50
50
  Requires-Dist: prompt_toolkit
51
51
  Provides-Extra: full
52
52
  Requires-Dist: mini-swe-agent[dev]; extra == "full"
53
- Requires-Dist: swe-rex; extra == "full"
53
+ Requires-Dist: swe-rex>=1.4.0; extra == "full"
54
54
  Provides-Extra: dev
55
55
  Requires-Dist: datasets; extra == "dev"
56
56
  Requires-Dist: pytest; extra == "dev"
@@ -1,9 +1,9 @@
1
- mini_swe_agent-1.8.0.dist-info/licenses/LICENSE.md,sha256=D3luWPkdHAe7LBsdD4vzqDAXw6Xewb3G-uczss0uh1s,1094
2
- minisweagent/__init__.py,sha256=uHro6Ebka9Iwr8HmO6GaEvNEuwJh_JKSd_54XNw445c,1797
1
+ mini_swe_agent-1.9.0.dist-info/licenses/LICENSE.md,sha256=D3luWPkdHAe7LBsdD4vzqDAXw6Xewb3G-uczss0uh1s,1094
2
+ minisweagent/__init__.py,sha256=qO_mqMrgTR4dyM2Eih8wQvc_Gd_0SJ6vpZ6khgGIM9g,1909
3
3
  minisweagent/__main__.py,sha256=FIyAOiw--c3FQ2g240FOM1FdL0lk_PxSpixu0pQ7WFo,194
4
4
  minisweagent/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  minisweagent/agents/__init__.py,sha256=cpjJLzg1IGxLM-tZpoMJV9S33ye13XtdBO0x7DU_Lrk,48
6
- minisweagent/agents/default.py,sha256=2KiXOGNXLlhMqFtcsYGICTnfk8pUHn292g_kOlMyECY,5555
6
+ minisweagent/agents/default.py,sha256=znoeQPSnSLNvrvVC07kwgEb-rqi0yxf3Kv7JPeMDNsI,5624
7
7
  minisweagent/agents/interactive.py,sha256=7HW2cffaV5f66DIjxvtIbL8mo_S5aZSwgNLSmHp6VC0,7450
8
8
  minisweagent/agents/interactive_textual.py,sha256=yYVtgHXdrKkirMyyHehYs5S3e7ddzqtoMSx8rU8_zBo,17944
9
9
  minisweagent/config/README.md,sha256=ABd9anA4aRWtx7Oh37z36Wv6ARvcxD2w9lPUE24R2mY,435
@@ -16,15 +16,15 @@ minisweagent/config/mini_no_temp.yaml,sha256=n0W-017tBmMx57U9SLt7Fy9WJxI9x2vdTWB
16
16
  minisweagent/config/extra/__init__.py,sha256=e1MoAlDn_wc9HnXNoncf1P-B4DQ-iRf6n7Q_txjZGRI,52
17
17
  minisweagent/config/extra/swebench.yaml,sha256=5FKblpcNTHmVUNE1JLHo5_AsupvlwsrLj8I3R2mRItk,7680
18
18
  minisweagent/environments/__init__.py,sha256=tTnNjNAhMvIuB1mlesreBV5TLdQBp79qj_Mxr7HGzNk,1180
19
- minisweagent/environments/docker.py,sha256=dSkD8FtHb9yN_ejau3ekN-FaHQMH2AWdhfpvZOoH7NQ,3909
20
- minisweagent/environments/local.py,sha256=-2EV3RqZSB8WEjJE7BHLhRjocPMLpoJ3HbM8QB1WXUU,1060
21
- minisweagent/environments/singularity.py,sha256=fqDH4nTg3njHMe7BzQ4HUp_jAgXeKyLWm-TmH8WuNlA,2552
19
+ minisweagent/environments/docker.py,sha256=mj9I1HhwwyWBEm_lGVWEvBDyDzRKypiKoln5uSY58zw,4146
20
+ minisweagent/environments/local.py,sha256=dV05j4jPrmY0k5O_UtF_qrRqqdilyPUUkbn-YUhRC3M,1236
21
+ minisweagent/environments/singularity.py,sha256=sVZ1L_4ud7WbJxYIhi4WPNOK3dosqfYKZxak1V9LEjg,2765
22
22
  minisweagent/environments/extra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- minisweagent/environments/extra/swerex_docker.py,sha256=MOhhFdX1sAk_U0g-GOxohfjrldzO4YfrUnHq8qJff7c,1502
23
+ minisweagent/environments/extra/swerex_docker.py,sha256=wu2F_9moa8yj5IgCEquFOoxEXJ8Vi6BF19rJvRdoJIs,1665
24
24
  minisweagent/models/__init__.py,sha256=J4bnvfMByTVG0cL_6p51sm8gdargXhARfbG5c0UZ8Z4,2890
25
25
  minisweagent/models/anthropic.py,sha256=D8nHvvbgzPjla0He8p0O9kaXASPWg1Sai0pHsAj_Yn8,855
26
- minisweagent/models/litellm_model.py,sha256=fXhVJtNDhu9buB45p7S576r86f_9oLFdvgJnMZ1oJ4M,2384
27
- minisweagent/models/test_models.py,sha256=oB3jmZUire5TkVT8ebUCD3jLuLhPIbcTiTqdIix85Yw,1174
26
+ minisweagent/models/litellm_model.py,sha256=tEwAV2dzslJ4HFDXApXSPo5OWsiz8soy52P8-r9p4Xg,2538
27
+ minisweagent/models/test_models.py,sha256=ItCA6ddntzkYA7dzSuUEaLMV-AE8TBuXBFP8CzpiO3U,1351
28
28
  minisweagent/models/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
29
  minisweagent/models/utils/cache_control.py,sha256=mG9cE56HQaUwXfoqvXoH6LcbMV_G1vlEE1aBBpikXYg,1608
30
30
  minisweagent/models/utils/key_per_thread.py,sha256=Vlxt--rapNNYCgIHrMCu1WVAkuiVIhC_awbarkbnkZQ,644
@@ -36,14 +36,16 @@ minisweagent/run/mini.py,sha256=d-dtnddRDvs0Ub3mFuXJYsNh3sSfRnSPjp6877Y9O2I,4215
36
36
  minisweagent/run/mini_extra.py,sha256=ecA1PnTWElpO60G9RktvVLtUOf3bZ_ESmnSttS6izhQ,1465
37
37
  minisweagent/run/extra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
38
  minisweagent/run/extra/config.py,sha256=CEsEr8AdEm64Jods2ZRURChHKMILSatkBLkLmRywkrw,3672
39
- minisweagent/run/extra/swebench.py,sha256=1v5qGLaAOKdESEaa6qMgJUlUFh025gASpbEyxfzb4uM,10601
40
- minisweagent/run/extra/swebench_single.py,sha256=YWYAMr6rfsUCGtB_4_e_w_CQ5RWfLbXIXOOGV8HPDYc,2441
39
+ minisweagent/run/extra/swebench.py,sha256=mth9nqayjDZw07CHcXUD-ABmISLkVkJDl7G-MQ-8CjY,10828
40
+ minisweagent/run/extra/swebench_single.py,sha256=xbKjCLgDBPTKWAZa2H4gGWy6k3kadJYTJBGn-kytlu4,3023
41
41
  minisweagent/run/extra/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
42
  minisweagent/run/extra/utils/batch_progress.py,sha256=xhJ7FmsaTBGz-yh8pzYl4yMoUGjn7GA24eYrP-nHj60,6804
43
43
  minisweagent/run/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- minisweagent/run/utils/save.py,sha256=yI_hSU-GOaB7j8YeHBCc7Fhl4js9AyO9N5SC6p-nnu8,1606
45
- mini_swe_agent-1.8.0.dist-info/METADATA,sha256=jwXlBjL8-xSWt3QxyvLsw7E8erT0xtWYELQkdk3Ean8,13791
46
- mini_swe_agent-1.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
- mini_swe_agent-1.8.0.dist-info/entry_points.txt,sha256=d1_yRbTaGjs1UXHa6JQK0sKDGBIVGm8oeW0k2kfbJgQ,182
48
- mini_swe_agent-1.8.0.dist-info/top_level.txt,sha256=zKF4t8bFpV87fdVABZt2Da-vnb4Vkh_CxkwQx5YT4Ew,13
49
- mini_swe_agent-1.8.0.dist-info/RECORD,,
44
+ minisweagent/run/utils/save.py,sha256=MzIvUyCiqbq3EaQc8cKtaD0MEkcyXmUyl4ZcxYzZW_Y,1680
45
+ minisweagent/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
+ minisweagent/utils/log.py,sha256=cObZMmhncD1HoRiAnEGQ65jGUgXXofMu59SH9naXIuI,915
47
+ mini_swe_agent-1.9.0.dist-info/METADATA,sha256=NKTT4AES1ZOPVoW5cTPtzRYXv5hFLyiKgHo1Fxd2NBA,13798
48
+ mini_swe_agent-1.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
49
+ mini_swe_agent-1.9.0.dist-info/entry_points.txt,sha256=d1_yRbTaGjs1UXHa6JQK0sKDGBIVGm8oeW0k2kfbJgQ,182
50
+ mini_swe_agent-1.9.0.dist-info/top_level.txt,sha256=zKF4t8bFpV87fdVABZt2Da-vnb4Vkh_CxkwQx5YT4Ew,13
51
+ mini_swe_agent-1.9.0.dist-info/RECORD,,
minisweagent/__init__.py CHANGED
@@ -8,7 +8,7 @@ This file provides:
8
8
  unless you want the static type checking.
9
9
  """
10
10
 
11
- __version__ = "1.8.0"
11
+ __version__ = "1.9.0"
12
12
 
13
13
  import os
14
14
  from pathlib import Path
@@ -45,6 +45,8 @@ class Model(Protocol):
45
45
 
46
46
  def query(self, messages: list[dict[str, str]], **kwargs) -> dict: ...
47
47
 
48
+ def get_template_vars(self) -> dict[str, Any]: ...
49
+
48
50
 
49
51
  class Environment(Protocol):
50
52
  """Protocol for execution environments."""
@@ -53,6 +55,8 @@ class Environment(Protocol):
53
55
 
54
56
  def execute(self, command: str, cwd: str = "") -> dict[str, str]: ...
55
57
 
58
+ def get_template_vars(self) -> dict[str, Any]: ...
59
+
56
60
 
57
61
  class Agent(Protocol):
58
62
  """Protocol for agents."""
@@ -1,7 +1,5 @@
1
1
  """Basic agent class. See https://mini-swe-agent.com/latest/advanced/control_flow/ for visual explanation."""
2
2
 
3
- import os
4
- import platform
5
3
  import re
6
4
  import subprocess
7
5
  from collections.abc import Callable
@@ -61,19 +59,21 @@ class DefaultAgent:
61
59
  self.messages: list[dict] = []
62
60
  self.model = model
63
61
  self.env = env
62
+ self.extra_template_vars = {}
64
63
 
65
64
  def render_template(self, template: str, **kwargs) -> str:
66
- cs = asdict(self.config) | asdict(self.env.config) | asdict(self.model.config) | platform.uname()._asdict()
67
- return Template(template).render(**kwargs, **cs, **os.environ)
65
+ template_vars = asdict(self.config) | self.env.get_template_vars() | self.model.get_template_vars()
66
+ return Template(template).render(**kwargs, **template_vars, **self.extra_template_vars)
68
67
 
69
68
  def add_message(self, role: str, content: str, **kwargs):
70
69
  self.messages.append({"role": role, "content": content, **kwargs})
71
70
 
72
71
  def run(self, task: str, **kwargs) -> tuple[str, str]:
73
72
  """Run step() until agent is finished. Return exit status & message"""
73
+ self.extra_template_vars |= {"task": task, **kwargs}
74
74
  self.messages = []
75
75
  self.add_message("system", self.render_template(self.config.system_template))
76
- self.add_message("user", self.render_template(self.config.instance_template, task=task, **kwargs))
76
+ self.add_message("user", self.render_template(self.config.instance_template))
77
77
  while True:
78
78
  try:
79
79
  self.step()
@@ -2,9 +2,11 @@ import os
2
2
  import shlex
3
3
  import subprocess
4
4
  import uuid
5
- from dataclasses import dataclass, field
5
+ from dataclasses import asdict, dataclass, field
6
6
  from typing import Any
7
7
 
8
+ from minisweagent.utils.log import get_logger
9
+
8
10
 
9
11
  @dataclass
10
12
  class DockerEnvironmentConfig:
@@ -33,10 +35,14 @@ class DockerEnvironment:
33
35
  """This class executes bash commands in a Docker container using direct docker commands.
34
36
  See `DockerEnvironmentConfig` for keyword arguments.
35
37
  """
38
+ self.logger = get_logger("minisweagent.environment")
36
39
  self.container_id: str | None = None
37
40
  self.config = config_class(**kwargs)
38
41
  self._start_container()
39
42
 
43
+ def get_template_vars(self) -> dict[str, Any]:
44
+ return asdict(self.config)
45
+
40
46
  def _start_container(self):
41
47
  """Start the Docker container and return the container ID."""
42
48
  container_name = f"minisweagent-{uuid.uuid4().hex[:8]}"
@@ -53,7 +59,7 @@ class DockerEnvironment:
53
59
  "sleep",
54
60
  self.config.container_timeout,
55
61
  ]
56
- print(f"Starting container with command: {shlex.join(cmd)}")
62
+ self.logger.debug(f"Starting container with command: {shlex.join(cmd)}")
57
63
  result = subprocess.run(
58
64
  cmd,
59
65
  capture_output=True,
@@ -61,7 +67,7 @@ class DockerEnvironment:
61
67
  timeout=120, # docker pull might take a while
62
68
  check=True,
63
69
  )
64
- print(f"Started container {container_name} with ID {result.stdout.strip()}")
70
+ self.logger.info(f"Started container {container_name} with ID {result.stdout.strip()}")
65
71
  self.container_id = result.stdout.strip()
66
72
 
67
73
  def execute(self, command: str, cwd: str = "") -> dict[str, Any]:
@@ -91,7 +97,7 @@ class DockerEnvironment:
91
97
  def cleanup(self):
92
98
  """Stop and remove the Docker container."""
93
99
  if getattr(self, "container_id", None) is not None: # if init fails early, container_id might not be set
94
- print(f"Stopping container {self.container_id}")
100
+ self.logger.info(f"Stopping container {self.container_id}")
95
101
  cmd = f"(timeout 60 {self.config.executable} stop {self.container_id} || {self.config.executable} rm -f {self.container_id}) >/dev/null 2>&1 &"
96
102
  subprocess.Popen(cmd, shell=True)
97
103
 
@@ -1,5 +1,5 @@
1
1
  import asyncio
2
- from dataclasses import dataclass, field
2
+ from dataclasses import asdict, dataclass, field
3
3
  from typing import Any
4
4
 
5
5
  from swerex.deployment.docker import DockerDeployment
@@ -29,11 +29,19 @@ class SwerexDockerEnvironment:
29
29
  output = asyncio.run(
30
30
  self.deployment.runtime.execute(
31
31
  RexCommand(
32
- command=command, shell=True, check=False, cwd=cwd or self.config.cwd, timeout=self.config.timeout
32
+ command=command,
33
+ shell=True,
34
+ check=False,
35
+ cwd=cwd or self.config.cwd,
36
+ timeout=self.config.timeout,
37
+ merge_output_streams=True,
33
38
  )
34
39
  )
35
40
  )
36
41
  return {
37
- "output": f"<stdout>\n{output.stdout}</stdout>\n<stderr>\n{output.stderr}</stderr>",
42
+ "output": output.stdout,
38
43
  "returncode": output.exit_code,
39
44
  }
45
+
46
+ def get_template_vars(self) -> dict[str, Any]:
47
+ return asdict(self.config)
@@ -1,6 +1,8 @@
1
1
  import os
2
+ import platform
2
3
  import subprocess
3
- from dataclasses import dataclass, field
4
+ from dataclasses import asdict, dataclass, field
5
+ from typing import Any
4
6
 
5
7
 
6
8
  @dataclass
@@ -31,3 +33,6 @@ class LocalEnvironment:
31
33
  stderr=subprocess.STDOUT,
32
34
  )
33
35
  return {"output": result.stdout, "returncode": result.returncode}
36
+
37
+ def get_template_vars(self) -> dict[str, Any]:
38
+ return asdict(self.config) | platform.uname()._asdict() | os.environ
@@ -5,10 +5,12 @@ import shutil
5
5
  import subprocess
6
6
  import tempfile
7
7
  import uuid
8
- from dataclasses import dataclass, field
8
+ from dataclasses import asdict, dataclass, field
9
9
  from pathlib import Path
10
10
  from typing import Any
11
11
 
12
+ from minisweagent.utils.log import get_logger
13
+
12
14
 
13
15
  @dataclass
14
16
  class SingularityEnvironmentConfig:
@@ -27,14 +29,17 @@ class SingularityEnvironmentConfig:
27
29
  class SingularityEnvironment:
28
30
  def __init__(self, **kwargs):
29
31
  """Singularity environment. See `SingularityEnvironmentConfig` for kwargs."""
32
+ self.logger = get_logger("minisweagent.environment")
30
33
  self.config = SingularityEnvironmentConfig(**kwargs)
31
34
  self.sandbox_dir = Path(tempfile.gettempdir()) / f"minisweagent-{uuid.uuid4().hex[:8]}"
32
-
33
35
  subprocess.run(
34
36
  [self.config.executable, "build", "--sandbox", self.sandbox_dir, self.config.image],
35
37
  check=True,
36
38
  )
37
39
 
40
+ def get_template_vars(self) -> dict[str, Any]:
41
+ return asdict(self.config)
42
+
38
43
  def execute(self, command: str, cwd: str = "") -> dict[str, Any]:
39
44
  """Execute a command in a Singularity container and return the result as a dict."""
40
45
  cmd = [self.config.executable, "exec"]
@@ -66,7 +71,7 @@ class SingularityEnvironment:
66
71
 
67
72
  def cleanup(self):
68
73
  if self.sandbox_dir.exists():
69
- print(f"Removing sandbox {self.sandbox_dir}")
74
+ self.logger.info(f"Removing sandbox {self.sandbox_dir}")
70
75
  shutil.rmtree(self.sandbox_dir)
71
76
 
72
77
  def __del__(self):
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  import logging
3
3
  import os
4
- from dataclasses import dataclass, field
4
+ from dataclasses import asdict, dataclass, field
5
5
  from pathlib import Path
6
6
  from typing import Any
7
7
 
@@ -68,3 +68,6 @@ class LitellmModel:
68
68
  return {
69
69
  "content": response.choices[0].message.content or "", # type: ignore
70
70
  }
71
+
72
+ def get_template_vars(self) -> dict[str, Any]:
73
+ return asdict(self.config) | {"n_model_calls": self.n_calls, "model_cost": self.cost}
@@ -1,6 +1,7 @@
1
1
  import logging
2
2
  import time
3
- from dataclasses import dataclass
3
+ from dataclasses import asdict, dataclass
4
+ from typing import Any
4
5
 
5
6
  from minisweagent.models import GLOBAL_MODEL_STATS
6
7
 
@@ -36,3 +37,6 @@ class DeterministicModel:
36
37
  self.cost += self.config.cost_per_call
37
38
  GLOBAL_MODEL_STATS.add(self.config.cost_per_call)
38
39
  return {"content": output}
40
+
41
+ def get_template_vars(self) -> dict[str, Any]:
42
+ return asdict(self.config) | {"n_model_calls": self.n_calls, "model_cost": self.cost}
@@ -24,6 +24,7 @@ from minisweagent.environments import get_environment
24
24
  from minisweagent.models import get_model
25
25
  from minisweagent.run.extra.utils.batch_progress import RunBatchProgressManager
26
26
  from minisweagent.run.utils.save import save_traj
27
+ from minisweagent.utils.log import add_file_handlers, logger
27
28
 
28
29
  _HELP_TEXT = """Run mini-SWE-agent on SWEBench instances.
29
30
 
@@ -141,7 +142,7 @@ def process_instance(
141
142
  )
142
143
  exit_status, result = agent.run(task)
143
144
  except Exception as e:
144
- print(f"Error processing instance {instance_id}: {e}\n{traceback.format_exc()}")
145
+ logger.error(f"Error processing instance {instance_id}: {e}", exc_info=True)
145
146
  exit_status, result = type(e).__name__, str(e)
146
147
  extra_info = {"traceback": traceback.format_exc()}
147
148
  finally:
@@ -152,6 +153,7 @@ def process_instance(
152
153
  result=result,
153
154
  extra_info=extra_info,
154
155
  instance_id=instance_id,
156
+ print_fct=logger.info,
155
157
  )
156
158
  update_preds_file(output_dir / "preds.json", instance_id, model.config.model_name, result)
157
159
  progress_manager.on_instance_end(instance_id, exit_status)
@@ -168,12 +170,12 @@ def filter_instances(
168
170
  before_filter = len(instances)
169
171
  instances = [instance for instance in instances if re.match(filter_spec, instance["instance_id"])]
170
172
  if (after_filter := len(instances)) != before_filter:
171
- print(f"Instance filter: {before_filter} -> {after_filter} instances")
173
+ logger.info(f"Instance filter: {before_filter} -> {after_filter} instances")
172
174
  if slice_spec:
173
175
  values = [int(x) if x else None for x in slice_spec.split(":")]
174
176
  instances = instances[slice(*values)]
175
177
  if (after_slice := len(instances)) != before_filter:
176
- print(f"Instance slice: {before_filter} -> {after_slice} instances")
178
+ logger.info(f"Instance slice: {before_filter} -> {after_slice} instances")
177
179
  return instances
178
180
 
179
181
 
@@ -193,24 +195,28 @@ def main(
193
195
  environment_class: str | None = typer.Option( None, "--environment-class", help="Environment type to use. Recommended are docker or singularity", rich_help_panel="Advanced"),
194
196
  ) -> None:
195
197
  # fmt: on
198
+ output_path = Path(output)
199
+ output_path.mkdir(parents=True, exist_ok=True)
200
+ logger.info(f"Results will be saved to {output_path}")
201
+ add_file_handlers(output_path / "minisweagent.log")
202
+
196
203
  dataset_path = DATASET_MAPPING.get(subset, subset)
197
- print(f"Loading dataset {dataset_path}, split {split}...")
204
+ logger.info(f"Loading dataset {dataset_path}, split {split}...")
198
205
  instances = list(load_dataset(dataset_path, split=split))
199
206
 
200
207
  instances = filter_instances(instances, filter_spec=filter_spec, slice_spec=slice_spec, shuffle=shuffle)
201
- output_path = Path(output)
202
208
  if not redo_existing and (output_path / "preds.json").exists():
203
209
  existing_instances = list(json.loads((output_path / "preds.json").read_text()).keys())
204
- print(f"Skipping {len(existing_instances)} existing instances")
210
+ logger.info(f"Skipping {len(existing_instances)} existing instances")
205
211
  instances = [instance for instance in instances if instance["instance_id"] not in existing_instances]
212
+ logger.info(f"Running on {len(instances)} instances...")
206
213
 
207
- output_path.mkdir(parents=True, exist_ok=True)
208
- print(f"Running on {len(instances)} instances...")
209
- print(f"Results will be saved to {output_path}")
210
214
 
211
215
  config = yaml.safe_load(get_config_path(config_spec).read_text())
212
- config.setdefault("environment", {}).setdefault("environment_class", environment_class)
213
- config.setdefault("model", {}).setdefault("model_name", model)
216
+ if environment_class is not None:
217
+ config.setdefault("environment", {})["environment_class"] = environment_class
218
+ if model is not None:
219
+ config.setdefault("model", {})["model_name"] = model
214
220
 
215
221
  progress_manager = RunBatchProgressManager(len(instances), output_path / f"exit_statuses_{time.time()}.yaml")
216
222
 
@@ -222,8 +228,7 @@ def main(
222
228
  pass
223
229
  except Exception as e:
224
230
  instance_id = futures[future]
225
- print(f"Error in future for instance {instance_id}: {e}")
226
- traceback.print_exc()
231
+ logger.error(f"Error in future for instance {instance_id}: {e}", exc_info=True)
227
232
  progress_manager.on_uncaught_exception(instance_id, e)
228
233
 
229
234
  with Live(progress_manager.render_group, refresh_per_second=4):
@@ -237,7 +242,7 @@ def main(
237
242
  try:
238
243
  process_futures(futures)
239
244
  except KeyboardInterrupt:
240
- print("Cancelling all pending jobs. Press ^C again to exit immediately.")
245
+ logger.info("Cancelling all pending jobs. Press ^C again to exit immediately.")
241
246
  for future in futures:
242
247
  if not future.running() and not future.done():
243
248
  future.cancel()
@@ -6,6 +6,7 @@ import typer
6
6
  import yaml
7
7
  from datasets import load_dataset
8
8
 
9
+ from minisweagent import global_config_dir
9
10
  from minisweagent.agents.interactive import InteractiveAgent
10
11
  from minisweagent.config import builtin_config_dir, get_config_path
11
12
  from minisweagent.models import get_model
@@ -13,9 +14,13 @@ from minisweagent.run.extra.swebench import (
13
14
  DATASET_MAPPING,
14
15
  get_sb_environment,
15
16
  )
17
+ from minisweagent.run.utils.save import save_traj
18
+ from minisweagent.utils.log import logger
16
19
 
17
20
  app = typer.Typer(add_completion=False)
18
21
 
22
+ DEFAULT_OUTPUT = global_config_dir / "last_swebench_single_run.traj.json"
23
+
19
24
 
20
25
  # fmt: off
21
26
  @app.command()
@@ -27,11 +32,12 @@ def main(
27
32
  config_path: Path = typer.Option( builtin_config_dir / "extra" / "swebench.yaml", "-c", "--config", help="Path to a config file", rich_help_panel="Basic"),
28
33
  environment_class: str | None = typer.Option(None, "--environment-class", rich_help_panel="Advanced"),
29
34
  exit_immediately: bool = typer.Option( False, "--exit-immediately", help="Exit immediately when the agent wants to finish instead of prompting.", rich_help_panel="Basic"),
35
+ output: Path = typer.Option(DEFAULT_OUTPUT, "-o", "--output", help="Output trajectory file", rich_help_panel="Basic"),
30
36
  ) -> None:
31
37
  # fmt: on
32
38
  """Run on a single SWE-Bench instance."""
33
39
  dataset_path = DATASET_MAPPING.get(subset, subset)
34
- print(f"Loading dataset from {dataset_path}, split {split}...")
40
+ logger.info(f"Loading dataset from {dataset_path}, split {split}...")
35
41
  instances = {
36
42
  inst["instance_id"]: inst # type: ignore
37
43
  for inst in load_dataset(dataset_path, split=split)
@@ -41,16 +47,22 @@ def main(
41
47
  instance: dict = instances[instance_spec] # type: ignore
42
48
 
43
49
  config = yaml.safe_load(get_config_path(config_path).read_text())
44
- config.setdefault("environment", {}).setdefault("environment_class", environment_class)
50
+ if environment_class is not None:
51
+ config.setdefault("environment", {})["environment_class"] = environment_class
45
52
  if exit_immediately:
46
53
  config.setdefault("agent", {})["confirm_exit"] = False
47
54
  env = get_sb_environment(config, instance)
48
55
  agent = InteractiveAgent(
49
56
  get_model(model_name, config.get("model", {})),
50
57
  env,
51
- **(config.get("agent", {}) | {"mode": "yolo"}),
58
+ **({"mode": "yolo"} | config.get("agent", {})),
52
59
  )
53
- agent.run(instance["problem_statement"])
60
+
61
+ exit_status, result = None, None
62
+ try:
63
+ exit_status, result = agent.run(instance["problem_statement"]) # type: ignore[arg-type]
64
+ finally:
65
+ save_traj(agent, output, exit_status=exit_status, result=result) # type: ignore[arg-type]
54
66
 
55
67
 
56
68
  if __name__ == "__main__":
@@ -1,4 +1,5 @@
1
1
  import json
2
+ from collections.abc import Callable
2
3
  from pathlib import Path
3
4
 
4
5
  from minisweagent import Agent, __version__
@@ -12,6 +13,7 @@ def save_traj(
12
13
  exit_status: str | None = None,
13
14
  result: str | None = None,
14
15
  extra_info: dict | None = None,
16
+ print_fct: Callable = print,
15
17
  **kwargs,
16
18
  ):
17
19
  """Save the trajectory of the agent to a file.
@@ -49,4 +51,4 @@ def save_traj(
49
51
  path.parent.mkdir(parents=True, exist_ok=True)
50
52
  path.write_text(json.dumps(data, indent=2))
51
53
  if print_path:
52
- print(f"Saved trajectory to '{path}'")
54
+ print_fct(f"Saved trajectory to '{path}'")
File without changes
@@ -0,0 +1,32 @@
1
+ import logging
2
+ from pathlib import Path
3
+
4
+ MINI_LOGGERS = {}
5
+ _EXTRA_HANDLERS = []
6
+
7
+
8
+ def get_logger(name: str) -> logging.Logger:
9
+ if name in MINI_LOGGERS:
10
+ return MINI_LOGGERS[name]
11
+ logger = logging.getLogger(name)
12
+ logger.setLevel(logging.DEBUG)
13
+ handler = logging.StreamHandler()
14
+ formatter = logging.Formatter("%(name)s: %(levelname)s: %(message)s")
15
+ handler.setFormatter(formatter)
16
+ logger.addHandler(handler)
17
+ for handler in _EXTRA_HANDLERS:
18
+ logger.addHandler(handler)
19
+ MINI_LOGGERS[name] = logger
20
+ return logger
21
+
22
+
23
+ def add_file_handlers(path: Path):
24
+ handler = logging.FileHandler(path)
25
+ formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
26
+ handler.setFormatter(formatter)
27
+ _EXTRA_HANDLERS.append(handler)
28
+ for logger in MINI_LOGGERS.values():
29
+ logger.addHandler(handler)
30
+
31
+
32
+ logger = get_logger("minisweagent")