inspect-swe 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,8 @@ from inspect_ai.tool import MCPServerConfig
16
16
  from inspect_ai.util import sandbox as sandbox_env
17
17
  from pydantic_core import to_json
18
18
 
19
+ from inspect_swe._util.trace import trace
20
+
19
21
  from .._util._async import is_callable_coroutine
20
22
  from .install.install import ensure_claude_code_installed
21
23
 
@@ -34,10 +36,11 @@ def claude_code(
34
36
  attempts: int | AgentAttempts = 1,
35
37
  model: str | None = None,
36
38
  small_model: str | None = None,
39
+ cwd: str | None = None,
37
40
  env: dict[str, str] | None = None,
38
- version: Literal["auto", "sandbox", "stable", "latest"] | str = "auto",
39
41
  user: str | None = None,
40
42
  sandbox: str | None = None,
43
+ version: Literal["auto", "sandbox", "stable", "latest"] | str = "auto",
41
44
  ) -> Agent:
42
45
  """Claude Code agent.
43
46
 
@@ -60,15 +63,16 @@ def claude_code(
60
63
  attempts: Configure agent to make multiple attempts.
61
64
  model: Model name to use for Opus and Sonnet calls (defaults to main model for task).
62
65
  small_model: Model to use for Haiku calls (defaults to main model for task).
66
+ cwd: Working directory to run claude code within.
63
67
  env: Environment variables to set for claude code.
68
+ user: User to execute claude code with.
69
+ sandbox: Optional sandbox environment name.
64
70
  version: Version of claude code to use. One of:
65
71
  - "auto": Use any available version of claude code in the sandbox, otherwise download the current stable version.
66
72
  - "sandbox": Use the version of claude code in the sandbox (raises `RuntimeError` if claude is not available in the sandbox)
67
73
  - "stable": Download and use the current stable version of claude code.
68
74
  - "latest": Download and use the very latest version of claude code.
69
75
  - "x.x.x": Download and use a specific version of claude code.
70
- user: User to execute claude code with.
71
- sandbox: Optional sandbox environment name.
72
76
  """
73
77
  # resolve models
74
78
  model = f"inspect/{model}" if model is not None else "inspect"
@@ -91,6 +95,8 @@ def claude_code(
91
95
  cmd = [
92
96
  "--print", # run without interactions
93
97
  "--dangerously-skip-permissions",
98
+ "--debug",
99
+ "--verbose",
94
100
  "--model",
95
101
  model,
96
102
  ]
@@ -127,7 +133,8 @@ def claude_code(
127
133
  # resolve sandbox
128
134
  sbox = sandbox_env(sandbox)
129
135
 
130
- # execute the agent
136
+ # execute the agent (track debug output)
137
+ debug_output: list[str] = []
131
138
  agent_prompt = prompt
132
139
  attempt_count = 0
133
140
  while True:
@@ -140,6 +147,7 @@ def claude_code(
140
147
  # run agent
141
148
  result = await sbox.exec(
142
149
  cmd=agent_cmd,
150
+ cwd=cwd,
143
151
  env={
144
152
  "ANTHROPIC_BASE_URL": f"http://localhost:{bridge.port}",
145
153
  "ANTHROPIC_API_KEY": "sk-ant-api03-DOq5tyLPrk9M4hPE",
@@ -156,6 +164,10 @@ def claude_code(
156
164
  user=user,
157
165
  )
158
166
 
167
+ # track debug output
168
+ debug_output.append(result.stdout)
169
+ debug_output.append(result.stderr)
170
+
159
171
  # raise for error
160
172
  if not result.success:
161
173
  f"Error executing claude code agent: {result.stdout}\n{result.stderr}"
@@ -185,6 +197,10 @@ def claude_code(
185
197
  else:
186
198
  agent_prompt = attempts.incorrect_message
187
199
 
200
+ # trace debug info
201
+ debug_output.insert(0, "Claude Code Debug Output:")
202
+ trace("\n".join(debug_output))
203
+
188
204
  return bridge.state
189
205
 
190
206
  # return agent with specified name and description
@@ -1,12 +1,11 @@
1
1
  import re
2
- from typing import Literal
2
+ from typing import Callable, Literal
3
3
 
4
4
  from pydantic import BaseModel
5
5
 
6
6
  from ..._util.checksum import verify_checksum
7
7
  from ..._util.download import download_file, download_text_file
8
8
  from ..._util.sandbox import SandboxPlatform
9
- from ..._util.trace import trace
10
9
  from .cache import (
11
10
  read_cached_claude_code_binary,
12
11
  write_cached_claude_code_binary,
@@ -14,8 +13,13 @@ from .cache import (
14
13
 
15
14
 
16
15
  async def download_claude_code_async(
17
- version: Literal["stable", "latest"] | str, platform: SandboxPlatform
16
+ version: Literal["stable", "latest"] | str,
17
+ platform: SandboxPlatform,
18
+ logger: Callable[[str], None] | None = None,
18
19
  ) -> bytes:
20
+ # resolve logger
21
+ logger = logger or print
22
+
19
23
  # determine version and checksum
20
24
  gcs_bucket = await _claude_code_gcs_bucket()
21
25
  version = await _claude_code_version(gcs_bucket, version)
@@ -35,9 +39,9 @@ async def download_claude_code_async(
35
39
  write_cached_claude_code_binary(binary_data, version, platform)
36
40
 
37
41
  # trace
38
- trace(f"Downloaded claude code binary: {version} ({platform})")
42
+ logger(f"Downloaded claude code binary: {version} ({platform})")
39
43
  else:
40
- trace(f"Used claude code binary from cache: {version} ({platform})")
44
+ logger(f"Used claude code binary from cache: {version} ({platform})")
41
45
 
42
46
  # return data
43
47
  return binary_data
@@ -50,7 +50,9 @@ async def ensure_claude_code_installed(
50
50
 
51
51
  # download the binary
52
52
  if claude_binary_bytes is None:
53
- claude_binary_bytes = await download_claude_code_async(version, platform)
53
+ claude_binary_bytes = await download_claude_code_async(
54
+ version, platform, trace
55
+ )
54
56
 
55
57
  # write it into the container and return it
56
58
  claude_binary = f"/opt/claude-{version}-{platform}"
@@ -1,7 +1,10 @@
1
1
  from logging import getLogger
2
2
 
3
+ from inspect_ai.util import trace_message
4
+
3
5
  logger = getLogger(__file__)
4
6
 
5
7
 
6
8
  def trace(message: str) -> None:
7
- logger.info(f"[Inspect SWE] {message}")
9
+ logger.setLevel("TRACE")
10
+ trace_message(logger, category="Inspect SWE", message=message)
inspect_swe/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.2.7'
32
- __version_tuple__ = version_tuple = (0, 2, 7)
31
+ __version__ = version = '0.2.9'
32
+ __version_tuple__ = version_tuple = (0, 2, 9)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspect_swe
3
- Version: 0.2.7
3
+ Version: 0.2.9
4
4
  Summary: Software engineering agents for Inspect AI.
5
5
  Project-URL: Documentation, https://meridianlabs-ai.github.io/inspect_swe/
6
6
  Project-URL: Source Code, https://github.com/meridianlabs-ai/inspect_swe
@@ -10,7 +10,7 @@ License: MIT License
10
10
  License-File: LICENSE
11
11
  Requires-Python: >=3.10
12
12
  Requires-Dist: httpx
13
- Requires-Dist: inspect-ai>=0.3.126
13
+ Requires-Dist: inspect-ai>=0.3.128
14
14
  Requires-Dist: nest-asyncio
15
15
  Requires-Dist: platformdirs
16
16
  Requires-Dist: pydantic>=2.11.4
@@ -1,13 +1,13 @@
1
1
  inspect_swe/__init__.py,sha256=yJ9tBcF2Wy11mVmLh1fTYXgYcsSHv30GAW-tVwE-r3s,342
2
2
  inspect_swe/_registry.py,sha256=jM37ysrY39Ufd67GRKbiwfSViOLlm-82lm_JEaWKshw,97
3
- inspect_swe/_version.py,sha256=yXzK2akXKIKUAfJk0WCQothqygqvndys6GBuXxo-wk0,704
3
+ inspect_swe/_version.py,sha256=051on7ZmwGNyKvbO1AXKoElw7RjLuRmeJqVOApytNd4,704
4
4
  inspect_swe/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  inspect_swe/_claude_code/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- inspect_swe/_claude_code/claude_code.py,sha256=V1C79iWhVawTYy-JeRsUYM6VdiGWO_bjH_qRWf3r5lM,9825
6
+ inspect_swe/_claude_code/claude_code.py,sha256=VcFI4UfJU5oKMMZ63pRnvlltdAUA3M6Amnj6aBfpOFk,10380
7
7
  inspect_swe/_claude_code/install/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  inspect_swe/_claude_code/install/cache.py,sha256=k08bCxGq-iYVpO16LNQhPjxTM9p2iecpqMjqYd2WBss,1708
9
- inspect_swe/_claude_code/install/download.py,sha256=s1y4CDHVbJenfsR7OUwwxr5QFp-rDi4XnIxumDEvmws,3217
10
- inspect_swe/_claude_code/install/install.py,sha256=nbf1SZJzr4DBPfUmBH64zWcdI4AnKiKhm4Q4Zelh_TM,2483
9
+ inspect_swe/_claude_code/install/download.py,sha256=AKyyaaWAinoSsrb4m07IqCyF74eHISNDcuv3Z3jF-S8,3301
10
+ inspect_swe/_claude_code/install/install.py,sha256=RZuRYB7SVqvZXgmNzJYAYRogHZv8z4NiTXx1y0j6f0A,2520
11
11
  inspect_swe/_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  inspect_swe/_tools/download.py,sha256=Jn_gcFR5Kw2vTYA1dWOFYRpqFtoFnKFv2Kv-4xT8tz4,1283
13
13
  inspect_swe/_util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -19,9 +19,9 @@ inspect_swe/_util/constants.py,sha256=xKvGgaJ0MwNbdzaken5HMbxYyKBEw_3VrBwCgkvAIW
19
19
  inspect_swe/_util/download.py,sha256=cCUau4ZBOKezpotJV5-v3JY_5CuYDZ-VcWlLf_EyNL0,340
20
20
  inspect_swe/_util/platform.py,sha256=wm4efIFfdyTeaV2oxOXVvYl1u22MHX3jQMERHJMgv7A,339
21
21
  inspect_swe/_util/sandbox.py,sha256=2wYmVz5EGUDBhqbN3NgLAOsyKeU-KRI161MZMJ54n4M,1769
22
- inspect_swe/_util/trace.py,sha256=mFHmBKn2F8iJP9PpTHaCseMHnTMz3ErRx6RCKV83rZk,139
23
- inspect_swe-0.2.7.dist-info/METADATA,sha256=keVzMz6nRC72XluU9hqrPlgixueQtCj5bsNm98A6L0A,1724
24
- inspect_swe-0.2.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
25
- inspect_swe-0.2.7.dist-info/entry_points.txt,sha256=OzpvUhd7M3T2Rog4MjwJAxIKeX5ljiR0mVYM9GefBKg,49
26
- inspect_swe-0.2.7.dist-info/licenses/LICENSE,sha256=Hi3UDcbD6yCKZ1mcgt7pprzSG0rDEnSrbrm3XinyiDA,1070
27
- inspect_swe-0.2.7.dist-info/RECORD,,
22
+ inspect_swe/_util/trace.py,sha256=bZJlTqb9PSBAFm9RcJrC5dxJKoCvbll3ctWBT5ngNFE,234
23
+ inspect_swe-0.2.9.dist-info/METADATA,sha256=ykdoifSzHaGUkWsbprbiGOH97_UCW143cPudHbtc5-s,1724
24
+ inspect_swe-0.2.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
25
+ inspect_swe-0.2.9.dist-info/entry_points.txt,sha256=OzpvUhd7M3T2Rog4MjwJAxIKeX5ljiR0mVYM9GefBKg,49
26
+ inspect_swe-0.2.9.dist-info/licenses/LICENSE,sha256=Hi3UDcbD6yCKZ1mcgt7pprzSG0rDEnSrbrm3XinyiDA,1070
27
+ inspect_swe-0.2.9.dist-info/RECORD,,