inspect-swe 0.2.7__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/PKG-INFO +2 -2
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/pyproject.toml +1 -1
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/claude_code.py +20 -4
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/install/download.py +9 -5
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/install/install.py +3 -1
- inspect_swe-0.2.9/src/inspect_swe/_util/trace.py +10 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_version.py +2 -2
- inspect_swe-0.2.7/src/inspect_swe/_util/trace.py +0 -7
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/.gitignore +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/LICENSE +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/README.md +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/__init__.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/__init__.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/install/__init__.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/install/cache.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_registry.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_tools/__init__.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_tools/download.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_util/__init__.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_util/_async.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_util/_yaml.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_util/appdirs.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_util/checksum.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_util/constants.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_util/download.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_util/platform.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_util/sandbox.py +0 -0
- {inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/py.typed +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: inspect_swe
|
3
|
-
Version: 0.2.7
|
3
|
+
Version: 0.2.9
|
4
4
|
Summary: Software engineering agents for Inspect AI.
|
5
5
|
Project-URL: Documentation, https://meridianlabs-ai.github.io/inspect_swe/
|
6
6
|
Project-URL: Source Code, https://github.com/meridianlabs-ai/inspect_swe
|
@@ -10,7 +10,7 @@ License: MIT License
|
|
10
10
|
License-File: LICENSE
|
11
11
|
Requires-Python: >=3.10
|
12
12
|
Requires-Dist: httpx
|
13
|
-
Requires-Dist: inspect-ai>=0.3.
|
13
|
+
Requires-Dist: inspect-ai>=0.3.128
|
14
14
|
Requires-Dist: nest-asyncio
|
15
15
|
Requires-Dist: platformdirs
|
16
16
|
Requires-Dist: pydantic>=2.11.4
|
@@ -16,6 +16,8 @@ from inspect_ai.tool import MCPServerConfig
|
|
16
16
|
from inspect_ai.util import sandbox as sandbox_env
|
17
17
|
from pydantic_core import to_json
|
18
18
|
|
19
|
+
from inspect_swe._util.trace import trace
|
20
|
+
|
19
21
|
from .._util._async import is_callable_coroutine
|
20
22
|
from .install.install import ensure_claude_code_installed
|
21
23
|
|
@@ -34,10 +36,11 @@ def claude_code(
|
|
34
36
|
attempts: int | AgentAttempts = 1,
|
35
37
|
model: str | None = None,
|
36
38
|
small_model: str | None = None,
|
39
|
+
cwd: str | None = None,
|
37
40
|
env: dict[str, str] | None = None,
|
38
|
-
version: Literal["auto", "sandbox", "stable", "latest"] | str = "auto",
|
39
41
|
user: str | None = None,
|
40
42
|
sandbox: str | None = None,
|
43
|
+
version: Literal["auto", "sandbox", "stable", "latest"] | str = "auto",
|
41
44
|
) -> Agent:
|
42
45
|
"""Claude Code agent.
|
43
46
|
|
@@ -60,15 +63,16 @@ def claude_code(
|
|
60
63
|
attempts: Configure agent to make multiple attempts.
|
61
64
|
model: Model name to use for Opus and Sonnet calls (defaults to main model for task).
|
62
65
|
small_model: Model to use for Haiku calls (defaults to main model for task).
|
66
|
+
cwd: Working directory to run claude code within.
|
63
67
|
env: Environment variables to set for claude code.
|
68
|
+
user: User to execute claude code with.
|
69
|
+
sandbox: Optional sandbox environment name.
|
64
70
|
version: Version of claude code to use. One of:
|
65
71
|
- "auto": Use any available version of claude code in the sandbox, otherwise download the current stable version.
|
66
72
|
- "sandbox": Use the version of claude code in the sandbox (raises `RuntimeError` if claude is not available in the sandbox)
|
67
73
|
- "stable": Download and use the current stable version of claude code.
|
68
74
|
- "latest": Download and use the very latest version of claude code.
|
69
75
|
- "x.x.x": Download and use a specific version of claude code.
|
70
|
-
user: User to execute claude code with.
|
71
|
-
sandbox: Optional sandbox environment name.
|
72
76
|
"""
|
73
77
|
# resolve models
|
74
78
|
model = f"inspect/{model}" if model is not None else "inspect"
|
@@ -91,6 +95,8 @@ def claude_code(
|
|
91
95
|
cmd = [
|
92
96
|
"--print", # run without interactions
|
93
97
|
"--dangerously-skip-permissions",
|
98
|
+
"--debug",
|
99
|
+
"--verbose",
|
94
100
|
"--model",
|
95
101
|
model,
|
96
102
|
]
|
@@ -127,7 +133,8 @@ def claude_code(
|
|
127
133
|
# resolve sandbox
|
128
134
|
sbox = sandbox_env(sandbox)
|
129
135
|
|
130
|
-
# execute the agent
|
136
|
+
# execute the agent (track debug output)
|
137
|
+
debug_output: list[str] = []
|
131
138
|
agent_prompt = prompt
|
132
139
|
attempt_count = 0
|
133
140
|
while True:
|
@@ -140,6 +147,7 @@ def claude_code(
|
|
140
147
|
# run agent
|
141
148
|
result = await sbox.exec(
|
142
149
|
cmd=agent_cmd,
|
150
|
+
cwd=cwd,
|
143
151
|
env={
|
144
152
|
"ANTHROPIC_BASE_URL": f"http://localhost:{bridge.port}",
|
145
153
|
"ANTHROPIC_API_KEY": "sk-ant-api03-DOq5tyLPrk9M4hPE",
|
@@ -156,6 +164,10 @@ def claude_code(
|
|
156
164
|
user=user,
|
157
165
|
)
|
158
166
|
|
167
|
+
# track debug output
|
168
|
+
debug_output.append(result.stdout)
|
169
|
+
debug_output.append(result.stderr)
|
170
|
+
|
159
171
|
# raise for error
|
160
172
|
if not result.success:
|
161
173
|
f"Error executing claude code agent: {result.stdout}\n{result.stderr}"
|
@@ -185,6 +197,10 @@ def claude_code(
|
|
185
197
|
else:
|
186
198
|
agent_prompt = attempts.incorrect_message
|
187
199
|
|
200
|
+
# trace debug info
|
201
|
+
debug_output.insert(0, "Claude Code Debug Output:")
|
202
|
+
trace("\n".join(debug_output))
|
203
|
+
|
188
204
|
return bridge.state
|
189
205
|
|
190
206
|
# return agent with specified name and description
|
@@ -1,12 +1,11 @@
|
|
1
1
|
import re
|
2
|
-
from typing import Literal
|
2
|
+
from typing import Callable, Literal
|
3
3
|
|
4
4
|
from pydantic import BaseModel
|
5
5
|
|
6
6
|
from ..._util.checksum import verify_checksum
|
7
7
|
from ..._util.download import download_file, download_text_file
|
8
8
|
from ..._util.sandbox import SandboxPlatform
|
9
|
-
from ..._util.trace import trace
|
10
9
|
from .cache import (
|
11
10
|
read_cached_claude_code_binary,
|
12
11
|
write_cached_claude_code_binary,
|
@@ -14,8 +13,13 @@ from .cache import (
|
|
14
13
|
|
15
14
|
|
16
15
|
async def download_claude_code_async(
|
17
|
-
version: Literal["stable", "latest"] | str,
|
16
|
+
version: Literal["stable", "latest"] | str,
|
17
|
+
platform: SandboxPlatform,
|
18
|
+
logger: Callable[[str], None] | None = None,
|
18
19
|
) -> bytes:
|
20
|
+
# resolve logger
|
21
|
+
logger = logger or print
|
22
|
+
|
19
23
|
# determine version and checksum
|
20
24
|
gcs_bucket = await _claude_code_gcs_bucket()
|
21
25
|
version = await _claude_code_version(gcs_bucket, version)
|
@@ -35,9 +39,9 @@ async def download_claude_code_async(
|
|
35
39
|
write_cached_claude_code_binary(binary_data, version, platform)
|
36
40
|
|
37
41
|
# trace
|
38
|
-
|
42
|
+
logger(f"Downloaded claude code binary: {version} ({platform})")
|
39
43
|
else:
|
40
|
-
|
44
|
+
logger(f"Used claude code binary from cache: {version} ({platform})")
|
41
45
|
|
42
46
|
# return data
|
43
47
|
return binary_data
|
@@ -50,7 +50,9 @@ async def ensure_claude_code_installed(
|
|
50
50
|
|
51
51
|
# download the binary
|
52
52
|
if claude_binary_bytes is None:
|
53
|
-
claude_binary_bytes = await download_claude_code_async(
|
53
|
+
claude_binary_bytes = await download_claude_code_async(
|
54
|
+
version, platform, trace
|
55
|
+
)
|
54
56
|
|
55
57
|
# write it into the container and return it
|
56
58
|
claude_binary = f"/opt/claude-{version}-{platform}"
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
28
28
|
commit_id: COMMIT_ID
|
29
29
|
__commit_id__: COMMIT_ID
|
30
30
|
|
31
|
-
__version__ = version = '0.2.7'
|
32
|
-
__version_tuple__ = version_tuple = (0, 2, 7)
|
31
|
+
__version__ = version = '0.2.9'
|
32
|
+
__version_tuple__ = version_tuple = (0, 2, 9)
|
33
33
|
|
34
34
|
__commit_id__ = commit_id = None
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|