inspect-swe 0.2.8__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/PKG-INFO +1 -1
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/claude_code.py +14 -1
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/install/download.py +9 -5
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/install/install.py +3 -1
- inspect_swe-0.2.9/src/inspect_swe/_util/trace.py +10 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_version.py +2 -2
- inspect_swe-0.2.8/src/inspect_swe/_util/trace.py +0 -7
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/.gitignore +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/LICENSE +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/README.md +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/pyproject.toml +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/__init__.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/__init__.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/install/__init__.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/install/cache.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_registry.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_tools/__init__.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_tools/download.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/__init__.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/_async.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/_yaml.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/appdirs.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/checksum.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/constants.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/download.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/platform.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/sandbox.py +0 -0
- {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/py.typed +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: inspect_swe
|
3
|
-
Version: 0.2.8
|
3
|
+
Version: 0.2.9
|
4
4
|
Summary: Software engineering agents for Inspect AI.
|
5
5
|
Project-URL: Documentation, https://meridianlabs-ai.github.io/inspect_swe/
|
6
6
|
Project-URL: Source Code, https://github.com/meridianlabs-ai/inspect_swe
|
@@ -16,6 +16,8 @@ from inspect_ai.tool import MCPServerConfig
|
|
16
16
|
from inspect_ai.util import sandbox as sandbox_env
|
17
17
|
from pydantic_core import to_json
|
18
18
|
|
19
|
+
from inspect_swe._util.trace import trace
|
20
|
+
|
19
21
|
from .._util._async import is_callable_coroutine
|
20
22
|
from .install.install import ensure_claude_code_installed
|
21
23
|
|
@@ -93,6 +95,8 @@ def claude_code(
|
|
93
95
|
cmd = [
|
94
96
|
"--print", # run without interactions
|
95
97
|
"--dangerously-skip-permissions",
|
98
|
+
"--debug",
|
99
|
+
"--verbose",
|
96
100
|
"--model",
|
97
101
|
model,
|
98
102
|
]
|
@@ -129,7 +133,8 @@ def claude_code(
|
|
129
133
|
# resolve sandbox
|
130
134
|
sbox = sandbox_env(sandbox)
|
131
135
|
|
132
|
-
# execute the agent
|
136
|
+
# execute the agent (track debug output)
|
137
|
+
debug_output: list[str] = []
|
133
138
|
agent_prompt = prompt
|
134
139
|
attempt_count = 0
|
135
140
|
while True:
|
@@ -159,6 +164,10 @@ def claude_code(
|
|
159
164
|
user=user,
|
160
165
|
)
|
161
166
|
|
167
|
+
# track debug output
|
168
|
+
debug_output.append(result.stdout)
|
169
|
+
debug_output.append(result.stderr)
|
170
|
+
|
162
171
|
# raise for error
|
163
172
|
if not result.success:
|
164
173
|
f"Error executing claude code agent: {result.stdout}\n{result.stderr}"
|
@@ -188,6 +197,10 @@ def claude_code(
|
|
188
197
|
else:
|
189
198
|
agent_prompt = attempts.incorrect_message
|
190
199
|
|
200
|
+
# trace debug info
|
201
|
+
debug_output.insert(0, "Claude Code Debug Output:")
|
202
|
+
trace("\n".join(debug_output))
|
203
|
+
|
191
204
|
return bridge.state
|
192
205
|
|
193
206
|
# return agent with specified name and description
|
@@ -1,12 +1,11 @@
|
|
1
1
|
import re
|
2
|
-
from typing import Literal
|
2
|
+
from typing import Callable, Literal
|
3
3
|
|
4
4
|
from pydantic import BaseModel
|
5
5
|
|
6
6
|
from ..._util.checksum import verify_checksum
|
7
7
|
from ..._util.download import download_file, download_text_file
|
8
8
|
from ..._util.sandbox import SandboxPlatform
|
9
|
-
from ..._util.trace import trace
|
10
9
|
from .cache import (
|
11
10
|
read_cached_claude_code_binary,
|
12
11
|
write_cached_claude_code_binary,
|
@@ -14,8 +13,13 @@ from .cache import (
|
|
14
13
|
|
15
14
|
|
16
15
|
async def download_claude_code_async(
|
17
|
-
version: Literal["stable", "latest"] | str,
|
16
|
+
version: Literal["stable", "latest"] | str,
|
17
|
+
platform: SandboxPlatform,
|
18
|
+
logger: Callable[[str], None] | None = None,
|
18
19
|
) -> bytes:
|
20
|
+
# resolve logger
|
21
|
+
logger = logger or print
|
22
|
+
|
19
23
|
# determine version and checksum
|
20
24
|
gcs_bucket = await _claude_code_gcs_bucket()
|
21
25
|
version = await _claude_code_version(gcs_bucket, version)
|
@@ -35,9 +39,9 @@ async def download_claude_code_async(
|
|
35
39
|
write_cached_claude_code_binary(binary_data, version, platform)
|
36
40
|
|
37
41
|
# trace
|
38
|
-
|
42
|
+
logger(f"Downloaded claude code binary: {version} ({platform})")
|
39
43
|
else:
|
40
|
-
|
44
|
+
logger(f"Used claude code binary from cache: {version} ({platform})")
|
41
45
|
|
42
46
|
# return data
|
43
47
|
return binary_data
|
@@ -50,7 +50,9 @@ async def ensure_claude_code_installed(
|
|
50
50
|
|
51
51
|
# download the binary
|
52
52
|
if claude_binary_bytes is None:
|
53
|
-
claude_binary_bytes = await download_claude_code_async(
|
53
|
+
claude_binary_bytes = await download_claude_code_async(
|
54
|
+
version, platform, trace
|
55
|
+
)
|
54
56
|
|
55
57
|
# write it into the container and return it
|
56
58
|
claude_binary = f"/opt/claude-{version}-{platform}"
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
28
28
|
commit_id: COMMIT_ID
|
29
29
|
__commit_id__: COMMIT_ID
|
30
30
|
|
31
|
-
__version__ = version = '0.2.8'
|
32
|
-
__version_tuple__ = version_tuple = (0, 2, 8)
|
31
|
+
__version__ = version = '0.2.9'
|
32
|
+
__version_tuple__ = version_tuple = (0, 2, 9)
|
33
33
|
|
34
34
|
__commit_id__ = commit_id = None
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|