inspect-swe 0.2.8__tar.gz → 0.2.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/PKG-INFO +1 -1
  2. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/claude_code.py +14 -1
  3. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/install/download.py +9 -5
  4. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/install/install.py +3 -1
  5. inspect_swe-0.2.9/src/inspect_swe/_util/trace.py +10 -0
  6. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_version.py +2 -2
  7. inspect_swe-0.2.8/src/inspect_swe/_util/trace.py +0 -7
  8. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/.gitignore +0 -0
  9. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/LICENSE +0 -0
  10. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/README.md +0 -0
  11. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/pyproject.toml +0 -0
  12. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/__init__.py +0 -0
  13. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/__init__.py +0 -0
  14. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/install/__init__.py +0 -0
  15. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/install/cache.py +0 -0
  16. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_registry.py +0 -0
  17. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_tools/__init__.py +0 -0
  18. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_tools/download.py +0 -0
  19. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/__init__.py +0 -0
  20. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/_async.py +0 -0
  21. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/_yaml.py +0 -0
  22. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/appdirs.py +0 -0
  23. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/checksum.py +0 -0
  24. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/constants.py +0 -0
  25. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/download.py +0 -0
  26. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/platform.py +0 -0
  27. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/_util/sandbox.py +0 -0
  28. {inspect_swe-0.2.8 → inspect_swe-0.2.9}/src/inspect_swe/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspect_swe
3
- Version: 0.2.8
3
+ Version: 0.2.9
4
4
  Summary: Software engineering agents for Inspect AI.
5
5
  Project-URL: Documentation, https://meridianlabs-ai.github.io/inspect_swe/
6
6
  Project-URL: Source Code, https://github.com/meridianlabs-ai/inspect_swe
@@ -16,6 +16,8 @@ from inspect_ai.tool import MCPServerConfig
16
16
  from inspect_ai.util import sandbox as sandbox_env
17
17
  from pydantic_core import to_json
18
18
 
19
+ from inspect_swe._util.trace import trace
20
+
19
21
  from .._util._async import is_callable_coroutine
20
22
  from .install.install import ensure_claude_code_installed
21
23
 
@@ -93,6 +95,8 @@ def claude_code(
93
95
  cmd = [
94
96
  "--print", # run without interactions
95
97
  "--dangerously-skip-permissions",
98
+ "--debug",
99
+ "--verbose",
96
100
  "--model",
97
101
  model,
98
102
  ]
@@ -129,7 +133,8 @@ def claude_code(
129
133
  # resolve sandbox
130
134
  sbox = sandbox_env(sandbox)
131
135
 
132
- # execute the agent
136
+ # execute the agent (track debug output)
137
+ debug_output: list[str] = []
133
138
  agent_prompt = prompt
134
139
  attempt_count = 0
135
140
  while True:
@@ -159,6 +164,10 @@ def claude_code(
159
164
  user=user,
160
165
  )
161
166
 
167
+ # track debug output
168
+ debug_output.append(result.stdout)
169
+ debug_output.append(result.stderr)
170
+
162
171
  # raise for error
163
172
  if not result.success:
164
173
  f"Error executing claude code agent: {result.stdout}\n{result.stderr}"
@@ -188,6 +197,10 @@ def claude_code(
188
197
  else:
189
198
  agent_prompt = attempts.incorrect_message
190
199
 
200
+ # trace debug info
201
+ debug_output.insert(0, "Claude Code Debug Output:")
202
+ trace("\n".join(debug_output))
203
+
191
204
  return bridge.state
192
205
 
193
206
  # return agent with specified name and description
@@ -1,12 +1,11 @@
1
1
  import re
2
- from typing import Literal
2
+ from typing import Callable, Literal
3
3
 
4
4
  from pydantic import BaseModel
5
5
 
6
6
  from ..._util.checksum import verify_checksum
7
7
  from ..._util.download import download_file, download_text_file
8
8
  from ..._util.sandbox import SandboxPlatform
9
- from ..._util.trace import trace
10
9
  from .cache import (
11
10
  read_cached_claude_code_binary,
12
11
  write_cached_claude_code_binary,
@@ -14,8 +13,13 @@ from .cache import (
14
13
 
15
14
 
16
15
  async def download_claude_code_async(
17
- version: Literal["stable", "latest"] | str, platform: SandboxPlatform
16
+ version: Literal["stable", "latest"] | str,
17
+ platform: SandboxPlatform,
18
+ logger: Callable[[str], None] | None = None,
18
19
  ) -> bytes:
20
+ # resolve logger
21
+ logger = logger or print
22
+
19
23
  # determine version and checksum
20
24
  gcs_bucket = await _claude_code_gcs_bucket()
21
25
  version = await _claude_code_version(gcs_bucket, version)
@@ -35,9 +39,9 @@ async def download_claude_code_async(
35
39
  write_cached_claude_code_binary(binary_data, version, platform)
36
40
 
37
41
  # trace
38
- trace(f"Downloaded claude code binary: {version} ({platform})")
42
+ logger(f"Downloaded claude code binary: {version} ({platform})")
39
43
  else:
40
- trace(f"Used claude code binary from cache: {version} ({platform})")
44
+ logger(f"Used claude code binary from cache: {version} ({platform})")
41
45
 
42
46
  # return data
43
47
  return binary_data
@@ -50,7 +50,9 @@ async def ensure_claude_code_installed(
50
50
 
51
51
  # download the binary
52
52
  if claude_binary_bytes is None:
53
- claude_binary_bytes = await download_claude_code_async(version, platform)
53
+ claude_binary_bytes = await download_claude_code_async(
54
+ version, platform, trace
55
+ )
54
56
 
55
57
  # write it into the container and return it
56
58
  claude_binary = f"/opt/claude-{version}-{platform}"
@@ -0,0 +1,10 @@
1
+ from logging import getLogger
2
+
3
+ from inspect_ai.util import trace_message
4
+
5
+ logger = getLogger(__file__)
6
+
7
+
8
+ def trace(message: str) -> None:
9
+ logger.setLevel("TRACE")
10
+ trace_message(logger, category="Inspect SWE", message=message)
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.2.8'
32
- __version_tuple__ = version_tuple = (0, 2, 8)
31
+ __version__ = version = '0.2.9'
32
+ __version_tuple__ = version_tuple = (0, 2, 9)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -1,7 +0,0 @@
1
- from logging import getLogger
2
-
3
- logger = getLogger(__file__)
4
-
5
-
6
- def trace(message: str) -> None:
7
- logger.info(f"[Inspect SWE] {message}")
File without changes
File without changes
File without changes
File without changes