PyPI - inspect-swe - Versions diffs - 0.2.7__tar.gz → 0.2.9__tar.gz - Mend

inspect-swe 0.2.7 (tar.gz) → 0.2.9 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

{inspect_swe-0.2.7 → inspect_swe-0.2.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inspect_swe
-Version: 0.2.7
+Version: 0.2.9
 Summary: Software engineering agents for Inspect AI.
 Project-URL: Documentation, https://meridianlabs-ai.github.io/inspect_swe/
 Project-URL: Source Code, https://github.com/meridianlabs-ai/inspect_swe
@@ -10,7 +10,7 @@ License: MIT License
 License-File: LICENSE
 Requires-Python: >=3.10
 Requires-Dist: httpx
-Requires-Dist: inspect-ai>=0.3.126
+Requires-Dist: inspect-ai>=0.3.128
 Requires-Dist: nest-asyncio
 Requires-Dist: platformdirs
 Requires-Dist: pydantic>=2.11.4

{inspect_swe-0.2.7 → inspect_swe-0.2.9}/pyproject.toml RENAMED Viewed

@@ -12,7 +12,7 @@ requires-python = ">=3.10"
 license = { text = "MIT License" }
 dependencies = [
     "httpx",
-    "inspect_ai>=0.3.126",
+    "inspect_ai>=0.3.128",
     "nest_asyncio",
     "platformdirs",
     "pydantic>=2.11.4",

{inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/claude_code.py RENAMED Viewed

@@ -16,6 +16,8 @@ from inspect_ai.tool import MCPServerConfig
 from inspect_ai.util import sandbox as sandbox_env
 from pydantic_core import to_json
+from inspect_swe._util.trace import trace
 from .._util._async import is_callable_coroutine
 from .install.install import ensure_claude_code_installed
@@ -34,10 +36,11 @@ def claude_code(
     attempts: int | AgentAttempts = 1,
     model: str | None = None,
     small_model: str | None = None,
+    cwd: str | None = None,
     env: dict[str, str] | None = None,
-    version: Literal["auto", "sandbox", "stable", "latest"] | str = "auto",
     user: str | None = None,
     sandbox: str | None = None,
+    version: Literal["auto", "sandbox", "stable", "latest"] | str = "auto",
 ) -> Agent:
     """Claude Code agent.
@@ -60,15 +63,16 @@ def claude_code(
         attempts: Configure agent to make multiple attempts.
         model: Model name to use for Opus and Sonnet calls (defaults to main model for task).
         small_model: Model to use for Haiku calls (defaults to main model for task).
+        cwd: Working directory to run claude code within.
         env: Environment variables to set for claude code.
+        user: User to execute claude code with.
+        sandbox: Optional sandbox environment name.
         version: Version of claude code to use. One of:
             - "auto": Use any available version of claude code in the sandbox, otherwise download the current stable version.
             - "sandbox": Use the version of claude code in the sandbox (raises `RuntimeError` if claude is not available in the sandbox)
             - "stable": Download and use the current stable version of claude code.
             - "latest": Download and use the very latest version of claude code.
             - "x.x.x": Download and use a specific version of claude code.
-        user: User to execute claude code with.
-        sandbox: Optional sandbox environment name.
     """
     # resolve models
     model = f"inspect/{model}" if model is not None else "inspect"
@@ -91,6 +95,8 @@ def claude_code(
             cmd = [
                 "--print",  # run without interactions
                 "--dangerously-skip-permissions",
+                "--debug",
+                "--verbose",
                 "--model",
                 model,
             ]
@@ -127,7 +133,8 @@ def claude_code(
             # resolve sandbox
             sbox = sandbox_env(sandbox)
-            # execute the agent
+            # execute the agent (track debug output)
+            debug_output: list[str] = []
             agent_prompt = prompt
             attempt_count = 0
             while True:
@@ -140,6 +147,7 @@ def claude_code(
                 # run agent
                 result = await sbox.exec(
                     cmd=agent_cmd,
+                    cwd=cwd,
                     env={
                         "ANTHROPIC_BASE_URL": f"http://localhost:{bridge.port}",
                         "ANTHROPIC_API_KEY": "sk-ant-api03-DOq5tyLPrk9M4hPE",
@@ -156,6 +164,10 @@ def claude_code(
                     user=user,
                 )
+                # track debug output
+                debug_output.append(result.stdout)
+                debug_output.append(result.stderr)
                 # raise for error
                 if not result.success:
                     f"Error executing claude code agent: {result.stdout}\n{result.stderr}"
@@ -185,6 +197,10 @@ def claude_code(
                     else:
                         agent_prompt = attempts.incorrect_message
+            # trace debug info
+            debug_output.insert(0, "Claude Code Debug Output:")
+            trace("\n".join(debug_output))
         return bridge.state
     # return agent with specified name and description

{inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/install/download.py RENAMED Viewed

@@ -1,12 +1,11 @@
 import re
-from typing import Literal
+from typing import Callable, Literal
 from pydantic import BaseModel
 from ..._util.checksum import verify_checksum
 from ..._util.download import download_file, download_text_file
 from ..._util.sandbox import SandboxPlatform
-from ..._util.trace import trace
 from .cache import (
     read_cached_claude_code_binary,
     write_cached_claude_code_binary,
@@ -14,8 +13,13 @@ from .cache import (
 async def download_claude_code_async(
-    version: Literal["stable", "latest"] | str, platform: SandboxPlatform
+    version: Literal["stable", "latest"] | str,
+    platform: SandboxPlatform,
+    logger: Callable[[str], None] | None = None,
 ) -> bytes:
+    # resolve logger
+    logger = logger or print
     # determine version and checksum
     gcs_bucket = await _claude_code_gcs_bucket()
     version = await _claude_code_version(gcs_bucket, version)
@@ -35,9 +39,9 @@ async def download_claude_code_async(
         write_cached_claude_code_binary(binary_data, version, platform)
         # trace
-        trace(f"Downloaded claude code binary: {version} ({platform})")
+        logger(f"Downloaded claude code binary: {version} ({platform})")
     else:
-        trace(f"Used claude code binary from cache: {version} ({platform})")
+        logger(f"Used claude code binary from cache: {version} ({platform})")
     # return data
     return binary_data

{inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_claude_code/install/install.py RENAMED Viewed

@@ -50,7 +50,9 @@ async def ensure_claude_code_installed(
         # download the binary
         if claude_binary_bytes is None:
-            claude_binary_bytes = await download_claude_code_async(version, platform)
+            claude_binary_bytes = await download_claude_code_async(
+                version, platform, trace
+            )
         # write it into the container and return it
         claude_binary = f"/opt/claude-{version}-{platform}"

inspect_swe-0.2.9/src/inspect_swe/_util/trace.py ADDED Viewed

@@ -0,0 +1,10 @@
+from logging import getLogger
+from inspect_ai.util import trace_message
+logger = getLogger(__file__)
+def trace(message: str) -> None:
+    logger.setLevel("TRACE")
+    trace_message(logger, category="Inspect SWE", message=message)

{inspect_swe-0.2.7 → inspect_swe-0.2.9}/src/inspect_swe/_version.py RENAMED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '0.2.7'
-__version_tuple__ = version_tuple = (0, 2, 7)
+__version__ = version = '0.2.9'
+__version_tuple__ = version_tuple = (0, 2, 9)
 __commit_id__ = commit_id = None

inspect_swe-0.2.7/src/inspect_swe/_util/trace.py DELETED Viewed

@@ -1,7 +0,0 @@
-from logging import getLogger
-logger = getLogger(__file__)
-def trace(message: str) -> None:
-    logger.info(f"[Inspect SWE] {message}")