inspect-swe 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
# Re-export the package's public agent factory.
from ._claude_code.claude_code import claude_code

# `_version` is a build-time generated module (its header says it is written
# by setuptools-scm and not tracked in version control), so it may be missing;
# in that case the version is reported as "unknown" instead of failing import.
try:
    from ._version import __version__
except ImportError:
    __version__ = "unknown"


# Names exported by `from inspect_swe import *`.
__all__ = ["claude_code", "__version__"]
File without changes
@@ -0,0 +1,9 @@
1
+ from inspect_ai.agent import Agent, AgentState, agent
2
+
3
+
4
@agent
def claude_code() -> Agent:
    """Create the Claude Code agent.

    This is a placeholder: the returned agent raises ``RuntimeError`` as soon
    as it is executed.
    """

    async def execute(state: AgentState) -> AgentState:
        # Nothing is implemented yet, so fail loudly rather than hand the
        # state back untouched.
        raise RuntimeError("claude_code() not yet implemented.")

    return execute
@@ -0,0 +1,341 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import hashlib
4
+ import json
5
+ import os
6
+ import re
7
+ import subprocess
8
+ import tempfile
9
+ import urllib.request
10
+ from pathlib import Path
11
+ from typing import Optional, cast
12
+
13
# Constants
# Installer script that get_gcs_bucket_from_install_script() downloads and
# scans for a GCS_BUCKET="..." assignment.
INSTALL_SCRIPT_URL = "https://claude.ai/install.sh"
# Directory under the current user's home where downloaded binaries are cached.
CACHE_DIR = Path.home() / ".claude" / "downloads"
# Fallback GCS bucket in case we can't fetch from install.sh
FALLBACK_GCS_BUCKET = "https://storage.googleapis.com/claude-code-dist-86c565f3-f756-42ad-8dfa-d59b1c096819/claude-code-releases"
18
+
19
+
20
def run_docker_exec(container_name: str, command: str) -> str:
    """Run *command* through ``bash -c`` inside a container via ``docker exec``.

    Returns the command's standard output with surrounding whitespace
    stripped. A non-zero exit status raises ``subprocess.CalledProcessError``.
    """
    completed = subprocess.run(
        ["docker", "exec", container_name, "bash", "-c", command],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=True,
    )
    return completed.stdout.strip()
25
+
26
+
27
def detect_platform(container_name: str) -> str:
    """Work out the platform identifier of the container.

    The identifier is ``<os>-<arch>`` (for example ``darwin-arm64`` or
    ``linux-x64``); Linux containers that use musl libc get a ``-musl``
    suffix. Raises ``ValueError`` for an OS or architecture that is not
    recognised.
    """
    known_systems = {"Darwin": "darwin", "Linux": "linux"}
    known_machines = {
        "x86_64": "x64",
        "amd64": "x64",
        "arm64": "arm64",
        "aarch64": "arm64",
    }

    # Operating system, as reported by the container's own uname.
    os_name = run_docker_exec(container_name, "uname -s")
    if os_name not in known_systems:
        raise ValueError(f"Unsupported OS: {os_name}")
    os_type = known_systems[os_name]

    # CPU architecture.
    arch = run_docker_exec(container_name, "uname -m")
    if arch not in known_machines:
        raise ValueError(f"Unsupported architecture: {arch}")
    arch_type = known_machines[arch]

    if os_type != "linux":
        return f"{os_type}-{arch_type}"

    # On Linux, tell musl from glibc: look for the musl loader files first,
    # then fall back to inspecting what /bin/ls links against.
    libc_probe = (
        "if [ -f /lib/libc.musl-x86_64.so.1 ] || "
        "[ -f /lib/libc.musl-aarch64.so.1 ] || "
        "ldd /bin/ls 2>&1 | grep -q musl; then "
        "echo 'musl'; else echo 'glibc'; fi"
    )
    uses_musl = run_docker_exec(container_name, libc_probe) == "musl"
    return f"linux-{arch_type}-musl" if uses_musl else f"linux-{arch_type}"
65
+
66
+
67
def download_file(url: str, timeout: float = 60.0) -> bytes:
    """Download *url* and return the response body as bytes.

    Args:
        url: Location to fetch; passed unchanged to ``urllib.request.urlopen``.
        timeout: Seconds to wait on a blocking network operation before
            giving up. Without it a stalled connection would hang forever.

    Raises:
        urllib.error.URLError: If the request fails or times out while
            connecting (other socket errors may propagate as ``OSError``).
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return cast(bytes, response.read())
71
+
72
+
73
def get_gcs_bucket_from_install_script() -> str:
    """Discover the release bucket URL from the published install script.

    Downloads ``INSTALL_SCRIPT_URL`` and looks for a ``GCS_BUCKET="..."``
    assignment. If the download fails, or no such assignment is found,
    ``FALLBACK_GCS_BUCKET`` is returned instead.
    """
    try:
        print("Fetching install script to discover GCS bucket...")
        script_text = download_file(INSTALL_SCRIPT_URL).decode("utf-8")

        # Expected line shape: GCS_BUCKET="https://storage.googleapis.com/..."
        found = re.search(
            r'GCS_BUCKET="(https://storage\.googleapis\.com/[^"]+)"', script_text
        )
        if found is None:
            print("Could not extract GCS bucket from install script, using fallback")
            return FALLBACK_GCS_BUCKET

        discovered = found.group(1)
        print(f"Discovered GCS bucket: {discovered}")
        return discovered
    except Exception as e:
        # Best effort: any failure here falls back to the hard-coded bucket.
        print(f"Error fetching install script: {e}, using fallback")
        return FALLBACK_GCS_BUCKET
98
+
99
+
100
def validate_target(target: str) -> bool:
    """Return whether *target* is an accepted install target.

    Accepted forms are the channel names ``stable`` and ``latest``, or a
    ``MAJOR.MINOR.PATCH`` version with an optional ``-suffix`` made of one or
    more non-whitespace characters (for example ``1.2.3-beta.1``).
    """
    # Python's ``re`` has no POSIX bracket classes, so ``[^[:space:]]`` does
    # not mean "non-whitespace" and rejected valid suffixes; ``\S`` does.
    # ``fullmatch`` also refuses a trailing newline, which ``$`` would allow.
    pattern = r"(stable|latest|[0-9]+\.[0-9]+\.[0-9]+(-\S+)?)"
    return re.fullmatch(pattern, target) is not None
104
+
105
+
106
def get_version(gcs_bucket: str, target: str = "stable") -> str:
    """Resolve *target* to the concrete version string to install.

    The bucket's ``stable`` pointer is always fetched. ``stable`` and
    ``latest`` both resolve to it (``latest`` has no separate lookup yet);
    any other valid target is treated as an explicit version and returned
    unchanged. Raises ``ValueError`` when *target* fails validation.
    """
    if not validate_target(target):
        raise ValueError(f"Invalid target: {target}")

    # The stable pointer is downloaded even when an explicit version is asked for.
    published_stable = download_file(f"{gcs_bucket}/stable").decode("utf-8").strip()

    if target in ("stable", "latest", published_stable):
        return published_stable

    # A specific version was requested.
    return target
124
+
125
+
126
def get_checksum_from_manifest(manifest_json: str, platform: str) -> str:
    """Extract the SHA-256 checksum for *platform* from a release manifest.

    Args:
        manifest_json: Manifest document as JSON text. It is expected to be
            an object with a ``platforms`` mapping whose entries carry a
            ``checksum`` field.
        platform: Platform key to look up, e.g. ``linux-x64``.

    Returns:
        The checksum as 64 lowercase hexadecimal characters.

    Raises:
        ValueError: If the JSON is malformed (``json.JSONDecodeError`` is a
            ``ValueError``), the manifest has no usable ``platforms``
            mapping, the platform is absent, or its checksum is missing or
            not a 64-character lowercase hex string.
    """
    manifest = json.loads(manifest_json)

    # Tolerate manifests of the wrong shape by rejecting them with the same
    # ValueError callers already expect, not a TypeError/AttributeError.
    platforms = manifest.get("platforms") if isinstance(manifest, dict) else None
    if not isinstance(platforms, dict):
        raise ValueError("Invalid manifest: missing platforms")

    if platform not in platforms:
        raise ValueError(f"Platform {platform} not found in manifest")

    entry = platforms[platform]
    checksum = entry.get("checksum") if isinstance(entry, dict) else None

    # fullmatch (unlike ``$``) also rejects a checksum with a trailing newline.
    if not isinstance(checksum, str) or re.fullmatch(r"[a-f0-9]{64}", checksum) is None:
        raise ValueError(f"Invalid checksum for platform {platform}")

    return checksum
142
+
143
+
144
def verify_checksum(data: bytes, expected_checksum: str) -> bool:
    """Report whether the SHA-256 hex digest of *data* equals *expected_checksum*.

    The comparison is an exact string match against the lowercase hex digest.
    """
    hasher = hashlib.sha256()
    hasher.update(data)
    return expected_checksum == hasher.hexdigest()
148
+
149
+
150
def get_cached_binary_path(version: str, platform: str) -> Path:
    """Return the cache location for the binary of *version* on *platform*.

    The file lives directly inside ``CACHE_DIR``; it is not created here.
    """
    file_name = f"claude-{version}-{platform}"
    return CACHE_DIR.joinpath(file_name)
153
+
154
+
155
def get_cached_binary(
    version: str, platform: str, expected_checksum: str
) -> Optional[bytes]:
    """
    Load a previously cached binary, provided it still matches its checksum.

    Returns the file contents when the cached file exists and its SHA-256
    matches *expected_checksum*. Returns None when there is no cached file,
    when the checksum differs (the stale file is deleted), or when reading
    the cache fails for any reason.
    """
    cached_file = get_cached_binary_path(version, platform)
    if not cached_file.exists():
        return None

    try:
        payload = cached_file.read_bytes()

        if not verify_checksum(payload, expected_checksum):
            print("Cached binary checksum mismatch, will re-download")
            # Drop the bad file so it is not picked up again.
            cached_file.unlink()
            return None

        # Refresh the timestamps so cache cleanup sees this file as recently used.
        cached_file.touch()
        print(f"Using cached binary from {cached_file}")
        return payload
    except Exception as e:
        print(f"Error reading cached binary: {e}")
        return None
185
+
186
+
187
def cleanup_old_cache_files(
    keep_count: int = 3, cache_dir: Optional[Path] = None
) -> None:
    """
    Remove old cached binaries, keeping only the most recently accessed ones.

    Args:
        keep_count: Number of ``claude-*`` files to keep. Zero removes all of
            them; negative values are treated as zero.
        cache_dir: Directory to clean. Defaults to ``CACHE_DIR``.

    Only regular files (or links to them) whose names match ``claude-*`` are
    considered. Removal is best effort: a file that cannot be inspected or
    deleted is reported or skipped, never raised.
    """
    directory = CACHE_DIR if cache_dir is None else cache_dir
    if not directory.is_dir():
        return

    # A negative count would make the slice below keep the wrong end.
    keep_count = max(keep_count, 0)

    # Pair each cache file with its access time up front, so a file that
    # vanishes mid-scan is skipped instead of raising during the sort.
    aged_files = []
    for path in directory.glob("claude-*"):
        if not path.is_file():
            continue
        try:
            aged_files.append((path.stat().st_atime, path))
        except OSError:
            continue

    if len(aged_files) <= keep_count:
        return  # Nothing to clean up

    # Oldest access time first.
    aged_files.sort(key=lambda pair: pair[0])

    # Slice by explicit length: ``files[:-keep_count]`` is empty when
    # keep_count is 0, which would silently remove nothing.
    for _, file_path in aged_files[: len(aged_files) - keep_count]:
        try:
            file_size_mb = file_path.stat().st_size / (1024 * 1024)
            file_path.unlink()
            print(f"Removed old cache file: {file_path.name} ({file_size_mb:.1f} MB)")
        except OSError as e:
            print(f"Error removing cache file {file_path}: {e}")
214
+
215
+
216
def save_to_cache(binary_data: bytes, version: str, platform: str) -> None:
    """Write a binary into the cache directory, then prune older entries.

    The cache directory is created if needed. After the file is written, the
    cache is trimmed to its 3 most recently accessed binaries.
    """
    CACHE_DIR.mkdir(parents=True, exist_ok=True)

    destination = get_cached_binary_path(version, platform)
    destination.write_bytes(binary_data)
    print(f"Saved binary to cache: {destination}")

    # Trim the cache down to the 3 most recent files.
    cleanup_old_cache_files(keep_count=3)
228
+
229
+
230
def transfer_binary(container_name: str, binary_data: bytes, target_path: str) -> None:
    """Copy *binary_data* into the container at *target_path*.

    The bytes are staged in a temporary file on the host and copied with
    ``docker cp``. The temporary file is always removed, including when
    writing it or the copy fails.

    Raises:
        subprocess.CalledProcessError: If ``docker cp`` exits non-zero.
    """
    # delete=False because the file must outlive the handle: it is closed
    # before ``docker cp`` reads it and then removed by hand.
    tmp_file = tempfile.NamedTemporaryFile(delete=False)
    tmp_file_path = tmp_file.name
    try:
        # Writing sits inside the try so a failed write (e.g. disk full)
        # does not leave the temporary file behind.
        with tmp_file:
            tmp_file.write(binary_data)

        # Copy file to container
        subprocess.run(
            ["docker", "cp", tmp_file_path, f"{container_name}:{target_path}"],
            check=True,
        )
    finally:
        # Clean up temporary file
        os.unlink(tmp_file_path)
246
+
247
+
248
def install_claude(container_name: str, binary_path: str) -> None:
    """Install the transferred binary inside the container and verify it runs.

    Copies *binary_path* (a path inside the container) to
    ``/usr/local/bin/claude``, marks it executable, removes the transferred
    copy, then runs ``claude --version`` and ``claude config list``.

    Raises:
        subprocess.CalledProcessError: If copying, chmod or cleanup fails.
        ValueError: If either verification command fails.
    """
    # Local import: the module's import header does not bring in shlex.
    import shlex

    # The path is embedded in a ``bash -c`` string, so quote it; spaces or
    # shell metacharacters would otherwise be interpreted by the shell.
    quoted_path = shlex.quote(binary_path)

    # Copy binary to /usr/local/bin for system-wide access
    run_docker_exec(container_name, f"cp {quoted_path} /usr/local/bin/claude")
    run_docker_exec(container_name, "chmod +x /usr/local/bin/claude")

    # Clean up the temporary binary
    run_docker_exec(container_name, f"rm -f {quoted_path}")

    # Verify installation and initialize config
    try:
        # Check version
        version_output = run_docker_exec(container_name, "claude --version")
        print(f"Claude installed successfully: {version_output}")

        # Initialize config files/directories by running config list
        run_docker_exec(container_name, "claude config list")
        print("Claude configuration initialized")

    except subprocess.CalledProcessError as e:
        print(f"Warning: Could not verify claude installation: {e}")
        raise ValueError("Claude installation verification failed") from e
270
+
271
+
272
def main(container_name: str, target: str = "stable") -> None:
    """Install Claude Code into the given Docker container.

    Steps: discover the release bucket, detect the container platform,
    resolve the version, fetch the manifest checksum, obtain the binary from
    the local cache or by download (verifying its checksum), copy it into
    the container and install it.

    Raises ``ValueError`` when a freshly downloaded binary fails checksum
    verification; errors from the individual steps propagate unchanged.
    """
    print(f"Installing Claude Code in container: {container_name}")
    print(f"Target: {target}")

    # Where releases are published.
    bucket_url = get_gcs_bucket_from_install_script()

    # Which build the container needs.
    print("Detecting platform...")
    platform = detect_platform(container_name)
    print(f"Platform: {platform}")

    # Which version to install.
    print("Determining version...")
    version = get_version(bucket_url, target)
    print(f"Version: {version}")

    # The manifest holds the expected checksum for each platform.
    print("Downloading manifest...")
    manifest_text = download_file(f"{bucket_url}/{version}/manifest.json").decode(
        "utf-8"
    )

    print("Extracting checksum...")
    wanted_checksum = get_checksum_from_manifest(manifest_text, platform)

    # Prefer a cached copy; it has already been checked against the checksum.
    payload = get_cached_binary(version, platform, wanted_checksum)
    if payload is not None:
        print("Checksum already verified for cached binary")
    else:
        print(f"Downloading Claude binary for {platform}...")
        payload = download_file(f"{bucket_url}/{version}/{platform}/claude")

        print("Verifying checksum...")
        if not verify_checksum(payload, wanted_checksum):
            raise ValueError("Checksum verification failed")
        print("Checksum verified successfully")

        # Keep the verified download for future runs.
        save_to_cache(payload, version, platform)

    # Stage the binary inside the container, then install from there.
    print("Transferring binary to container...")
    staged_path = f"/tmp/claude-{version}-{platform}"
    transfer_binary(container_name, payload, staged_path)

    print("Installing Claude Code...")
    install_claude(container_name, staged_path)

    print("\n✅ Installation complete!")
329
+
330
+
331
if __name__ == "__main__":
    # Usage: install_claude.py [CONTAINER_NAME] [TARGET]
    #
    # Both arguments are optional. With none given, the script behaves as
    # before: it installs the "stable" target into the sample container below.
    import sys

    cli_args = sys.argv[1:]

    # Sample container name, used only when none is passed on the command line.
    test_container = (
        cli_args[0] if cli_args else "inspect-intervention-izedw74-default-1"
    )
    # TARGET may be "stable", "latest" or an explicit version such as "1.0.0".
    requested_target = cli_args[1] if len(cli_args) > 1 else "stable"

    main(test_container, requested_target)
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
# Names exported by this module; each value is available under a plain
# and a dunder alias.
__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# Defined locally as False, so the typing imports below never execute at
# runtime. NOTE(review): presumably static type checkers treat a name called
# TYPE_CHECKING as true and therefore see the precise aliases - confirm.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    # Runtime placeholders so the annotations below still resolve.
    VERSION_TUPLE = object
    COMMIT_ID = object

# Declared types of the module attributes assigned further down.
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

# Version of this build, as a string and as a tuple of its components.
__version__ = version = '0.1.5'
__version_tuple__ = version_tuple = (0, 1, 5)

# No commit id is recorded for this build.
__commit_id__ = commit_id = None
inspect_swe/py.typed ADDED
File without changes
@@ -0,0 +1,50 @@
1
+ Metadata-Version: 2.4
2
+ Name: inspect_swe
3
+ Version: 0.1.5
4
+ Summary: Software engineering agents for Inspect AI.
5
+ Project-URL: Documentation, https://meridianlabs-ai.github.io/inspect_swe/
6
+ Project-URL: Source Code, https://github.com/meridianlabs-ai/inspect_swe
7
+ Project-URL: Issue Tracker, https://github.com/meridianlabs-ai/inspect_swe/issues
8
+ Author: Meridian Labs
9
+ License: MIT License
10
+ License-File: LICENSE
11
+ Requires-Python: >=3.10
12
+ Requires-Dist: inspect-ai
13
+ Requires-Dist: typing-extensions>=4.9.0
14
+ Provides-Extra: dev
15
+ Requires-Dist: mypy; extra == 'dev'
16
+ Requires-Dist: pytest; extra == 'dev'
17
+ Requires-Dist: ruff; extra == 'dev'
18
+ Provides-Extra: doc
19
+ Requires-Dist: quarto-cli==1.7.31; extra == 'doc'
20
+ Description-Content-Type: text/markdown
21
+
22
+ Welcome to Inspect SWE, a suite of software engineering agents for [Inspect AI](https://inspect.aisi.org.uk/).
23
+
24
+
25
+ ## Installation
26
+
27
+ To install the latest development version:
28
+
29
+ ```bash
30
+ pip install git+https://github.com/meridianlabs-ai/inspect_swe
31
+ ```
32
+
33
+ ## Development
34
+
35
+ To work on development of Inspect SWE, clone the repository and install with the `-e` flag and `[dev]` optional dependencies:
36
+
37
+ ```bash
38
+ git clone https://github.com/meridianlabs-ai/inspect_swe
39
+ cd inspect_swe
40
+ pip install -e ".[dev]"
41
+ ```
42
+
43
+ Run linting, formatting, and tests with:
44
+
45
+ ```bash
46
+ make check
47
+ make test
48
+ ```
49
+
50
+
@@ -0,0 +1,10 @@
1
+ inspect_swe/__init__.py,sha256=6F52dddUoPvJA7RugtyDaswUqliDGgeaTK_OXWplvI0,185
2
+ inspect_swe/_version.py,sha256=rdxBMYpwzYxiWk08QbPLHSAxHoDfeKWwyaJIAM0lSic,704
3
+ inspect_swe/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ inspect_swe/_claude_code/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ inspect_swe/_claude_code/claude_code.py,sha256=-WVHfA-kRDyEwCMeUlDR6A5fWSBcJGGV5o_np0Y6MnY,232
6
+ inspect_swe/_claude_code/install_claude.py,sha256=g5nHIY-JVKDQFgm0IIhpCsCX5B6MadYj8-CtKpKU4YE,11796
7
+ inspect_swe-0.1.5.dist-info/METADATA,sha256=5JqMYTi_E31VZmxOHqHICbc5Nhwq9tkhEZ8l2NtYQ14,1296
8
+ inspect_swe-0.1.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
9
+ inspect_swe-0.1.5.dist-info/licenses/LICENSE,sha256=Hi3UDcbD6yCKZ1mcgt7pprzSG0rDEnSrbrm3XinyiDA,1070
10
+ inspect_swe-0.1.5.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Meridian Labs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.