inspect-swe 0.2.6__tar.gz → 0.2.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/PKG-INFO +2 -2
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/pyproject.toml +1 -1
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_claude_code/claude_code.py +31 -12
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_version.py +2 -2
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/.gitignore +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/LICENSE +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/README.md +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/__init__.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_claude_code/__init__.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_claude_code/install/__init__.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_claude_code/install/cache.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_claude_code/install/download.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_claude_code/install/install.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_registry.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_tools/__init__.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_tools/download.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_util/__init__.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_util/_async.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_util/_yaml.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_util/appdirs.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_util/checksum.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_util/constants.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_util/download.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_util/platform.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_util/sandbox.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/_util/trace.py +0 -0
- {inspect_swe-0.2.6 → inspect_swe-0.2.8}/src/inspect_swe/py.typed +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: inspect_swe
|
3
|
-
Version: 0.2.6
|
3
|
+
Version: 0.2.8
|
4
4
|
Summary: Software engineering agents for Inspect AI.
|
5
5
|
Project-URL: Documentation, https://meridianlabs-ai.github.io/inspect_swe/
|
6
6
|
Project-URL: Source Code, https://github.com/meridianlabs-ai/inspect_swe
|
@@ -10,7 +10,7 @@ License: MIT License
|
|
10
10
|
License-File: LICENSE
|
11
11
|
Requires-Python: >=3.10
|
12
12
|
Requires-Dist: httpx
|
13
|
-
Requires-Dist: inspect-ai>=0.3.
|
13
|
+
Requires-Dist: inspect-ai>=0.3.128
|
14
14
|
Requires-Dist: nest-asyncio
|
15
15
|
Requires-Dist: platformdirs
|
16
16
|
Requires-Dist: pydantic>=2.11.4
|
@@ -29,13 +29,16 @@ def claude_code(
|
|
29
29
|
"""),
|
30
30
|
system_prompt: str | None = None,
|
31
31
|
mcp_servers: Sequence[MCPServerConfig] | None = None,
|
32
|
+
allowed_tools: list[str] | None = None,
|
33
|
+
disallowed_tools: list[str] | None = None,
|
32
34
|
attempts: int | AgentAttempts = 1,
|
33
35
|
model: str | None = None,
|
34
36
|
small_model: str | None = None,
|
37
|
+
cwd: str | None = None,
|
35
38
|
env: dict[str, str] | None = None,
|
36
|
-
version: Literal["auto", "sandbox", "stable", "latest"] | str = "auto",
|
37
39
|
user: str | None = None,
|
38
40
|
sandbox: str | None = None,
|
41
|
+
version: Literal["auto", "sandbox", "stable", "latest"] | str = "auto",
|
39
42
|
) -> Agent:
|
40
43
|
"""Claude Code agent.
|
41
44
|
|
@@ -43,6 +46,8 @@ def claude_code(
|
|
43
46
|
|
44
47
|
The agent can either use a version of Claude Code installed in the sandbox, or can download a version and install it in the sandbox (see docs on `version` option below for details).
|
45
48
|
|
49
|
+
Use `allowed_tools` and `disallowed_tools` to control access to tools. See [Tools available to Claude](https://docs.anthropic.com/en/docs/claude-code/settings#tools-available-to-claude) for the list of built-in tools and [How to use Allowed Tools in Claude Code](https://www.instructa.ai/blog/claude-code/how-to-use-allowed-tools-in-claude-code) for details on the supported syntax. Note that `allowed_tools` enables you to filter allowed parameter values and `disallowed_tools` enables you to remove tools entirely. In other words, `allowed_tools` is not a complete list of what tools are available but rather just filters on tool parameters---to remove tools you need to explicitly set `disallowed_tools`.
|
50
|
+
|
46
51
|
Use the `attempts` option to enable additional submissions if the initial
|
47
52
|
submission(s) are incorrect (by default, no additional attempts are permitted).
|
48
53
|
|
@@ -51,18 +56,21 @@ def claude_code(
|
|
51
56
|
description: Agent description (used in multi-agent systems with `as_tool()` and `handoff()`)
|
52
57
|
system_prompt: Additional system prompt to append to default system prompt.
|
53
58
|
mcp_servers: MCP servers to make available to the agent.
|
59
|
+
allowed_tools: Parameter filters for built-in tools.
|
60
|
+
disallowed_tools: List of tool names to disallow entirely.
|
54
61
|
attempts: Configure agent to make multiple attempts.
|
55
62
|
model: Model name to use for Opus and Sonnet calls (defaults to main model for task).
|
56
63
|
small_model: Model to use for Haiku calls (defaults to main model for task).
|
64
|
+
cwd: Working directory to run claude code within.
|
57
65
|
env: Environment variables to set for claude code.
|
66
|
+
user: User to execute claude code with.
|
67
|
+
sandbox: Optional sandbox environment name.
|
58
68
|
version: Version of claude code to use. One of:
|
59
69
|
- "auto": Use any available version of claude code in the sandbox, otherwise download the current stable version.
|
60
70
|
- "sandbox": Use the version of claude code in the sandbox (raises `RuntimeError` if claude is not available in the sandbox)
|
61
71
|
- "stable": Download and use the current stable version of claude code.
|
62
72
|
- "latest": Download and use the very latest version of claude code.
|
63
73
|
- "x.x.x": Download and use a specific version of claude code.
|
64
|
-
user: User to execute claude code with.
|
65
|
-
sandbox: Optional sandbox environment name.
|
66
74
|
"""
|
67
75
|
# resolve models
|
68
76
|
model = f"inspect/{model}" if model is not None else "inspect"
|
@@ -99,8 +107,19 @@ def claude_code(
|
|
99
107
|
cmd.extend(["--append-system-prompt", "\n\n".join(system_messages)])
|
100
108
|
|
101
109
|
# mcp servers
|
110
|
+
cmd_allowed_tools = allowed_tools or []
|
102
111
|
if mcp_servers:
|
103
|
-
|
112
|
+
mcp_server_args, mcp_allowed_tools = resolve_mcp_servers(mcp_servers)
|
113
|
+
cmd.extend(mcp_server_args)
|
114
|
+
cmd_allowed_tools.extend(mcp_allowed_tools)
|
115
|
+
|
116
|
+
# add allowed and disallowed tools
|
117
|
+
if len(cmd_allowed_tools) > 0:
|
118
|
+
cmd.append("--allowed-tools")
|
119
|
+
cmd.append(",".join(cmd_allowed_tools))
|
120
|
+
if disallowed_tools is not None and len(disallowed_tools) > 0:
|
121
|
+
cmd.append("--disallowed-tools")
|
122
|
+
cmd.append(",".join(disallowed_tools))
|
104
123
|
|
105
124
|
# user prompt
|
106
125
|
prompt = "\n\n".join(
|
@@ -123,6 +142,7 @@ def claude_code(
|
|
123
142
|
# run agent
|
124
143
|
result = await sbox.exec(
|
125
144
|
cmd=agent_cmd,
|
145
|
+
cwd=cwd,
|
126
146
|
env={
|
127
147
|
"ANTHROPIC_BASE_URL": f"http://localhost:{bridge.port}",
|
128
148
|
"ANTHROPIC_API_KEY": "sk-ant-api03-DOq5tyLPrk9M4hPE",
|
@@ -174,7 +194,9 @@ def claude_code(
|
|
174
194
|
return agent_with(execute, name=name, description=description)
|
175
195
|
|
176
196
|
|
177
|
-
def mcp_server_args(mcp_servers: Sequence[MCPServerConfig]) -> list[str]:
|
197
|
+
def resolve_mcp_servers(
|
198
|
+
mcp_servers: Sequence[MCPServerConfig],
|
199
|
+
) -> tuple[list[str], list[str]]:
|
178
200
|
# build servers and allowed tools
|
179
201
|
mcp_servers_json: dict[str, dict[str, Any]] = {}
|
180
202
|
allowed_tools: list[str] = []
|
@@ -194,14 +216,11 @@ def mcp_server_args(mcp_servers: Sequence[MCPServerConfig]) -> list[str]:
|
|
194
216
|
)
|
195
217
|
|
196
218
|
# map to cli args
|
197
|
-
|
219
|
+
mcp_config_cmds: list[str] = []
|
198
220
|
if len(mcp_servers_json) > 0:
|
199
|
-
|
200
|
-
|
221
|
+
mcp_config_cmds.append("--mcp-config")
|
222
|
+
mcp_config_cmds.append(
|
201
223
|
to_json({"mcpServers": mcp_servers_json}, exclude_none=True).decode()
|
202
224
|
)
|
203
|
-
if len(allowed_tools):
|
204
|
-
cmds.append("--allowed-tools")
|
205
|
-
cmds.append(",".join(allowed_tools))
|
206
225
|
|
207
|
-
return cmds
|
226
|
+
return mcp_config_cmds, allowed_tools
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
28
28
|
commit_id: COMMIT_ID
|
29
29
|
__commit_id__: COMMIT_ID
|
30
30
|
|
31
|
-
__version__ = version = '0.2.6'
|
32
|
-
__version_tuple__ = version_tuple = (0, 2, 6)
|
31
|
+
__version__ = version = '0.2.8'
|
32
|
+
__version_tuple__ = version_tuple = (0, 2, 8)
|
33
33
|
|
34
34
|
__commit_id__ = commit_id = None
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|