inspect-swe 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
inspect_swe/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from ._claude_code.claude_code import ClaudeCodeOptions, claude_code
1
+ from ._claude_code.claude_code import claude_code
2
2
  from ._tools.download import download_agent_binary
3
3
  from ._util.sandbox import SandboxPlatform
4
4
 
@@ -10,7 +10,6 @@ except ImportError:
10
10
 
11
11
  __all__ = [
12
12
  "claude_code",
13
- "ClaudeCodeOptions",
14
13
  "download_agent_binary",
15
14
  "SandboxPlatform",
16
15
  "__version__",
@@ -4,41 +4,20 @@ from typing import Any, Literal, Sequence
4
4
 
5
5
  from inspect_ai.agent import (
6
6
  Agent,
7
+ AgentAttempts,
7
8
  AgentState,
8
9
  agent,
9
10
  agent_with,
10
11
  sandbox_agent_bridge,
11
12
  )
12
13
  from inspect_ai.model import ChatMessageSystem, ChatMessageUser
14
+ from inspect_ai.scorer import score
13
15
  from inspect_ai.tool import MCPServerConfig
14
16
  from inspect_ai.util import sandbox as sandbox_env
15
- from pydantic import BaseModel, Field
16
17
  from pydantic_core import to_json
17
18
 
18
- from inspect_swe._claude_code.install.install import ensure_claude_code_installed
19
-
20
- # TODO: AgentAttempts
21
- # TODO: AgentContinue
22
- # TODO: generate config merging (they are passing max_tokens=32000, temperature=1)
23
-
24
-
25
- class ClaudeCodeOptions(BaseModel):
26
- """Claude Code options."""
27
-
28
- system_prompt: str | None = Field(default=None)
29
- """Additional system prompt to append to default system prompt."""
30
-
31
- mcp_servers: Sequence[MCPServerConfig] | None = Field(default=None)
32
- """MCP servers to make available to the agent."""
33
-
34
- model: str | None = Field(default=None)
35
- """ Model name to use for Opus and Sonnet calls (defaults to main model for task)."""
36
-
37
- small_model: str | None = Field(default=None)
38
- """Model to use for Haiku calls (defaults to main model for task)."""
39
-
40
- env: dict[str, str] | None = Field(default=None)
41
- """Environment variables to set for claude code."""
19
+ from .._util._async import is_callable_coroutine
20
+ from .install.install import ensure_claude_code_installed
42
21
 
43
22
 
44
23
  @agent
@@ -48,7 +27,12 @@ def claude_code(
48
27
  Autonomous coding agent capable of writing, testing, debugging,
49
28
  and iterating on code across multiple languages.
50
29
  """),
51
- options: ClaudeCodeOptions | None = None,
30
+ system_prompt: str | None = None,
31
+ mcp_servers: Sequence[MCPServerConfig] | None = None,
32
+ attempts: int | AgentAttempts = 1,
33
+ model: str | None = None,
34
+ small_model: str | None = None,
35
+ env: dict[str, str] | None = None,
52
36
  version: Literal["auto", "sandbox", "stable", "latest"] | str = "auto",
53
37
  user: str | None = None,
54
38
  sandbox: str | None = None,
@@ -59,10 +43,18 @@ def claude_code(
59
43
 
60
44
  The agent can either use a version of Claude Code installed in the sandbox, or can download a version and install it in the sandbox (see docs on `version` option below for details).
61
45
 
46
+ Use the `attempts` option to enable additional submissions if the initial
47
+ submission(s) are incorrect (by default, no additional attempts are permitted).
48
+
62
49
  Args:
63
50
  name: Agent name (used in multi-agent systems with `as_tool()` and `handoff()`)
64
51
  description: Agent description (used in multi-agent systems with `as_tool()` and `handoff()`)
65
- options: Claude code options.
52
+ system_prompt: Additional system prompt to append to default system prompt.
53
+ mcp_servers: MCP servers to make available to the agent.
54
+ attempts: Configure agent to make multiple attempts.
55
+ model: Model name to use for Opus and Sonnet calls (defaults to main model for task).
56
+ small_model: Model to use for Haiku calls (defaults to main model for task).
57
+ env: Environment variables to set for claude code.
66
58
  version: Version of claude code to use. One of:
67
59
  - "auto": Use any available version of claude code in the sandbox, otherwise download the current stable version.
68
60
  - "sandbox": Use the version of claude code in the sandbox (raises `RuntimeError` if claude is not available in the sandbox)
@@ -72,16 +64,12 @@ def claude_code(
72
64
  user: User to execute claude code with.
73
65
  sandbox: Optional sandbox environment name.
74
66
  """
75
- # provide default options if none specified
76
- options = options or ClaudeCodeOptions()
77
-
78
67
  # resolve models
79
- model = f"inspect/{options.model}" if options.model is not None else "inspect"
80
- small_model = (
81
- f"inspect/{options.small_model}"
82
- if options.small_model is not None
83
- else "inspect"
84
- )
68
+ model = f"inspect/{model}" if model is not None else "inspect"
69
+ small_model = f"inspect/{small_model}" if small_model is not None else "inspect"
70
+
71
+ # resolve attempts
72
+ attempts = AgentAttempts(attempts) if isinstance(attempts, int) else attempts
85
73
 
86
74
  async def execute(state: AgentState) -> AgentState:
87
75
  async with sandbox_agent_bridge(state) as bridge:
@@ -95,9 +83,6 @@ def claude_code(
95
83
 
96
84
  # base options
97
85
  cmd = [
98
- claude_binary,
99
- "--session-id",
100
- session_id,
101
86
  "--print", # run without interactions
102
87
  "--dangerously-skip-permissions",
103
88
  "--model",
@@ -108,50 +93,82 @@ def claude_code(
108
93
  system_messages = [
109
94
  m.text for m in state.messages if isinstance(m, ChatMessageSystem)
110
95
  ]
111
- if options.system_prompt is not None:
112
- system_messages.append(options.system_prompt)
96
+ if system_prompt is not None:
97
+ system_messages.append(system_prompt)
113
98
  if system_messages:
114
99
  cmd.extend(["--append-system-prompt", "\n\n".join(system_messages)])
115
100
 
116
101
  # mcp servers
117
- if options.mcp_servers:
118
- cmd.extend(mcp_server_args(options.mcp_servers))
102
+ if mcp_servers:
103
+ cmd.extend(mcp_server_args(mcp_servers))
119
104
 
120
105
  # user prompt
121
106
  prompt = "\n\n".join(
122
107
  [m.text for m in state.messages if isinstance(m, ChatMessageUser)]
123
108
  )
124
- cmd.append("--")
125
- cmd.append(prompt)
126
109
 
127
110
  # resolve sandbox
128
111
  sbox = sandbox_env(sandbox)
129
112
 
130
113
  # execute the agent
131
- result = await sbox.exec(
132
- cmd=cmd,
133
- env={
134
- "ANTHROPIC_BASE_URL": f"http://localhost:{bridge.port}",
135
- "ANTHROPIC_API_KEY": "sk-ant-api03-DOq5tyLPrk9M4hPE",
136
- "ANTHROPIC_MODEL": model,
137
- "ANTHROPIC_DEFAULT_OPUS_MODEL": model,
138
- "ANTHROPIC_DEFAULT_SONNET_MODEL": model,
139
- "CLAUDE_CODE_SUBAGENT_MODEL": model,
140
- "ANTHROPIC_DEFAULT_HAIKU_MODEL": small_model,
141
- "ANTHROPIC_SMALL_FAST_MODEL": small_model,
142
- "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1",
143
- "IS_SANDBOX": "1",
144
- }
145
- | (options.env or {}),
146
- user=user,
147
- )
148
-
149
- if result.success:
150
- return bridge.state
151
- else:
152
- raise RuntimeError(
153
- f"Error executing claude code agent: {result.stdout}\n{result.stderr}"
154
- )
114
+ agent_prompt = prompt
115
+ attempt_count = 0
116
+ while True:
117
+ # either starting a new session or resuming one
118
+ id_param = "--session-id" if attempt_count == 0 else "--resume"
119
+ agent_cmd = (
120
+ [claude_binary, id_param, session_id] + cmd + ["--", agent_prompt]
121
+ )
122
+
123
+ # run agent
124
+ result = await sbox.exec(
125
+ cmd=agent_cmd,
126
+ env={
127
+ "ANTHROPIC_BASE_URL": f"http://localhost:{bridge.port}",
128
+ "ANTHROPIC_API_KEY": "sk-ant-api03-DOq5tyLPrk9M4hPE",
129
+ "ANTHROPIC_MODEL": model,
130
+ "ANTHROPIC_DEFAULT_OPUS_MODEL": model,
131
+ "ANTHROPIC_DEFAULT_SONNET_MODEL": model,
132
+ "CLAUDE_CODE_SUBAGENT_MODEL": model,
133
+ "ANTHROPIC_DEFAULT_HAIKU_MODEL": small_model,
134
+ "ANTHROPIC_SMALL_FAST_MODEL": small_model,
135
+ "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1",
136
+ "IS_SANDBOX": "1",
137
+ }
138
+ | (env or {}),
139
+ user=user,
140
+ )
141
+
142
# raise for error: a failed run must abort here rather than fall through to
# the attempt counting / scoring below (the bare f-string was a no-op
# expression, so failures were silently ignored)
if not result.success:
    raise RuntimeError(
        f"Error executing claude code agent: {result.stdout}\n{result.stderr}"
    )
145
+
146
+ # exit if we are at max_attempts
147
+ attempt_count += 1
148
+ if attempt_count >= attempts.attempts:
149
+ break
150
+
151
+ # score this attempt
152
+ answer_scores = await score(state)
153
+
154
+ # break if we score 'correct'
155
+ if attempts.score_value(answer_scores[0].value) == 1.0:
156
+ break
157
+
158
+ # otherwise update prompt with incorrect message and continue
159
+ else:
160
+ if callable(attempts.incorrect_message):
161
+ if not is_callable_coroutine(attempts.incorrect_message):
162
+ raise ValueError(
163
+ "The incorrect_message function must be async."
164
+ )
165
+ agent_prompt = await attempts.incorrect_message(
166
+ state, answer_scores
167
+ )
168
+ else:
169
+ agent_prompt = attempts.incorrect_message
170
+
171
+ return bridge.state
155
172
 
156
173
  # return agent with specified name and description
157
174
  return agent_with(execute, name=name, description=description)
@@ -1,5 +1,6 @@
1
1
  import asyncio
2
- from typing import Coroutine, Literal, TypeVar, cast
2
+ import inspect
3
+ from typing import Any, Coroutine, Literal, TypeVar, cast
3
4
 
4
5
  import nest_asyncio # type: ignore
5
6
  import sniffio
@@ -9,6 +10,14 @@ from .platform import running_in_notebook
9
10
  T = TypeVar("T")
10
11
 
11
12
 
13
def is_callable_coroutine(func_or_cls: Any) -> bool:
    """Return True if `func_or_cls` is a coroutine function or async-callable.

    Args:
        func_or_cls: Any object. It is accepted when it is itself a coroutine
            function, or when it is callable and its `__call__` attribute is
            a coroutine function.

    Returns:
        True in either of the cases above; False otherwise (including for
        objects that are not callable at all).
    """
    # guard: anything that is not directly a coroutine function is decided
    # by inspecting its __call__ (when it has one)
    if not inspect.iscoroutinefunction(func_or_cls):
        if not callable(func_or_cls):
            return False
        return inspect.iscoroutinefunction(func_or_cls.__call__)
    return True
19
+
20
+
12
21
  def run_coroutine(coroutine: Coroutine[None, None, T]) -> T:
13
22
  if current_async_backend() == "trio":
14
23
  raise RuntimeError("run_coroutine cannot be used with trio")
inspect_swe/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.2.4'
32
- __version_tuple__ = version_tuple = (0, 2, 4)
31
+ __version__ = version = '0.2.6'
32
+ __version_tuple__ = version_tuple = (0, 2, 6)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspect_swe
3
- Version: 0.2.4
3
+ Version: 0.2.6
4
4
  Summary: Software engineering agents for Inspect AI.
5
5
  Project-URL: Documentation, https://meridianlabs-ai.github.io/inspect_swe/
6
6
  Project-URL: Source Code, https://github.com/meridianlabs-ai/inspect_swe
@@ -10,7 +10,7 @@ License: MIT License
10
10
  License-File: LICENSE
11
11
  Requires-Python: >=3.10
12
12
  Requires-Dist: httpx
13
- Requires-Dist: inspect-ai>=0.3.125
13
+ Requires-Dist: inspect-ai>=0.3.126
14
14
  Requires-Dist: nest-asyncio
15
15
  Requires-Dist: platformdirs
16
16
  Requires-Dist: pydantic>=2.11.4
@@ -1,9 +1,9 @@
1
- inspect_swe/__init__.py,sha256=aqHkY79cer0TXcw2dy7RRLkGWoTCmCPFprPuCtXR_6k,386
1
+ inspect_swe/__init__.py,sha256=yJ9tBcF2Wy11mVmLh1fTYXgYcsSHv30GAW-tVwE-r3s,342
2
2
  inspect_swe/_registry.py,sha256=jM37ysrY39Ufd67GRKbiwfSViOLlm-82lm_JEaWKshw,97
3
- inspect_swe/_version.py,sha256=NRw4Jle4n9v_DD2wtplRqflGCvX8OU5eAjycYY0vY3Y,704
3
+ inspect_swe/_version.py,sha256=2Q6v117QPuRsVsIEaHT3nJJVx7xxa47FYOkmuhVbGAI,704
4
4
  inspect_swe/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  inspect_swe/_claude_code/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
- inspect_swe/_claude_code/claude_code.py,sha256=-E_Ibu_xwIuDGGYgSFKXeBitfny0kINjwu-n-2rQdj8,7114
6
+ inspect_swe/_claude_code/claude_code.py,sha256=bRYPsSYK3G4J5mPbgcpZSSKqWtUnhAy4_3acWDyaORs,8359
7
7
  inspect_swe/_claude_code/install/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  inspect_swe/_claude_code/install/cache.py,sha256=k08bCxGq-iYVpO16LNQhPjxTM9p2iecpqMjqYd2WBss,1708
9
9
  inspect_swe/_claude_code/install/download.py,sha256=s1y4CDHVbJenfsR7OUwwxr5QFp-rDi4XnIxumDEvmws,3217
@@ -11,7 +11,7 @@ inspect_swe/_claude_code/install/install.py,sha256=nbf1SZJzr4DBPfUmBH64zWcdI4AnK
11
11
  inspect_swe/_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  inspect_swe/_tools/download.py,sha256=Jn_gcFR5Kw2vTYA1dWOFYRpqFtoFnKFv2Kv-4xT8tz4,1283
13
13
  inspect_swe/_util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- inspect_swe/_util/_async.py,sha256=cL8_Smmj2Es41TefceGDYLyVaO7gZ56VJcA4oByrWfQ,1520
14
+ inspect_swe/_util/_async.py,sha256=foxHmEaZusCbK8HOBbThZKCnwaPFerwLhQXh7jIafVU,1778
15
15
  inspect_swe/_util/_yaml.py,sha256=sRgf0UryF9Bd7pEEyfzL1qZBCgrpYe0l3l3U7bYeU44,505
16
16
  inspect_swe/_util/appdirs.py,sha256=V3o1ERdSYLjKP-m4O1T_Hvkx0UsP2HdfvsshLSQgP6E,562
17
17
  inspect_swe/_util/checksum.py,sha256=i-_GhtgCFd5eFj3PPJiGSCHDhZdPcIPNwiqddX93Sls,186
@@ -20,8 +20,8 @@ inspect_swe/_util/download.py,sha256=cCUau4ZBOKezpotJV5-v3JY_5CuYDZ-VcWlLf_EyNL0
20
20
  inspect_swe/_util/platform.py,sha256=wm4efIFfdyTeaV2oxOXVvYl1u22MHX3jQMERHJMgv7A,339
21
21
  inspect_swe/_util/sandbox.py,sha256=2wYmVz5EGUDBhqbN3NgLAOsyKeU-KRI161MZMJ54n4M,1769
22
22
  inspect_swe/_util/trace.py,sha256=mFHmBKn2F8iJP9PpTHaCseMHnTMz3ErRx6RCKV83rZk,139
23
- inspect_swe-0.2.4.dist-info/METADATA,sha256=wxryGFAjtZarLk41tmkyAGPVuiIh5OWFQq1QylHw0VM,1724
24
- inspect_swe-0.2.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
25
- inspect_swe-0.2.4.dist-info/entry_points.txt,sha256=OzpvUhd7M3T2Rog4MjwJAxIKeX5ljiR0mVYM9GefBKg,49
26
- inspect_swe-0.2.4.dist-info/licenses/LICENSE,sha256=Hi3UDcbD6yCKZ1mcgt7pprzSG0rDEnSrbrm3XinyiDA,1070
27
- inspect_swe-0.2.4.dist-info/RECORD,,
23
+ inspect_swe-0.2.6.dist-info/METADATA,sha256=SJwbJADMfKoOBkiQ0_RST4iR1hjoHNM0nrn-kN_kU_o,1724
24
+ inspect_swe-0.2.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
25
+ inspect_swe-0.2.6.dist-info/entry_points.txt,sha256=OzpvUhd7M3T2Rog4MjwJAxIKeX5ljiR0mVYM9GefBKg,49
26
+ inspect_swe-0.2.6.dist-info/licenses/LICENSE,sha256=Hi3UDcbD6yCKZ1mcgt7pprzSG0rDEnSrbrm3XinyiDA,1070
27
+ inspect_swe-0.2.6.dist-info/RECORD,,