inspect-ai 0.3.82__py3-none-any.whl → 0.3.83__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_display/textual/app.py +14 -3
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +9 -3
- inspect_ai/_display/textual/widgets/task_detail.py +3 -4
- inspect_ai/_display/textual/widgets/tasks.py +17 -1
- inspect_ai/_display/textual/widgets/vscode.py +44 -0
- inspect_ai/_eval/eval.py +36 -24
- inspect_ai/_eval/evalset.py +17 -18
- inspect_ai/_eval/loader.py +34 -11
- inspect_ai/_eval/run.py +8 -13
- inspect_ai/_eval/score.py +13 -3
- inspect_ai/_eval/task/generate.py +8 -9
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/task.py +23 -9
- inspect_ai/_util/file.py +13 -0
- inspect_ai/_util/json.py +2 -1
- inspect_ai/_util/registry.py +1 -0
- inspect_ai/_util/vscode.py +37 -0
- inspect_ai/_view/www/App.css +6 -0
- inspect_ai/_view/www/dist/assets/index.css +304 -128
- inspect_ai/_view/www/dist/assets/index.js +47495 -27519
- inspect_ai/_view/www/log-schema.json +124 -31
- inspect_ai/_view/www/package.json +3 -0
- inspect_ai/_view/www/src/App.tsx +12 -0
- inspect_ai/_view/www/src/appearance/icons.ts +1 -0
- inspect_ai/_view/www/src/components/Card.tsx +6 -4
- inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
- inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
- inspect_ai/_view/www/src/components/Modal.module.css +38 -0
- inspect_ai/_view/www/src/components/Modal.tsx +77 -0
- inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
- inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
- inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
- inspect_ai/_view/www/src/state/hooks.ts +5 -3
- inspect_ai/_view/www/src/state/logPolling.ts +5 -1
- inspect_ai/_view/www/src/state/logSlice.ts +10 -0
- inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
- inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
- inspect_ai/_view/www/src/types/log.d.ts +34 -26
- inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
- inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
- inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
- inspect_ai/_view/www/yarn.lock +94 -1
- inspect_ai/agent/__init__.py +36 -0
- inspect_ai/agent/_agent.py +268 -0
- inspect_ai/agent/_as_solver.py +72 -0
- inspect_ai/agent/_as_tool.py +122 -0
- inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
- inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
- inspect_ai/agent/_filter.py +46 -0
- inspect_ai/agent/_handoff.py +93 -0
- inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
- inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
- inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
- inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
- inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
- inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
- inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
- inspect_ai/agent/_react.py +241 -0
- inspect_ai/agent/_run.py +36 -0
- inspect_ai/agent/_types.py +81 -0
- inspect_ai/log/_log.py +11 -2
- inspect_ai/log/_transcript.py +13 -9
- inspect_ai/model/__init__.py +7 -1
- inspect_ai/model/_call_tools.py +256 -52
- inspect_ai/model/_chat_message.py +7 -4
- inspect_ai/model/_conversation.py +13 -62
- inspect_ai/model/_display.py +85 -0
- inspect_ai/model/_model.py +113 -14
- inspect_ai/model/_model_output.py +14 -9
- inspect_ai/model/_openai.py +16 -4
- inspect_ai/model/_openai_computer_use.py +162 -0
- inspect_ai/model/_openai_responses.py +319 -165
- inspect_ai/model/_providers/anthropic.py +20 -21
- inspect_ai/model/_providers/azureai.py +24 -13
- inspect_ai/model/_providers/bedrock.py +1 -7
- inspect_ai/model/_providers/cloudflare.py +3 -3
- inspect_ai/model/_providers/goodfire.py +2 -6
- inspect_ai/model/_providers/google.py +11 -10
- inspect_ai/model/_providers/groq.py +6 -3
- inspect_ai/model/_providers/hf.py +7 -3
- inspect_ai/model/_providers/mistral.py +7 -10
- inspect_ai/model/_providers/openai.py +47 -17
- inspect_ai/model/_providers/openai_o1.py +11 -4
- inspect_ai/model/_providers/openai_responses.py +12 -14
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/together.py +12 -2
- inspect_ai/model/_providers/util/chatapi.py +7 -2
- inspect_ai/model/_providers/util/hf_handler.py +4 -2
- inspect_ai/model/_providers/util/llama31.py +4 -2
- inspect_ai/model/_providers/vertex.py +11 -9
- inspect_ai/model/_providers/vllm.py +4 -4
- inspect_ai/scorer/__init__.py +2 -0
- inspect_ai/scorer/_metrics/__init__.py +2 -0
- inspect_ai/scorer/_metrics/grouped.py +84 -0
- inspect_ai/scorer/_score.py +26 -6
- inspect_ai/solver/__init__.py +2 -2
- inspect_ai/solver/_basic_agent.py +22 -9
- inspect_ai/solver/_bridge.py +31 -0
- inspect_ai/solver/_chain.py +20 -12
- inspect_ai/solver/_fork.py +5 -1
- inspect_ai/solver/_human_agent.py +52 -0
- inspect_ai/solver/_prompt.py +3 -1
- inspect_ai/solver/_run.py +59 -0
- inspect_ai/solver/_solver.py +14 -4
- inspect_ai/solver/_task_state.py +5 -3
- inspect_ai/tool/_tool_call.py +15 -8
- inspect_ai/tool/_tool_def.py +17 -12
- inspect_ai/tool/_tool_support_helpers.py +2 -2
- inspect_ai/tool/_tool_with.py +14 -11
- inspect_ai/tool/_tools/_bash_session.py +11 -2
- inspect_ai/tool/_tools/_computer/_common.py +18 -2
- inspect_ai/tool/_tools/_computer/_computer.py +18 -2
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
- inspect_ai/tool/_tools/_think.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_anyio.py +27 -0
- inspect_ai/util/_sandbox/__init__.py +2 -1
- inspect_ai/util/_sandbox/context.py +32 -7
- inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/docker/docker.py +12 -1
- inspect_ai/util/_store_model.py +30 -7
- inspect_ai/util/_subprocess.py +13 -3
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/RECORD +179 -153
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
- /inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
- /inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.82.dist-info → inspect_ai-0.3.83.dist-info}/top_level.txt +0 -0
inspect_ai/agent/_as_solver.py

@@ -0,0 +1,72 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from inspect_ai.solver._solver import Solver
+
+from inspect_ai._util.registry import (
+    is_registry_object,
+    registry_unqualified_name,
+)
+from inspect_ai.tool._tool_info import parse_tool_info
+
+from ._agent import Agent, AgentState
+
+
+def as_solver(agent: Agent, **agent_kwargs: Any) -> Solver:
+    """Convert an agent to a solver.
+
+    Note that agents used as solvers will only receive their first parameter
+    (`state`). Any other parameters must provide appropriate defaults
+    or be explicitly specified in `agent_kwargs`
+
+    Args:
+        agent: Agent to convert.
+        **agent_kwargs: Arguments to curry to Agent function (required
+            if the agent has parameters without default values).
+
+    Solver:
+        Solver from agent.
+    """
+    from inspect_ai.solver._solver import Generate, solver
+    from inspect_ai.solver._task_state import TaskState
+
+    # agent must be registered (so we can get its name)
+    if not is_registry_object(agent):
+        raise RuntimeError(
+            "Agent passed to as_solver was not created by an @agent decorated function"
+        )
+    agent_name = registry_unqualified_name(agent)
+
+    # check to make sure we have all the parameters we need to run the agent
+    agent_info = parse_tool_info(agent)
+    for name, param in list(agent_info.parameters.properties.items())[1:]:
+        if param.default is None and name not in agent_kwargs:
+            raise ValueError(
+                f"To use the {agent_name} agent as a solver "
+                + f"you must pass a value for the agent's required '{name}' "
+                + "parameter to the as_solver() function."
+            )
+
+    @solver(name=agent_name)
+    def agent_to_solver() -> Solver:
+        async def solve(state: TaskState, generate: Generate) -> TaskState:
+            # run agent
+            agent_state = await agent(
+                AgentState(messages=state.messages), **agent_kwargs
+            )
+
+            # update messages
+            state.messages = agent_state.messages
+
+            # update output if its not empty
+            if agent_state.output:
+                state.output = agent_state.output
+
+            return state
+
+        # return solver
+        return solve
+
+    return agent_to_solver()
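A minimal usage sketch of the new `as_solver()` adapter, following the `@agent` factory pattern used in the other new files. The `summarizer` agent below is hypothetical; only `as_solver`, `@agent`, `AgentState`, and `get_model` (and their module paths) come from this diff.

```python
# Hypothetical usage sketch: converting an @agent-decorated function to a Solver.
from inspect_ai.agent._agent import Agent, AgentState, agent
from inspect_ai.agent._as_solver import as_solver
from inspect_ai.model._model import get_model


@agent
def summarizer() -> Agent:
    async def execute(state: AgentState) -> AgentState:
        # generate a completion from the current conversation and record it
        state.output = await get_model().generate(input=state.messages)
        state.messages.append(state.output.message)
        return state

    return execute


# any required agent parameters without defaults would be supplied as
# keyword arguments to as_solver()
summarizer_solver = as_solver(summarizer())
```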
inspect_ai/agent/_as_tool.py

@@ -0,0 +1,122 @@
+from typing import Any
+
+from inspect_ai._util.registry import (
+    is_registry_object,
+    registry_info,
+    registry_unqualified_name,
+)
+from inspect_ai.model._chat_message import ChatMessageAssistant, ChatMessageUser
+from inspect_ai.tool._tool import Tool, ToolResult, tool
+from inspect_ai.tool._tool_def import ToolDef, validate_tool_parameters
+from inspect_ai.tool._tool_info import ToolInfo, parse_tool_info
+from inspect_ai.tool._tool_params import ToolParam
+
+from ._agent import AGENT_DESCRIPTION, Agent, AgentState
+
+
+@tool
+def as_tool(agent: Agent, description: str | None = None, **agent_kwargs: Any) -> Tool:
+    """Convert an agent to a tool.
+
+    By default the model will see all of the agent's arguments as
+    tool arguments (save for `state` which is converted to an `input`
+    arguments of type `str`). Provide optional `agent_kwargs` to mask
+    out agent parameters with default values (these parameters will
+    not be presented to the model as part of the tool interface)
+
+    Args:
+        agent: Agent to convert.
+        description: Tool description (defaults to agent description)
+        **agent_kwargs: Arguments to curry to Agent function (arguments
+            provided here will not be presented to the model as part
+            of the tool interface).
+
+    Returns:
+        Tool from agent.
+    """
+    # agent must be registered (so we can get its name)
+    if not is_registry_object(agent):
+        raise RuntimeError(
+            "Agent passed to as_tool was not created by an @agent decorated function"
+        )
+
+    async def execute(input: str, *args: Any, **kwargs: Any) -> ToolResult:
+        # prepare state and call agent
+        state = AgentState(messages=[ChatMessageUser(content=input)])
+        state = await agent(state, *args, **(agent_kwargs | kwargs))
+
+        # find assistant message to read content from (prefer output)
+        if not state.output.empty:
+            return state.output.message.content
+        elif len(state.messages) > 0 and isinstance(
+            state.messages[-1], ChatMessageAssistant
+        ):
+            return state.messages[-1].content
+        else:
+            return ""
+
+    # get tool_info
+    tool_info = agent_tool_info(agent, description, **agent_kwargs)
+
+    # add "input" param
+    tool_info.parameters.properties = {
+        "input": ToolParam(type="string", description="Input message.")
+    } | tool_info.parameters.properties
+    tool_info.parameters.required.append("input")
+
+    # create tool
+    tool_def = ToolDef(
+        execute,
+        name=tool_info.name,
+        description=tool_info.description,
+        parameters=tool_info.parameters,
+    )
+    return tool_def.as_tool()
+
+
+def agent_tool_info(
+    agent: Agent, description: str | None, **agent_kwargs: Any
+) -> ToolInfo:
+    # get tool_info and name
+    tool_info = parse_tool_info(agent)
+    tool_info.name = registry_unqualified_name(agent)
+
+    # remove "state" param
+    def remove_param(param: str) -> None:
+        if param in tool_info.parameters.properties:
+            del tool_info.parameters.properties[param]
+        if param in tool_info.parameters.required:
+            tool_info.parameters.required.remove(param)
+
+    remove_param("state")
+
+    # validate and remove curried params
+    for agent_param in agent_kwargs.keys():
+        if agent_param in tool_info.parameters.properties:
+            remove_param(agent_param)
+        else:
+            raise ValueError(
+                f"Agent {tool_info.name} does not have a '{agent_param}' parameter."
+            )
+
+    # resolve and validate description. the description in the call takes
+    # precedence, then any @agent(description="<foo>"), and finally any
+    # doc comment on the agent's execute function
+    reg_info = registry_info(agent)
+    tool_info.description = (
+        description
+        or reg_info.metadata.get(AGENT_DESCRIPTION, None)
+        or tool_info.description
+    )
+    if len(tool_info.description) == 0:
+        raise ValueError(
+            f"Description not provided for agent function '{tool_info.name}'. Provide a "
+            + "description either via @agent(description='<description>'), the description "
+            + "argument to as_tool() or handoff(), or via a doc comment on the agent's "
+            + "execute function."
+        )
+
+    # validate parameter descriptions and types
+    validate_tool_parameters(tool_info.name, tool_info.parameters.properties)
+
+    return tool_info
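A sketch of how an agent might be exposed as a tool with `as_tool()`. The `web_searcher` agent and its `max_results` parameter are assumptions for illustration; the conversion behavior (an `input` string parameter added, curried parameters masked from the model) follows the code above.

```python
# Hypothetical usage sketch: exposing an agent as a tool with as_tool().
from inspect_ai.agent._agent import Agent, AgentState, agent
from inspect_ai.agent._as_tool import as_tool
from inspect_ai.model._chat_message import ChatMessageAssistant


@agent(description="Searches the web and summarizes the results.")
def web_searcher() -> Agent:
    async def execute(state: AgentState, max_results: int = 5) -> AgentState:
        # a real agent would search here; this stub just appends a reply
        state.messages.append(
            ChatMessageAssistant(content=f"Top {max_results} results: ...")
        )
        return state

    return execute


# the model sees an `input` string parameter; `max_results` is curried away
# and therefore not part of the tool interface
search_tool = as_tool(web_searcher(), max_results=3)
```

Because the stub never sets `state.output`, the tool result falls back to the last assistant message, matching the `execute()` logic in `as_tool()` above.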
inspect_ai/{solver → agent}/_bridge/bridge.py

@@ -5,17 +5,15 @@ from pydantic import BaseModel, Field, ValidationError
 from pydantic_core import to_json

 from inspect_ai._util._async import is_callable_coroutine
-from inspect_ai.
+from inspect_ai.agent._agent import Agent, AgentState, agent
+from inspect_ai.model._model import get_model
+from inspect_ai.model._model_output import ModelOutput
 from inspect_ai.model._providers.providers import validate_openai_client
-from inspect_ai.scorer._metric import Score

-from .._solver import Generate, Solver, solver
-from .._task_state import TaskState

-
-
-
-    """Bridge an external agent into an Inspect Solver.
+@agent
+def bridge(agent: Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]) -> Agent:
+    """Bridge an external agent into an Inspect Agent.

     See documentation at <https://inspect.aisi.org.uk/agent-bridge.html>

@@ -25,7 +23,7 @@ def bridge(agent: Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]) -> Solv
     Returns:
        Standard Inspect solver.
     """
-    validate_openai_client("
+    validate_openai_client("Agent bridge()")

     from openai.types.chat import ChatCompletionMessageParam

@@ -36,17 +34,15 @@ def bridge(agent: Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]) -> Solv

     from .patch import openai_request_to_inspect_model

-    class
-
-
+    class BridgeInput(BaseModel):
+        messages: list[ChatCompletionMessageParam]
+
+        # temporarily here for backward compatibility w/ previous bridge
         input: list[ChatCompletionMessageParam]
-        metadata: dict[str, Any]
-        target: list[str]

     class BridgeResult(BaseModel):
         output: str
         messages: list[ChatCompletionMessageParam] | None = Field(default=None)
-        scores: dict[str, Score] | None = Field(default=None)

     result_schema = BridgeResult.model_json_schema()
     result_validator = Draft7Validator(result_schema)

@@ -55,27 +51,15 @@ def bridge(agent: Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]) -> Solv
     if not is_callable_coroutine(agent):
         raise TypeError(f"'{agent.__name__}' is not declared as an async callable.")

-    async def
-        #
-
-
-        if isinstance(state.input, str)
-        else state.input
-        )
-
-        # create sample (use standard gpt-4 message encoding -- i.e. no 'developer' messages)
-        sample = BridgeSample(
-            sample_id=str(state.sample_id),
-            epoch=state.epoch,
-            input=await openai_chat_messages(input, model="gpt-4"),
-            metadata=state.metadata,
-            target=list(state.target),
-        )
+    async def execute(state: AgentState) -> AgentState:
+        # create input (use standard gpt-4 message encoding -- i.e. no 'developer' messages)
+        messages = await openai_chat_messages(state.messages, model="gpt-4")
+        input = BridgeInput(messages=messages, input=messages)

         # run target function
         async with openai_request_to_inspect_model():
            # call the function
-            result_dict = await agent(
+            result_dict = await agent(input.model_dump())
            try:
                result = BridgeResult.model_validate(result_dict)
            except ValidationError:

@@ -89,12 +73,14 @@ def bridge(agent: Callable[[dict[str, Any]], Awaitable[dict[str, Any]]]) -> Solv
                raise ValueError(message)

        # update and return state
-        state.output
+        state.output = ModelOutput.from_content(
+            model=get_model().name, content=result.output
+        )
        if result.messages is not None:
-            state.messages = chat_messages_from_openai(
-
-
+            state.messages = chat_messages_from_openai(
+                state.output.model, result.messages
+            )

        return state

-    return
+    return execute
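A sketch of the revised `bridge()` contract implied by these changes: the wrapped callable receives a dict whose `messages` field holds OpenAI-format chat messages, and returns a dict matching `BridgeResult` (an `output` string, optionally `messages`). The `external_agent` function below is illustrative; only `bridge()` and its module path come from the diff.

```python
# Hypothetical external agent bridged into Inspect via the new agent-based bridge().
from typing import Any

from inspect_ai.agent._bridge.bridge import bridge


async def external_agent(sample: dict[str, Any]) -> dict[str, Any]:
    # sample["messages"] holds OpenAI ChatCompletion-style message dicts; an
    # external framework would normally run its own loop here
    user_texts = [m["content"] for m in sample["messages"] if m["role"] == "user"]
    return {"output": user_texts[-1] if user_texts else ""}


# the result is an Agent; OpenAI requests made against the "inspect" model inside
# the bridged call are routed to the active Inspect model (see patch.py below)
bridged_agent = bridge(external_agent)
```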
inspect_ai/{solver → agent}/_bridge/patch.py

@@ -23,7 +23,6 @@ from inspect_ai.model._openai import (
     openai_chat_choices,
     openai_completion_usage,
 )
-from inspect_ai.solver._task_state import sample_state
 from inspect_ai.tool._tool_choice import ToolChoice, ToolFunction
 from inspect_ai.tool._tool_info import ToolInfo
 from inspect_ai.tool._tool_params import ToolParams

@@ -98,10 +97,18 @@ def init_openai_request_patch() -> None:
 async def inspect_model_request(
     model_name: str, options: FinalRequestOptions
 ) -> ChatCompletion:
+    from inspect_ai.solver._task_state import sample_state
+
+    # resolve model
+    if model_name == "inspect":
+        model = get_model()
+    else:
+        model = get_model(model_name.removeprefix("inspect/"))
+
     # convert openai messages to inspect messages
     json_data = cast(dict[str, Any], options.json_data)
     messages: list[ChatCompletionMessageParam] = json_data["messages"]
-    input = chat_messages_from_openai(messages)
+    input = chat_messages_from_openai(model.api.model_name, messages)

     # convert openai tools to inspect tools
     tools: list[ChatCompletionToolParam] = json_data.get("tools", [])

@@ -130,12 +137,6 @@
         case _:
             inspect_tool_choice = ToolFunction(name=tool_choice["function"]["name"])

-    # resolve model
-    if model_name == "inspect":
-        model = get_model()
-    else:
-        model = get_model(model_name.removeprefix("inspect/"))
-
     output = await model.generate(
         input=input,
         tools=inspect_tools,
inspect_ai/agent/_filter.py

@@ -0,0 +1,46 @@
+from typing import Awaitable, Callable
+
+from inspect_ai.model._chat_message import (
+    ChatMessage,
+    ChatMessageAssistant,
+    ChatMessageTool,
+)
+
+MessageFilter = Callable[[list[ChatMessage]], Awaitable[list[ChatMessage]]]
+"""Filter messages sent to or received from agent handoffs."""
+
+
+async def remove_tools(messages: list[ChatMessage]) -> list[ChatMessage]:
+    """Remove tool calls from messages.
+
+    Removes all instances of `ChatMessageTool` as well as the `tool_calls`
+    field from `ChatMessageAssistant`.
+
+    Args:
+        messages: Messages to remove tool calls from.
+
+    Returns:
+        Messages without tool calls.
+    """
+    filtered: list[ChatMessage] = []
+    for message in messages:
+        if isinstance(message, ChatMessageTool):
+            continue
+        if isinstance(message, ChatMessageAssistant):
+            message = message.model_copy(update=dict(tool_calls=None))
+        filtered.append(message)
+
+    return filtered
+
+
+async def last_message(messages: list[ChatMessage]) -> list[ChatMessage]:
+    """Remove all but the last message.
+
+    Args:
+        messages: Target messages.
+
+    Returns:
+        List containing only the last message from the input list.
+
+    """
+    return messages[-1:]
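A custom filter is simply an async function matching the `MessageFilter` alias above; a brief sketch (the `keep_last_five` name is illustrative):

```python
# Hypothetical custom MessageFilter with the same async signature as the
# built-in remove_tools and last_message filters.
from inspect_ai.agent._filter import MessageFilter
from inspect_ai.model._chat_message import ChatMessage


async def keep_last_five(messages: list[ChatMessage]) -> list[ChatMessage]:
    # retain only the five most recent messages
    return messages[-5:]


trim_filter: MessageFilter = keep_last_five
```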
inspect_ai/agent/_handoff.py

@@ -0,0 +1,93 @@
+from typing import Any
+
+from inspect_ai._util.registry import (
+    RegistryInfo,
+    is_registry_object,
+    registry_unqualified_name,
+    set_registry_info,
+)
+from inspect_ai.tool._tool import Tool, ToolResult
+from inspect_ai.tool._tool_description import ToolDescription, set_tool_description
+
+from ._agent import Agent
+from ._as_tool import agent_tool_info
+from ._filter import MessageFilter
+
+
+def handoff(
+    agent: Agent,
+    description: str | None = None,
+    input_filter: MessageFilter | None = None,
+    output_filter: MessageFilter | None = None,
+    tool_name: str | None = None,
+    **agent_kwargs: Any,
+) -> Tool:
+    """Create a tool that enables models to handoff to agents.
+
+    Args:
+        agent: Agent to hand off to.
+        description: Handoff tool description (defaults to agent description)
+        input_filter: Filter to modify the message history before calling the tool.
+            Use the built-in `remove_tools` filter to remove all tool calls
+            or alternatively specify a custom `MessageFilter` function.
+        output_filter: Filter to modify the message history after calling the tool.
+            Use the built-in `last_message` filter to return only the last message
+            or alternatively specify a custom `MessageFilter` function.
+        tool_name: Alternate tool name (defaults to `transfer_to_{agent_name}`)
+        **agent_kwargs: Arguments to curry to `Agent` function (arguments provided here
+            will not be presented to the model as part of the tool interface).
+
+    Returns:
+        Tool for handing off to the agent (must be called using `execute_tools()` to be
+        properly handled)
+    """
+    # agent must be registered (so we can get its name)
+    if not is_registry_object(agent):
+        raise RuntimeError(
+            "Agent passed to as_tool was not created by an @agent decorated function"
+        )
+
+    # get tool_info
+    tool_info = agent_tool_info(agent, description, **agent_kwargs)
+
+    # AgentTool calls will be intercepted by execute_tools
+    agent_tool = AgentTool(agent, input_filter, output_filter, **agent_kwargs)
+    tool_name = tool_name or f"transfer_to_{tool_info.name}"
+    set_registry_info(agent_tool, RegistryInfo(type="tool", name=tool_name))
+    set_tool_description(
+        agent_tool,
+        ToolDescription(
+            name=tool_name,
+            description=tool_info.description,
+            parameters=tool_info.parameters,
+        ),
+    )
+    return agent_tool
+
+
+class AgentTool(Tool):
+    def __init__(
+        self,
+        agent: Agent,
+        input_filter: MessageFilter | None = None,
+        output_filter: MessageFilter | None = None,
+        **kwargs: Any,
+    ):
+        self.agent = agent
+        self.input_filter = input_filter
+        self.output_filter = output_filter
+        self.kwargs = kwargs
+
+    @property
+    def __name__(self) -> str:
+        return registry_unqualified_name(self.agent)
+
+    async def __call__(self) -> ToolResult:
+        raise RuntimeError("AgentTool should not be called directly")
+
+
+def has_handoff(tools: list[Tool] | None) -> bool:
+    if tools:
+        return any([isinstance(tool, AgentTool) for tool in tools])
+    else:
+        return False
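A sketch combining `handoff()` with the built-in filters from `_filter.py`. The `researcher` agent is hypothetical; the `handoff`, `remove_tools`, and `last_message` signatures come from the code above.

```python
# Hypothetical handoff usage: expose an agent as a transfer_to_* tool.
from inspect_ai.agent._agent import Agent, AgentState, agent
from inspect_ai.agent._filter import last_message, remove_tools
from inspect_ai.agent._handoff import handoff


@agent(description="Researches a topic and reports back.")
def researcher() -> Agent:
    async def execute(state: AgentState) -> AgentState:
        ...  # a real agent would run its research loop here
        return state

    return execute


# presented to the model as a `transfer_to_researcher` tool; prior tool calls
# are stripped on the way in and only the final message is returned
research_tool = handoff(
    researcher(),
    input_filter=remove_tools,
    output_filter=last_message,
)
```

Per the docstring, the resulting `AgentTool` is only dispatched correctly when tools are invoked through `execute_tools()`.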
inspect_ai/{solver/_human_agent → agent/_human}/agent.py

@@ -5,8 +5,7 @@ import anyio
 from inspect_ai.util import display_type, input_panel, sandbox
 from inspect_ai.util._sandbox.events import SandboxEnvironmentProxy

-from ..
-from .._task_state import TaskState
+from .._agent import Agent, AgentState, agent
 from .commands import human_agent_commands
 from .install import install_human_agent
 from .panel import HumanAgentPanel

@@ -14,15 +13,15 @@ from .service import run_human_agent_service
 from .view import ConsoleView, HumanAgentView


-@
-def
+@agent
+def human_cli(
     answer: bool | str = True,
     intermediate_scoring: bool = False,
     record_session: bool = True,
-) ->
-    """Human
+) -> Agent:
+    """Human CLI agent for tasks that run in a sandbox.

-    The Human agent
+    The Human CLI agent installs agent task tools in the default
     sandbox and presents the user with both task instructions and
     documentation for the various tools (e.g. `task submit`,
     `task start`, `task stop` `task instructions`, etc.). A human agent panel

@@ -40,12 +39,12 @@ def human_agent(
        record_session: Record all user commands and outputs in the sandbox bash session.

     Returns:
-
+       Agent: Human CLI agent.
     """
     # we can only run one human agent interaction at a time (use lock to enforce)
     agent_lock = anyio.Lock()

-    async def
+    async def execute(state: AgentState) -> AgentState:
         async with agent_lock:
             # ensure that we have a sandbox to work with
             try:

@@ -58,7 +57,7 @@ def human_agent(
             )

             # helper function to run the agent (called for fullscreen vs. fallback below)
-            async def run_human_agent(view: HumanAgentView) ->
+            async def run_human_agent(view: HumanAgentView) -> AgentState:
                 sandbox_proxy = cast(SandboxEnvironmentProxy, sandbox())
                 with sandbox_proxy.no_events():
                     # create agent commands

@@ -67,7 +66,7 @@ def human_agent(
                     )

                     # install agent tools
-                    await install_human_agent(
+                    await install_human_agent(commands, record_session)

                     # hookup the view ui
                     view.connect(connection)

@@ -82,4 +81,4 @@ def human_agent(
            else:
                return await run_human_agent(ConsoleView())

-    return
+    return execute

inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py

@@ -1,5 +1,4 @@
-from
-
+from ..._agent import AgentState
 from .clock import StartCommand, StopCommand
 from .command import HumanAgentCommand
 from .instructions import InstructionsCommand

@@ -10,7 +9,7 @@ from .submit import QuitCommand, SubmitCommand, ValidateCommand


 def human_agent_commands(
-    state:
+    state: AgentState,
     answer: bool | str,
     intermediate_scoring: bool,
     record_session: bool,

inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py

@@ -62,7 +62,9 @@ class StopCommand(HumanAgentCommand):
         return stop


-def clock_action_event(
+def clock_action_event(
+    action: Literal["start", "stop"], state: HumanAgentState
+) -> None:
     from inspect_ai.log._transcript import transcript

     transcript().info(

inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py

@@ -8,13 +8,13 @@ from inspect_ai._util.ansi import render_text
 from inspect_ai.model._model_output import ModelOutput
 from inspect_ai.scorer._score import score

-from ...
+from ..._agent import AgentState
 from ..state import HumanAgentState, IntermediateScoring
 from .command import HumanAgentCommand, call_human_agent


 class ScoreCommand(HumanAgentCommand):
-    def __init__(self, state:
+    def __init__(self, state: AgentState):
         self._state = state

     @property

@@ -52,9 +52,9 @@ class ScoreCommand(HumanAgentCommand):
         async def score_task(answer: str | None) -> str:
             # make a copy of TaskState, add the answer, then score
             if answer:
-
-
-                result = await score(
+                agent_state = deepcopy(self._state)
+                agent_state.output = ModelOutput.from_content("human_agent", answer)
+                result = await score(agent_state)
             else:
                 result = await score(self._state)

inspect_ai/{solver/_human_agent → agent/_human}/install.py

@@ -3,7 +3,6 @@ from textwrap import dedent

 from inspect_ai.util import sandbox

-from .._task_state import TaskState
 from .commands.command import HumanAgentCommand

 INSTALL_DIR = "human_agent_install"

@@ -18,7 +17,7 @@ RECORD_SESSION_DIR = "/var/tmp/user-sessions"


 async def install_human_agent(
-
+    commands: list[HumanAgentCommand], record_session: bool
 ) -> None:
     # see if we have already installed
     if not (await sandbox().exec(["mkdir", HUMAN_AGENT_DIR])).success:

@@ -183,8 +182,12 @@ def human_agent_bashrc(commands: list[HumanAgentCommand], record_session: bool)
     fi
     """).lstrip()

+    CLOCK = dedent("""
+    task start
+    """).lstrip()
+
     # return .bashrc
-    return "\n".join([TERMINAL_CHECK, COMMANDS, RECORDING, INSTRUCTIONS])
+    return "\n".join([TERMINAL_CHECK, COMMANDS, RECORDING, INSTRUCTIONS, CLOCK])


 def human_agent_install_sh() -> str:

inspect_ai/{solver/_human_agent → agent/_human}/service.py

@@ -1,20 +1,24 @@
+from inspect_ai.agent._human.commands.clock import clock_action_event
 from inspect_ai.model import ModelOutput
 from inspect_ai.util._sandbox import sandbox
 from inspect_ai.util._sandbox.service import sandbox_service

-from ..
+from .._agent import AgentState
 from .commands.command import HumanAgentCommand
 from .state import HumanAgentState
 from .view import HumanAgentView


 async def run_human_agent_service(
-    state:
-) ->
+    state: AgentState, commands: list[HumanAgentCommand], view: HumanAgentView | None
+) -> AgentState:
     # initialise agent state
     instructions = "\n\n".join([message.text for message in state.messages]).strip()
     agent_state = HumanAgentState(instructions=instructions)

+    # record that clock is stopped
+    clock_action_event("stop", agent_state)
+
     # extract service methods from commands
     methods = {
         command.name: command.service(agent_state)
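Taken together with `as_solver()`, the renamed human CLI agent can still be used wherever a solver is expected. A brief sketch (task wiring omitted; only the `human_cli` and `as_solver` signatures and module paths come from this diff):

```python
# Hypothetical adaptation of the renamed human_cli agent back into a Solver.
from inspect_ai.agent._as_solver import as_solver
from inspect_ai.agent._human.agent import human_cli

# human_cli() returns an Agent; as_solver() adapts it for use as a task solver
human_solver = as_solver(human_cli(intermediate_scoring=True))
```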