PyPI - inspect-ai - Versions diffs - 0.3.82__py3-none-any.whl → 0.3.84__py3-none-any.whl - Mend

inspect-ai 0.3.82__py3-none-any.whl → 0.3.84__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (180) hide show

inspect_ai/__init__.py +2 -1
inspect_ai/_display/textual/app.py +14 -3
inspect_ai/_display/textual/display.py +4 -0
inspect_ai/_display/textual/widgets/samples.py +9 -3
inspect_ai/_display/textual/widgets/task_detail.py +3 -4
inspect_ai/_display/textual/widgets/tasks.py +17 -1
inspect_ai/_display/textual/widgets/vscode.py +48 -0
inspect_ai/_eval/eval.py +36 -24
inspect_ai/_eval/evalset.py +17 -18
inspect_ai/_eval/loader.py +34 -11
inspect_ai/_eval/run.py +8 -13
inspect_ai/_eval/score.py +13 -3
inspect_ai/_eval/task/generate.py +8 -9
inspect_ai/_eval/task/log.py +2 -0
inspect_ai/_eval/task/task.py +23 -9
inspect_ai/_util/file.py +13 -0
inspect_ai/_util/json.py +2 -1
inspect_ai/_util/registry.py +1 -0
inspect_ai/_util/vscode.py +37 -0
inspect_ai/_view/www/App.css +6 -0
inspect_ai/_view/www/dist/assets/index.css +304 -128
inspect_ai/_view/www/dist/assets/index.js +47495 -27519
inspect_ai/_view/www/log-schema.json +124 -31
inspect_ai/_view/www/package.json +3 -0
inspect_ai/_view/www/src/App.tsx +12 -0
inspect_ai/_view/www/src/appearance/icons.ts +1 -0
inspect_ai/_view/www/src/components/Card.tsx +6 -4
inspect_ai/_view/www/src/components/LinkButton.module.css +16 -0
inspect_ai/_view/www/src/components/LinkButton.tsx +33 -0
inspect_ai/_view/www/src/components/LiveVirtualList.tsx +1 -1
inspect_ai/_view/www/src/components/MarkdownDiv.tsx +113 -23
inspect_ai/_view/www/src/components/Modal.module.css +38 -0
inspect_ai/_view/www/src/components/Modal.tsx +77 -0
inspect_ai/_view/www/src/plan/DetailStep.module.css +4 -0
inspect_ai/_view/www/src/plan/DetailStep.tsx +6 -3
inspect_ai/_view/www/src/plan/SolverDetailView.module.css +2 -1
inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +7 -0
inspect_ai/_view/www/src/samples/SampleDialog.tsx +7 -0
inspect_ai/_view/www/src/samples/SampleDisplay.tsx +11 -34
inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +6 -0
inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
inspect_ai/_view/www/src/samples/SamplesTools.tsx +12 -0
inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +2 -0
inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -0
inspect_ai/_view/www/src/samples/chat/messages.ts +3 -1
inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +1 -0
inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +9 -3
inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +3 -3
inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +4 -4
inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +10 -11
inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +2 -1
inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +7 -1
inspect_ai/_view/www/src/samples/list/SampleList.tsx +25 -8
inspect_ai/_view/www/src/samples/list/SampleRow.tsx +1 -1
inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +11 -22
inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.module.css +38 -0
inspect_ai/_view/www/src/samples/scores/SampleScoresGrid.tsx +118 -0
inspect_ai/_view/www/src/samples/scores/{SampleScoreView.module.css → SampleScoresView.module.css} +10 -1
inspect_ai/_view/www/src/samples/scores/SampleScoresView.tsx +78 -0
inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +3 -3
inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +25 -4
inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +29 -2
inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +0 -1
inspect_ai/_view/www/src/state/hooks.ts +5 -3
inspect_ai/_view/www/src/state/logPolling.ts +5 -1
inspect_ai/_view/www/src/state/logSlice.ts +10 -0
inspect_ai/_view/www/src/state/samplePolling.ts +4 -1
inspect_ai/_view/www/src/state/sampleSlice.ts +13 -0
inspect_ai/_view/www/src/types/log.d.ts +34 -26
inspect_ai/_view/www/src/types/markdown-it-katex.d.ts +21 -0
inspect_ai/_view/www/src/utils/json-worker.ts +79 -12
inspect_ai/_view/www/src/workspace/WorkSpace.tsx +18 -16
inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +16 -0
inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +68 -71
inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.module.css +35 -0
inspect_ai/_view/www/src/workspace/navbar/ScoreGrid.tsx +117 -0
inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +1 -1
inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +3 -2
inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +18 -0
inspect_ai/_view/www/yarn.lock +94 -1
inspect_ai/agent/__init__.py +36 -0
inspect_ai/agent/_agent.py +268 -0
inspect_ai/agent/_as_solver.py +72 -0
inspect_ai/agent/_as_tool.py +122 -0
inspect_ai/{solver → agent}/_bridge/bridge.py +23 -37
inspect_ai/{solver → agent}/_bridge/patch.py +9 -8
inspect_ai/agent/_filter.py +46 -0
inspect_ai/agent/_handoff.py +93 -0
inspect_ai/{solver/_human_agent → agent/_human}/agent.py +11 -12
inspect_ai/{solver/_human_agent → agent/_human}/commands/__init__.py +2 -3
inspect_ai/{solver/_human_agent → agent/_human}/commands/clock.py +3 -1
inspect_ai/{solver/_human_agent → agent/_human}/commands/score.py +5 -5
inspect_ai/{solver/_human_agent → agent/_human}/install.py +6 -3
inspect_ai/{solver/_human_agent → agent/_human}/service.py +7 -3
inspect_ai/{solver/_human_agent → agent/_human}/state.py +5 -5
inspect_ai/agent/_react.py +241 -0
inspect_ai/agent/_run.py +36 -0
inspect_ai/agent/_types.py +81 -0
inspect_ai/log/_log.py +11 -2
inspect_ai/log/_transcript.py +13 -9
inspect_ai/model/__init__.py +7 -1
inspect_ai/model/_call_tools.py +256 -52
inspect_ai/model/_chat_message.py +7 -4
inspect_ai/model/_conversation.py +13 -62
inspect_ai/model/_display.py +85 -0
inspect_ai/model/_model.py +113 -14
inspect_ai/model/_model_output.py +14 -9
inspect_ai/model/_openai.py +16 -4
inspect_ai/model/_openai_computer_use.py +162 -0
inspect_ai/model/_openai_responses.py +319 -165
inspect_ai/model/_providers/anthropic.py +20 -21
inspect_ai/model/_providers/azureai.py +24 -13
inspect_ai/model/_providers/bedrock.py +1 -7
inspect_ai/model/_providers/cloudflare.py +3 -3
inspect_ai/model/_providers/goodfire.py +2 -6
inspect_ai/model/_providers/google.py +11 -10
inspect_ai/model/_providers/groq.py +6 -3
inspect_ai/model/_providers/hf.py +7 -3
inspect_ai/model/_providers/mistral.py +7 -10
inspect_ai/model/_providers/openai.py +47 -17
inspect_ai/model/_providers/openai_o1.py +11 -4
inspect_ai/model/_providers/openai_responses.py +12 -14
inspect_ai/model/_providers/providers.py +2 -2
inspect_ai/model/_providers/together.py +12 -2
inspect_ai/model/_providers/util/chatapi.py +7 -2
inspect_ai/model/_providers/util/hf_handler.py +4 -2
inspect_ai/model/_providers/util/llama31.py +4 -2
inspect_ai/model/_providers/vertex.py +11 -9
inspect_ai/model/_providers/vllm.py +4 -4
inspect_ai/scorer/__init__.py +2 -0
inspect_ai/scorer/_metrics/__init__.py +2 -0
inspect_ai/scorer/_metrics/grouped.py +84 -0
inspect_ai/scorer/_score.py +26 -6
inspect_ai/solver/__init__.py +2 -2
inspect_ai/solver/_basic_agent.py +22 -9
inspect_ai/solver/_bridge.py +31 -0
inspect_ai/solver/_chain.py +20 -12
inspect_ai/solver/_fork.py +5 -1
inspect_ai/solver/_human_agent.py +52 -0
inspect_ai/solver/_prompt.py +3 -1
inspect_ai/solver/_run.py +59 -0
inspect_ai/solver/_solver.py +14 -4
inspect_ai/solver/_task_state.py +5 -3
inspect_ai/tool/_tool_call.py +15 -8
inspect_ai/tool/_tool_def.py +17 -12
inspect_ai/tool/_tool_support_helpers.py +2 -2
inspect_ai/tool/_tool_with.py +14 -11
inspect_ai/tool/_tools/_bash_session.py +11 -2
inspect_ai/tool/_tools/_computer/_common.py +18 -2
inspect_ai/tool/_tools/_computer/_computer.py +18 -2
inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +2 -0
inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +17 -0
inspect_ai/tool/_tools/_think.py +1 -1
inspect_ai/tool/_tools/_web_browser/_web_browser.py +100 -61
inspect_ai/util/__init__.py +2 -0
inspect_ai/util/_anyio.py +27 -0
inspect_ai/util/_sandbox/__init__.py +2 -1
inspect_ai/util/_sandbox/context.py +32 -7
inspect_ai/util/_sandbox/docker/cleanup.py +4 -0
inspect_ai/util/_sandbox/docker/compose.py +2 -2
inspect_ai/util/_sandbox/docker/docker.py +12 -1
inspect_ai/util/_store_model.py +30 -7
inspect_ai/util/_subprocess.py +13 -3
{inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/METADATA +1 -1
{inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/RECORD +179 -153
inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +0 -167
/inspect_ai/{solver → agent}/_bridge/__init__.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/__init__.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/commands/command.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/commands/instructions.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/commands/note.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/commands/status.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/commands/submit.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/panel.py +0 -0
/inspect_ai/{solver/_human_agent → agent/_human}/view.py +0 -0
{inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/licenses/LICENSE +0 -0
{inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/top_level.txt +0 -0

inspect_ai/tool/_tools/_web_browser/_web_browser.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import re
 from pydantic import BaseModel, Field
+from shortuuid import uuid
 from inspect_ai._util.content import ContentText
 from inspect_ai._util.error import PrerequisiteError
@@ -31,22 +32,30 @@ class CrawlerResult(BaseModel):
     error: str | None = None
-def web_browser(interactive: bool = True) -> list[Tool]:
+def web_browser(
+    *, interactive: bool = True, instance: str | None = uuid()
+) -> list[Tool]:
     """Tools used for web browser navigation.
-     See documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-browser>.
+    By default, a separate web browser process is created within the sandbox for each
+    call to `web_browser()`. You can modify this behavior by passing `instance=None`
+    (which will result in a single web browser for the entire sample) or use other
+    `instance` values that implement another scheme.
+    See complete documentation at <https://inspect.aisi.org.uk/tools-standard.html#sec-web-browser>.
     Args:
        interactive: Provide interactive tools (enable
           clicking, typing, and submitting forms). Defaults
           to True.
+       instance: Instance id (each unique instance id has its own web browser process)
     Returns:
        List of tools used for web browser navigation.
     """
     # start with go tool (excluding interactive docs if necessary)
-    go = web_browser_go()
+    go = web_browser_go(instance)
     if not interactive:
         go = go_without_interactive_docs(go)
     tools = [go]
@@ -54,24 +63,27 @@ def web_browser(interactive: bool = True) -> list[Tool]:
     # add interactive tools if requested
     if interactive:
         tools = tools + [
-            web_browser_click(),
-            web_browser_type_submit(),
-            web_browser_type(),
+            tool_with_web_at_viewer(web_browser_click(instance), instance),
+            tool_with_web_at_viewer(web_browser_type_submit(instance), instance),
+            tool_with_web_at_viewer(web_browser_type(instance), instance),
         ]
     # add navigational tools
     return tools + [
-        web_browser_scroll(),
-        web_browser_back(),
-        web_browser_forward(),
-        web_browser_refresh(),
+        web_browser_scroll(instance),
+        web_browser_back(instance),
+        web_browser_forward(instance),
+        web_browser_refresh(instance),
     ]
 @tool(parallel=False)
-def web_browser_go() -> Tool:
+def web_browser_go(instance: str | None = None) -> Tool:
     """Web Browser tool for navigation to a URL.
+    Args:
+       instance: Instance id (each unique instance id has its own web browser process)
     Returns:
        Web browser navigation tool.
     """
@@ -102,7 +114,7 @@ def web_browser_go() -> Tool:
         Returns:
           Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await _web_browser_cmd("web_go", locals())
+        return await _web_browser_cmd("web_go", instance, locals())
     return execute
@@ -126,36 +138,44 @@ class WebBrowserStore(StoreModel):
     session_id: str = Field(default_factory=str)
-def web_at_viewer(call: ToolCall) -> ToolCallView:
-    # get the web accessibility tree, if we have it create a view from it
-    web_at = store_as(WebBrowserStore).web_at
-    element_id = call.arguments.get("element_id", 0)
-    if web_at and element_id:
-        lines = web_at.splitlines()
-        pattern = re.compile(rf"^\s+\[{element_id}\] .*$")
-        for i, line in enumerate(lines):
-            if pattern.match(line):
-                snippet = (
-                    lines[0:1]
-                    + ["  ..."]
-                    + lines[max(i - 2, 1) : i]
-                    + [line.replace(" ", "*", 1)]
-                    + lines[i + 1 : min(i + 3, len(lines))]
-                    + ["  ..."]
-                )
-                return ToolCallView(
-                    context=ToolCallContent(format="text", content="\n".join(snippet))
-                )
-    # no view found
-    return ToolCallView()
-@tool(viewer=web_at_viewer, parallel=False)
-def web_browser_click() -> Tool:
+def tool_with_web_at_viewer(tool: Tool, instance: str | None = None) -> Tool:
+    def web_at_viewer(call: ToolCall) -> ToolCallView:
+        # get the web accessibility tree, if we have it create a view from it
+        web_at = store_as(WebBrowserStore, instance=instance).web_at
+        element_id = call.arguments.get("element_id", 0)
+        if web_at and element_id:
+            lines = web_at.splitlines()
+            pattern = re.compile(rf"^\s+\[{element_id}\] .*$")
+            for i, line in enumerate(lines):
+                if pattern.match(line):
+                    snippet = (
+                        lines[0:1]
+                        + ["  ..."]
+                        + lines[max(i - 2, 1) : i]
+                        + [line.replace(" ", "*", 1)]
+                        + lines[i + 1 : min(i + 3, len(lines))]
+                        + ["  ..."]
+                    )
+                    return ToolCallView(
+                        context=ToolCallContent(
+                            format="text", content="\n".join(snippet)
+                        )
+                    )
+        # no view found
+        return ToolCallView()
+    return tool_with(tool, viewer=web_at_viewer)
+@tool(parallel=False)
+def web_browser_click(instance: str | None = None) -> Tool:
     """Web Browser tool for clicking an element on a web page.
+    Args:
+       instance: Instance id (each unique instance id has its own web browser process)
     Returns:
        Web browser clicking tool.
     """
@@ -182,15 +202,18 @@ def web_browser_click() -> Tool:
         Returns:
            Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await _web_browser_cmd("web_click", locals())
+        return await _web_browser_cmd("web_click", instance, locals())
     return execute
-@tool(viewer=web_at_viewer, parallel=False)
-def web_browser_type_submit() -> Tool:
+@tool(parallel=False)
+def web_browser_type_submit(instance: str | None = None) -> Tool:
     """Web Browser tool for typing and submitting input.
+    Args:
+       instance: Instance id (each unique instance id has its own web browser process)
     Returns:
        Web browser type and submit tool.
     """
@@ -220,15 +243,18 @@ def web_browser_type_submit() -> Tool:
         Returns:
            Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await _web_browser_cmd("web_type_submit", locals())
+        return await _web_browser_cmd("web_type_submit", instance, locals())
     return execute
-@tool(viewer=web_at_viewer, parallel=False)
-def web_browser_type() -> Tool:
+@tool(parallel=False)
+def web_browser_type(instance: str | None = None) -> Tool:
     """Web Browser tool for typing into inputs.
+    Args:
+       instance: Instance id (each unique instance id has its own web browser process)
     Returns:
        Web browser typing tool.
     """
@@ -258,15 +284,18 @@ def web_browser_type() -> Tool:
         Returns:
            Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await _web_browser_cmd("web_type", locals())
+        return await _web_browser_cmd("web_type", instance, locals())
     return execute
 @tool(parallel=False)
-def web_browser_scroll() -> Tool:
+def web_browser_scroll(instance: str | None = None) -> Tool:
     """Web Browser tool for scrolling up or down one page.
+    Args:
+       instance: Instance id (each unique instance id has its own web browser process)
     Returns:
        Web browser scrolling tool.
     """
@@ -288,15 +317,18 @@ def web_browser_scroll() -> Tool:
         Returns:
            Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await _web_browser_cmd("web_scroll", locals())
+        return await _web_browser_cmd("web_scroll", instance, locals())
     return execute
 @tool(parallel=False)
-def web_browser_back() -> Tool:
+def web_browser_back(instance: str | None = None) -> Tool:
     """Web Browser tool for navigating back in the browser history.
+    Args:
+       instance: Instance id (each unique instance id has its own web browser process)
     Returns:
        Web browser back navigation tool.
     """
@@ -309,15 +341,18 @@ def web_browser_back() -> Tool:
         Returns:
            Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await _web_browser_cmd("web_back", locals())
+        return await _web_browser_cmd("web_back", instance, locals())
     return execute
 @tool(parallel=False)
-def web_browser_forward() -> Tool:
+def web_browser_forward(instance: str | None = None) -> Tool:
     """Web Browser tool for navigating forward in the browser history.
+    Args:
+       instance: Instance id (each unique instance id has its own web browser process)
     Returns:
        Web browser forward navigation tool.
     """
@@ -330,15 +365,18 @@ def web_browser_forward() -> Tool:
         Returns:
            Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await _web_browser_cmd("web_forward", locals())
+        return await _web_browser_cmd("web_forward", instance, locals())
     return execute
 @tool(parallel=False)
-def web_browser_refresh() -> Tool:
+def web_browser_refresh(instance: str | None = None) -> Tool:
     """Web Browser tool for refreshing the current page.
+    Args:
+       instance: Instance id (each unique instance id has its own web browser process)
     Returns:
        Web browser page refresh tool.
     """
@@ -351,12 +389,14 @@ def web_browser_refresh() -> Tool:
         Returns:
            Web accessibility tree of the visible elements of the web page. The element_id of each element is displayed in brackets at the beginning of the line.
         """
-        return await _web_browser_cmd("web_refresh", locals())
+        return await _web_browser_cmd("web_refresh", instance, locals())
     return execute
-async def _web_browser_cmd(tool_name: str, params: dict[str, object]) -> ToolResult:
+async def _web_browser_cmd(
+    tool_name: str, instance: str | None, params: dict[str, object]
+) -> ToolResult:
     try:
         sandbox_env = await tool_container_sandbox("web browser")
     except PrerequisiteError as e:
@@ -369,7 +409,8 @@ async def _web_browser_cmd(tool_name: str, params: dict[str, object]) -> ToolRes
         except PrerequisiteError:
             raise e
-    store = store_as(WebBrowserStore)
+    # bind to store (use instance id if provided)
+    store = store_as(WebBrowserStore, instance=instance)
     if not store.session_id:
         store.session_id = (
@@ -397,10 +438,8 @@ async def _web_browser_cmd(tool_name: str, params: dict[str, object]) -> ToolRes
             line.partition("data:image/png;base64")[0] for line in web_at_lines
         ]
-        store_as(WebBrowserStore).main_content = (
-            main_content or "(no main text summary)"
-        )
-        store_as(WebBrowserStore).web_at = web_at
+        store.main_content = main_content or "(no main text summary)"
+        store.web_at = web_at
         web_at = "\n".join(web_at_lines)
         return (

inspect_ai/util/__init__.py CHANGED Viewed

@@ -16,6 +16,7 @@ from ._sandbox import (
     SandboxEnvironmentSpec,
     SandboxEnvironmentType,
     sandbox,
+    sandbox_default,
     sandbox_with,
     sandboxenv,
 )
@@ -53,6 +54,7 @@ __all__ = [
     "sandboxenv",
     "sandbox",
     "sandbox_with",
+    "sandbox_default",
     "Store",
     "store",
     "StoreModel",

inspect_ai/util/_anyio.py ADDED Viewed

@@ -0,0 +1,27 @@
+import sys
+if sys.version_info < (3, 11):
+    from exceptiongroup import ExceptionGroup
+def inner_exception(exc: Exception) -> Exception:
+    flattended = flatten_exception_group(exc)
+    return flattended[0]
+def flatten_exception_group(exc: Exception) -> list[Exception]:
+    """Recursively flatten an ExceptionGroup to get all contained exceptions."""
+    if (
+        hasattr(exc, "__context__")
+        and exc.__context__ is not None
+        and isinstance(exc.__context__, Exception)
+    ):
+        return flatten_exception_group(exc.__context__) + [exc]
+    if isinstance(exc, ExceptionGroup):
+        flattened = []
+        for nested_exc in exc.exceptions:
+            flattened.extend(flatten_exception_group(nested_exc))
+        return flattened
+    return [exc]

inspect_ai/util/_sandbox/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # note: unused imports are still required to ensure that our built-in sandbox environments are registered
-from .context import sandbox, sandbox_with
+from .context import sandbox, sandbox_default, sandbox_with
 from .docker.docker import DockerSandboxEnvironment  # noqa: F401
 from .environment import (
     SandboxConnection,
@@ -26,4 +26,5 @@ __all__ = [
     "sandboxenv",
     "sandbox",
     "sandbox_with",
+    "sandbox_default",
 ]

inspect_ai/util/_sandbox/context.py CHANGED Viewed

@@ -1,6 +1,7 @@
+from contextlib import contextmanager
 from contextvars import ContextVar
 from logging import getLogger
-from typing import Any, NoReturn, cast
+from typing import Any, Iterator, NoReturn, cast
 from shortuuid import uuid
@@ -39,7 +40,7 @@ def sandbox(name: str | None = None) -> SandboxEnvironment:
     # For None, 'default', or a single environment only take the first environment
     if name is None or name == "default" or len(environments) == 1:
-        return list(environments.values())[0]
+        return default_sandbox_environment(environments)
     else:
         environment = environments.get(name, None)
         if not environment:
@@ -146,6 +147,12 @@ async def init_sandbox_environments_sample(
     environments = {k: SandboxEnvironmentProxy(v) for k, v in environments.items()}
     try:
+        # set context
+        sandbox_environments_context_var.set(environments)
+        sandbox_with_environments_context_var.set({})
+        default_name = next(iter(environments.keys()))
+        sandbox_default_context_var.set(default_name)
         # copy files into environments
         await copy_sandbox_environment_files(files, environments)
@@ -153,10 +160,6 @@ async def init_sandbox_environments_sample(
         if setup:
             await setup_sandbox_environment(setup, environments)
-        # set context
-        sandbox_environments_context_var.set(environments)
-        sandbox_with_environments_context_var.set({})
         # return environments
         return environments
@@ -239,7 +242,13 @@ async def setup_sandbox_environment(
 def default_sandbox_environment(
     environments: dict[str, SandboxEnvironment],
 ) -> SandboxEnvironment:
-    return list(environments.values())[0]
+    default_name = sandbox_default_context_var.get()
+    if default_name in environments:
+        return environments[default_name]
+    else:
+        raise ValueError(
+            f"Default sandbox environment '{default_name}' not found in environments"
+        )
 def validate_sandbox_environments(
@@ -253,6 +262,20 @@ def validate_sandbox_environments(
         )
+@contextmanager
+def sandbox_default(name: str) -> Iterator[None]:
+    """Set the default sandbox environment for the current context.
+    Args:
+       name: Sandbox to set as the default.
+    """
+    token = sandbox_default_context_var.set(name)
+    try:
+        yield
+    finally:
+        sandbox_default_context_var.reset(token)
 sandbox_environments_context_var = ContextVar[dict[str, SandboxEnvironment]](
     "sandbox_environments"
 )
@@ -260,3 +283,5 @@ sandbox_environments_context_var = ContextVar[dict[str, SandboxEnvironment]](
 sandbox_with_environments_context_var = ContextVar[dict[str, SandboxEnvironment]](
     "sandbox_with_environments"
 )
+sandbox_default_context_var = ContextVar[str]("sandbox_default")

inspect_ai/util/_sandbox/docker/cleanup.py CHANGED Viewed

@@ -25,6 +25,10 @@ def project_startup(project: ComposeProject) -> None:
     running_projects().append(project)
     # track auto compose we need to cleanup
+    project_record_auto_compose(project)
+def project_record_auto_compose(project: ComposeProject) -> None:
     if project.config and is_auto_compose_file(project.config):
         auto_compose_files().add(project.config)

inspect_ai/util/_sandbox/docker/compose.py CHANGED Viewed

@@ -331,8 +331,8 @@ async def compose_command(
         retries = 0
         while True:
             try:
-                command_timeout = (
-                    timeout if retries == 0 else (min(timeout, 60) // retries)
+                command_timeout = max(
+                    timeout if retries == 0 else (min(timeout, 60) // retries), 1
                 )
                 return await run_command(command_timeout)
             except TimeoutError:

inspect_ai/util/_sandbox/docker/docker.py CHANGED Viewed

@@ -30,6 +30,7 @@ from .cleanup import (
     project_cleanup,
     project_cleanup_shutdown,
     project_cleanup_startup,
+    project_record_auto_compose,
     project_startup,
 )
 from .compose import (
@@ -78,6 +79,9 @@ class DockerSandboxEnvironment(SandboxEnvironment):
                 name=task_project_name(task_name), config=config
             )
+            # record auto compose
+            project_record_auto_compose(project)
             # build containers which are out of date
             await compose_build(project)
@@ -310,7 +314,14 @@ class DockerSandboxEnvironment(SandboxEnvironment):
         # write the file
         if isinstance(contents, str):
             result = await self.exec(
-                ["sh", "-e", "-c", 'tee -- "$1"', "write_file_script", file],
+                [
+                    "sh",
+                    "-e",
+                    "-c",
+                    'tee -- "$1" > /dev/null',
+                    "write_file_script",
+                    file,
+                ],
                 input=contents,
                 timeout=TIMEOUT,
             )

inspect_ai/util/_store_model.py CHANGED Viewed

@@ -15,6 +15,7 @@ class StoreModel(BaseModel):
     """
     store: Store = Field(exclude=True, default_factory=store)
+    instance: str | None = Field(exclude=True, default=None)
     def model_post_init(self, __context: Any) -> None:
         for name in self.model_fields.keys():
@@ -28,12 +29,18 @@ class StoreModel(BaseModel):
             elif name in self.__dict__.keys():
                 self.store.set(ns_name, self.__dict__[name])
+            # validate that we aren't using a nested StoreModel
+            self._validate_value(name, self.__dict__[name])
     def __getattribute__(self, name: str) -> Any:
         # sidestep dunders and pydantic fields
         if name.startswith("__") or name.startswith("model_"):
             return object.__getattribute__(self, name)
-        # handle model_fields (except 'store') by reading the store
-        elif name in object.__getattribute__(self, "model_fields") and name != "store":
+        # handle model_fields (except 'store' and 'instance') by reading the store
+        elif name in object.__getattribute__(self, "model_fields") and name not in [
+            "store",
+            "instance",
+        ]:
             store_key = self._ns_name(name)
             if store_key in self.store:
                 return self.store.get(store_key)
@@ -44,6 +51,7 @@ class StoreModel(BaseModel):
             return super().__getattribute__(name)
     def __setattr__(self, name: str, value: Any) -> None:
+        self._validate_value(name, value)
         if name in self.model_fields:
             # validate with the new value (can throw ValidationError)
             temp_data = self.store._data.copy()
@@ -86,11 +94,23 @@ class StoreModel(BaseModel):
         # perform validation
         self.__class__.model_validate(validate)
+    def _validate_value(self, name: str, value: Any) -> None:
+        # validate that we aren't using a nested StoreModel
+        if isinstance(value, StoreModel):
+            raise TypeError(
+                f"{name} is a StoreModel and you may not embed a StoreModel "
+                "inside another StoreModel (derive from BaseModel for fields in a StoreModel)."
+            )
     def _ns_name(self, name: str) -> str:
-        return f"{self.__class__.__name__}:{name}"
+        namespace = f"{self.instance}:" if self.instance is not None else ""
+        return f"{self.__class__.__name__}:{namespace}{name}"
     def _un_ns_name(self, name: str) -> str:
-        return name.replace(f"{self.__class__.__name__}:", "", 1)
+        name = name.replace(f"{self.__class__.__name__}:", "", 1)
+        if self.instance:
+            name = name.replace(f"{self.instance}:", "", 1)
+        return name
     model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -98,13 +118,16 @@ class StoreModel(BaseModel):
 SMT = TypeVar("SMT", bound=StoreModel)
-def store_as(model_cls: Type[SMT]) -> SMT:
+def store_as(model_cls: Type[SMT], instance: str | None = None) -> SMT:
     """Get a Pydantic model interface to the store.
     Args:
       model_cls: Pydantic model type (must derive from StoreModel)
+      instance: Optional instance name for store (enables multiple instances
+        of a given StoreModel type within a single sample)
     Returns:
-      StoreModel: Instance of model_cls bound to current Store.
+      StoreModel: model_cls bound to current Store.
     """
-    return model_cls(store=store())
+    return model_cls(store=store(), instance=instance)

inspect_ai/util/_subprocess.py CHANGED Viewed

@@ -117,14 +117,15 @@ async def subprocess(
     async def run_command() -> AsyncGenerator[
         Union[Process, ExecResult[str], ExecResult[bytes]], None
     ]:
-        async with await open_process(
+        process = await open_process(
             args,
             stdin=PIPE if input else DEVNULL,
             stdout=PIPE if capture_output else None,
             stderr=PIPE if capture_output else None,
             cwd=cwd,
             env={**os.environ, **env},
-        ) as process:
+        )
+        try:
             # yield the process so the caller has a handle to it
             yield process
@@ -173,6 +174,15 @@ async def subprocess(
                     stdout=stdout if capture_output else bytes(),
                     stderr=stderr if capture_output else bytes(),
                 )
+        finally:
+            try:
+                await process.aclose()
+            except ProcessLookupError:
+                # the anyio asyncio backend calls process.kill() from within
+                # its aclose() method without an enclosing exception handler
+                # (which in turn can throw ProcessLookupError if the process
+                # is already gone)
+                pass
     # wrapper for run command that implements timeout
     async def run_command_timeout() -> Union[ExecResult[str], ExecResult[bytes]]:
@@ -181,7 +191,7 @@ async def subprocess(
             proc = cast(Process, await anext(rc))
             # await result wrapped in timeout handler if requested
-            if timeout:
+            if timeout is not None:
                 try:
                     with anyio.fail_after(timeout):
                         result = await anext(rc)

{inspect_ai-0.3.82.dist-info → inspect_ai-0.3.84.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inspect_ai
-Version: 0.3.82
+Version: 0.3.84
 Summary: Framework for large language model evaluations
 Author: UK AI Security Institute
 License: MIT License

inspect-ai 0.3.82__py3-none-any.whl → 0.3.84__py3-none-any.whl

inspect-ai 0.3.82__py3-none-any.whl → 0.3.84__py3-none-any.whl