PyPI - inspect-ai - Versions diffs - 0.3.58__py3-none-any.whl → 0.3.59__py3-none-any.whl - Mend

inspect-ai 0.3.58__py3-none-any.whl → 0.3.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (127) hide show

inspect_ai/_cli/common.py +3 -1
inspect_ai/_cli/eval.py +15 -2
inspect_ai/_display/core/active.py +4 -1
inspect_ai/_display/core/config.py +3 -3
inspect_ai/_display/core/panel.py +7 -3
inspect_ai/_display/plain/__init__.py +0 -0
inspect_ai/_display/plain/display.py +203 -0
inspect_ai/_display/rich/display.py +0 -5
inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
inspect_ai/_display/textual/widgets/samples.py +78 -11
inspect_ai/_display/textual/widgets/sandbox.py +37 -0
inspect_ai/_eval/score.py +1 -0
inspect_ai/_eval/task/results.py +50 -22
inspect_ai/_eval/task/run.py +41 -7
inspect_ai/_eval/task/sandbox.py +10 -5
inspect_ai/_util/constants.py +1 -0
inspect_ai/_util/port_names.py +61 -0
inspect_ai/_util/text.py +23 -0
inspect_ai/_view/www/App.css +31 -1
inspect_ai/_view/www/dist/assets/index.css +31 -1
inspect_ai/_view/www/dist/assets/index.js +25344 -1849
inspect_ai/_view/www/log-schema.json +32 -2
inspect_ai/_view/www/package.json +2 -0
inspect_ai/_view/www/src/App.mjs +8 -10
inspect_ai/_view/www/src/Types.mjs +0 -1
inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
inspect_ai/_view/www/src/index.js +75 -2
inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +24 -12
inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
inspect_ai/_view/www/src/types/log.d.ts +13 -2
inspect_ai/_view/www/src/utils/Format.mjs +10 -3
inspect_ai/_view/www/src/utils/Json.mjs +12 -6
inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
inspect_ai/_view/www/vite.config.js +7 -0
inspect_ai/_view/www/yarn.lock +116 -0
inspect_ai/approval/_human/__init__.py +0 -0
inspect_ai/approval/_policy.py +12 -6
inspect_ai/log/_log.py +1 -1
inspect_ai/log/_samples.py +16 -0
inspect_ai/log/_transcript.py +4 -1
inspect_ai/model/_call_tools.py +4 -0
inspect_ai/model/_conversation.py +20 -8
inspect_ai/model/_generate_config.py +10 -4
inspect_ai/model/_model.py +117 -18
inspect_ai/model/_model_output.py +7 -2
inspect_ai/model/_providers/anthropic.py +100 -44
inspect_ai/model/_providers/azureai.py +20 -20
inspect_ai/model/_providers/bedrock.py +37 -40
inspect_ai/model/_providers/google.py +46 -54
inspect_ai/model/_providers/mistral.py +11 -11
inspect_ai/model/_providers/openai.py +15 -16
inspect_ai/model/_providers/openai_o1.py +9 -8
inspect_ai/model/_providers/providers.py +1 -1
inspect_ai/model/_providers/together.py +8 -8
inspect_ai/model/_providers/vertex.py +1 -4
inspect_ai/scorer/_reducer/reducer.py +1 -1
inspect_ai/scorer/_scorer.py +2 -2
inspect_ai/solver/__init__.py +2 -5
inspect_ai/solver/_prompt.py +35 -5
inspect_ai/solver/_task_state.py +80 -38
inspect_ai/tool/__init__.py +2 -0
inspect_ai/tool/_tool.py +12 -1
inspect_ai/tool/_tool_call.py +10 -0
inspect_ai/tool/_tool_def.py +16 -5
inspect_ai/tool/_tool_with.py +21 -4
inspect_ai/tool/beta/__init__.py +5 -0
inspect_ai/tool/beta/_computer/__init__.py +3 -0
inspect_ai/tool/beta/_computer/_common.py +133 -0
inspect_ai/tool/beta/_computer/_computer.py +155 -0
inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
inspect_ai/util/__init__.py +2 -0
inspect_ai/util/_limit.py +26 -0
inspect_ai/util/_sandbox/docker/docker.py +64 -1
inspect_ai/util/_sandbox/docker/internal.py +3 -1
inspect_ai/util/_sandbox/environment.py +14 -0
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +126 -98
inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0

inspect_ai/_cli/common.py CHANGED Viewed

@@ -2,6 +2,7 @@ import functools
 from typing import Any, Callable, Literal, cast
 import click
+import rich
 from typing_extensions import TypedDict
 from inspect_ai._util.constants import (
@@ -105,7 +106,8 @@ def common_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
 def process_common_options(options: CommonOptions) -> None:
     # propagate display
     if options["no_ansi"]:
-        display = "plain"
+        display = "rich"
+        rich.reconfigure(no_color=True)
     else:
         display = options["display"].lower().strip()
     init_display_type(display)

inspect_ai/_cli/eval.py CHANGED Viewed

@@ -348,13 +348,13 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         "--logprobs",
         type=bool,
         is_flag=True,
-        help="Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
+        help="Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
         envvar="INSPECT_EVAL_LOGPROBS",
     )
     @click.option(
         "--top-logprobs",
         type=int,
-        help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Google, Grok, TogetherAI, Huggingface, and vLLM only.",
+        help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, TogetherAI, Huggingface, and vLLM only.",
         envvar="INSPECT_EVAL_TOP_LOGPROBS",
     )
     @click.option(
@@ -365,6 +365,14 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         help="Whether to enable parallel function calling during tool use (defaults to True) OpenAI and Groq only.",
         envvar="INSPECT_EVAL_PARALLEL_TOOL_CALLS",
     )
+    @click.option(
+        "--internal-tools/--no-internal-tools",
+        type=bool,
+        is_flag=True,
+        default=True,
+        help="Whether to automatically map tools to model internal implementations (e.g. 'computer' for anthropic).",
+        envvar="INSPECT_EVAL_INTERNAL_TOOLS",
+    )
     @click.option(
         "--max-tool-output",
         type=int,
@@ -439,6 +447,7 @@ def eval_command(
     logprobs: bool | None,
     top_logprobs: int | None,
     parallel_tool_calls: bool | None,
+    internal_tools: bool | None,
     max_tool_output: int | None,
     cache_prompt: str | None,
     reasoning_effort: str | None,
@@ -598,6 +607,7 @@ def eval_set_command(
     logprobs: bool | None,
     top_logprobs: int | None,
     parallel_tool_calls: bool | None,
+    internal_tools: bool | None,
     max_tool_output: int | None,
     cache_prompt: str | None,
     reasoning_effort: str | None,
@@ -836,6 +846,9 @@ def config_from_locals(locals: dict[str, Any]) -> GenerateConfigArgs:
             if key == "parallel_tool_calls":
                 if value is not False:
                     value = None
+            if key == "internal_tools":
+                if value is not False:
+                    value = None
             config[key] = value  # type: ignore
     return config

inspect_ai/_display/core/active.py CHANGED Viewed

@@ -5,6 +5,7 @@ import rich
 from inspect_ai.util._display import display_type
+from ..plain.display import PlainDisplay
 from ..rich.display import RichDisplay
 from ..textual.display import TextualDisplay
 from .display import Display, TaskScreen
@@ -13,7 +14,9 @@ from .display import Display, TaskScreen
 def display() -> Display:
     global _active_display
     if _active_display is None:
-        if (
+        if display_type() == "plain":
+            _active_display = PlainDisplay()
+        elif (
             display_type() == "full"
             and sys.stdout.isatty()
             and not rich.get_console().is_jupyter

inspect_ai/_display/core/config.py CHANGED Viewed

@@ -13,14 +13,14 @@ def task_config(
         value = task_args[key]
         if is_registry_dict(value):
             task_args[key] = value["name"]
-    config = task_args | dict(profile.eval_config.model_dump(exclude_none=True))
+    config = dict(profile.eval_config.model_dump(exclude_none=True)) | task_args
     if generate_config:
-        config = config | dict(profile.generate_config.model_dump(exclude_none=True))
+        config = dict(profile.generate_config.model_dump(exclude_none=True)) | config
     if profile.tags:
         config["tags"] = ",".join(profile.tags)
     config_print: list[str] = []
     for name, value in config.items():
-        if name == "approval":
+        if name == "approval" and isinstance(value, dict):
             config_print.append(
                 f"{name}: {','.join([approver['name'] for approver in value['approvers']])}"
             )

inspect_ai/_display/core/panel.py CHANGED Viewed

@@ -50,9 +50,13 @@ def task_panel(
         table.add_row(subtitle_table)
     # main progress and task info
-    table.add_row()
-    table.add_row(body)
-    table.add_row()
+    if body:
+        table.add_row()
+        table.add_row(body)
+    # spacing if there is more ocontent
+    if footer or log_location:
+        table.add_row()
     # footer if specified
     if footer:

inspect_ai/_display/plain/__init__.py ADDED Viewed

File without changes

inspect_ai/_display/plain/display.py ADDED Viewed

@@ -0,0 +1,203 @@
+import asyncio
+import contextlib
+from typing import Any, AsyncIterator, Coroutine, Iterator
+import rich
+from inspect_ai._display.core.rich import rich_initialise
+from inspect_ai._util.text import truncate
+from inspect_ai._util.throttle import throttle
+from ...util._concurrency import concurrency_status
+from ..core.config import task_config
+from ..core.display import (
+    TR,
+    Display,
+    Progress,
+    TaskDisplay,
+    TaskDisplayMetric,
+    TaskProfile,
+    TaskResult,
+    TaskScreen,
+    TaskSpec,
+    TaskWithResult,
+)
+from ..core.footer import task_http_rate_limits
+from ..core.panel import task_panel, task_targets
+from ..core.results import task_metric, tasks_results
+class PlainDisplay(Display):
+    def __init__(self) -> None:
+        self.total_tasks: int = 0
+        self.tasks: list[TaskWithResult] = []
+        self.parallel = False
+        rich_initialise()
+    def print(self, message: str) -> None:
+        print(message)
+    @contextlib.contextmanager
+    def progress(self, total: int) -> Iterator[Progress]:
+        yield PlainProgress(total)
+    def run_task_app(self, main: Coroutine[Any, Any, TR]) -> TR:
+        return asyncio.run(main)
+    @contextlib.contextmanager
+    def suspend_task_app(self) -> Iterator[None]:
+        yield
+    @contextlib.asynccontextmanager
+    async def task_screen(
+        self, tasks: list[TaskSpec], parallel: bool
+    ) -> AsyncIterator[TaskScreen]:
+        self.total_tasks = len(tasks)
+        self.multiple_task_names = len({task.name for task in tasks}) > 1
+        self.multiple_model_names = len({str(task.model) for task in tasks}) > 1
+        self.tasks = []
+        self.parallel = parallel
+        try:
+            # Print header for task(s)
+            if parallel:
+                print(f"Running {self.total_tasks} tasks...")
+            yield TaskScreen()
+        finally:
+            # Print final results
+            if self.tasks:
+                self._print_results()
+    @contextlib.contextmanager
+    def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
+        # Print initial task information using a rich panel
+        panel = task_panel(
+            profile=profile,
+            show_model=True,
+            body="",  # Empty body since we haven't started yet
+            subtitle=(task_config(profile), task_targets(profile)),
+            footer=None,
+            log_location=None,
+        )
+        rich.print(panel)
+        # Create and yield task display
+        task = TaskWithResult(profile, None)
+        self.tasks.append(task)
+        yield PlainTaskDisplay(
+            task,
+            show_task_names=self.multiple_task_names,
+            show_model_names=self.multiple_model_names,
+        )
+    def _print_results(self) -> None:
+        """Print final results using rich panels"""
+        panels = tasks_results(self.tasks)
+        rich.print(panels)
+class PlainProgress(Progress):
+    def __init__(self, total: int):
+        self.total = total
+        self.current = 0
+    def update(self, n: int = 1) -> None:
+        self.current += n
+        # No direct printing - PlainTaskDisplay handles it
+    def complete(self) -> None:
+        self.current = self.total
+class PlainTaskDisplay(TaskDisplay):
+    def __init__(
+        self, task: TaskWithResult, *, show_task_names: bool, show_model_names: bool
+    ):
+        self.task = task
+        self.show_task_names = show_task_names
+        self.show_model_names = show_model_names
+        self.progress_display: PlainProgress | None = None
+        self.samples_complete = 0
+        self.samples_total = 0
+        self.current_metrics: list[TaskDisplayMetric] | None = None
+        self.last_progress = 0  # Track last progress percentage
+    @contextlib.contextmanager
+    def progress(self) -> Iterator[Progress]:
+        self.progress_display = PlainProgress(self.task.profile.steps)
+        yield self.progress_display
+    @throttle(1)
+    def _print_status_throttled(self) -> None:
+        self._print_status()
+    def _print_status(self) -> None:
+        """Print status updates on new lines when there's meaningful progress"""
+        if not self.progress_display:
+            return
+        # Calculate current progress percentage
+        current_progress = int(
+            self.progress_display.current / self.progress_display.total * 100
+        )
+        # Only print on percentage changes to avoid too much output
+        if current_progress != self.last_progress:
+            status_parts: list[str] = []
+            # if this is parallel print task and model to distinguish (limit both to 12 chars)
+            MAX_NAME_WIDTH = 12
+            if self.show_task_names:
+                status_parts.append(truncate(self.task.profile.name, MAX_NAME_WIDTH))
+            if self.show_model_names:
+                status_parts.append(
+                    truncate(str(self.task.profile.model), MAX_NAME_WIDTH)
+                )
+            # Add step progress
+            status_parts.append(
+                f"Steps: {self.progress_display.current:3d}/{self.progress_display.total} {current_progress:3d}%"
+            )
+            # Add sample progress
+            status_parts.append(
+                f"Samples: {self.samples_complete:3d}/{self.samples_total:3d}"
+            )
+            # Add metrics
+            if self.current_metrics:
+                metric_str = task_metric(self.current_metrics)
+                status_parts.append(metric_str)
+            # Add resource usage
+            # Very similar to ``inspect_ai._display.core.footer.task_resources``, but without
+            # the rich formatting added in the ``task_dict`` call
+            resources_dict: dict[str, str] = {}
+            for model, resource in concurrency_status().items():
+                resources_dict[model] = f"{resource[0]:2d}/{resource[1]:2d}"
+            resources = ", ".join(
+                [f"{key}: {value}" for key, value in resources_dict.items()]
+            )
+            status_parts.append(resources)
+            # Add rate limits
+            rate_limits = task_http_rate_limits()
+            if rate_limits:
+                status_parts.append(rate_limits)
+            # Print on new line
+            print(" | ".join(status_parts))
+            self.last_progress = current_progress
+    def sample_complete(self, complete: int, total: int) -> None:
+        self.samples_complete = complete
+        self.samples_total = total
+        self._print_status_throttled()
+    def update_metrics(self, metrics: list[TaskDisplayMetric]) -> None:
+        self.current_metrics = metrics
+        self._print_status_throttled()
+    def complete(self, result: TaskResult) -> None:
+        self.task.result = result
+        self._print_status()

inspect_ai/_display/rich/display.py CHANGED Viewed

@@ -129,11 +129,6 @@ class RichDisplay(Display):
     @override
     @contextlib.contextmanager
     def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
-        # if there is no ansi display than all of the below will
-        # be a no-op, so we print a simple text message for the task
-        if display_type() == "plain":
-            rich.get_console().print(task_no_ansi(profile))
         # for typechekcer
         if self.tasks is None:
             self.tasks = []

inspect_ai/_display/textual/widgets/port_mappings.py ADDED Viewed

@@ -0,0 +1,110 @@
+from typing import Literal
+from textual.app import ComposeResult
+from textual.containers import HorizontalScroll
+from textual.widget import Widget
+from textual.widgets import Link, Static
+from inspect_ai._util.port_names import get_service_by_port
+from inspect_ai.util._sandbox.environment import PortMapping
+class PortMappingsView(HorizontalScroll):
+    DEFAULT_CSS = """
+    PortMappingsView {
+      layout: grid;
+      height: auto;
+      grid-size: 4 3;
+      grid-columns: auto auto auto auto;
+      grid-gutter: 0 1;
+    }
+    """
+    def __init__(self, ports: list[PortMapping] | None) -> None:
+        super().__init__()
+        self.ports = ports
+    def compose(self) -> ComposeResult:
+        if not self.ports:
+            return
+        yield Static("service")
+        yield Static("sandbox")
+        yield Static("client")
+        yield Static("endpoint")
+        mappings_and_services = [
+            (mapping, get_service_by_port(mapping.container_port, mapping.protocol))
+            for mapping in self.ports
+        ]
+        remaining_widgets = [
+            widget
+            for mapping_and_service in mappings_and_services
+            for widget in widgets_from_port_mapping(mapping_and_service)
+        ]
+        for widget in remaining_widgets:
+            yield widget
+def widgets_for_port_mappings(
+    port_mappings: list[PortMapping] | None,
+) -> list[Widget]:
+    if port_mappings is None:
+        return []
+    return [
+        static
+        for mapping in [
+            (mapping, get_service_by_port(mapping.container_port, mapping.protocol))
+            for mapping in port_mappings
+        ]
+        for static in widgets_from_port_mapping(mapping)
+    ]
+def widgets_from_port_mapping(
+    mapping_service_tuple: tuple[PortMapping, str | None],
+) -> list[Widget]:
+    port_mapping, service = mapping_service_tuple
+    return [
+        widget
+        for host_mapping in port_mapping.mappings
+        for widget in get_row_widgets(
+            port_mapping.protocol,
+            host_mapping.host_port,
+            port_mapping.container_port,
+            service,
+        )
+    ]
+def get_row_widgets(
+    protocol: Literal["tcp", "udp"],
+    host_port: int,
+    container_port: int,
+    service: str | None,
+) -> list[Widget]:
+    url = get_url(
+        host_port,
+        service,
+    )
+    return [
+        Static(service if service is not None else protocol),
+        Static(str(container_port)),
+        Static(str(host_port)),
+        Link(url) if url is not None else Static("asdf"),
+    ]
+def get_url(
+    host_port: int,
+    service: str | None,
+) -> str | None:
+    if service is not None:
+        if service == "noVNC":
+            return f"http://localhost:{host_port}?view_only=true&autoconnect=true&resize=scale"
+        if service.startswith("HTTP"):
+            return f"https://localhost:{host_port}"
+        if service.startswith("VNC"):
+            return f"vnc://localhost:{host_port}"
+    return None

inspect_ai/_display/textual/widgets/samples.py CHANGED Viewed

@@ -5,29 +5,28 @@ from rich.console import RenderableType
 from rich.table import Table
 from rich.text import Text
 from textual.app import ComposeResult
-from textual.containers import (
-    Horizontal,
-    HorizontalGroup,
-    Vertical,
-    VerticalGroup,
-)
+from textual.containers import Horizontal, HorizontalGroup, Vertical, VerticalGroup
 from textual.reactive import reactive
 from textual.widget import Widget
 from textual.widgets import (
     Button,
     Collapsible,
+    Link,
     LoadingIndicator,
     OptionList,
     Static,
 )
 from textual.widgets.option_list import Option, Separator
+from inspect_ai._display.textual.widgets.port_mappings import get_url
 from inspect_ai._util.format import format_progress_time
+from inspect_ai._util.port_names import get_service_by_port
 from inspect_ai._util.registry import registry_unqualified_name
 from inspect_ai.log._samples import ActiveSample
 from inspect_ai.log._transcript import ToolEvent
 from .clock import Clock
+from .sandbox import SandboxView
 from .transcript import TranscriptView
@@ -74,6 +73,7 @@ class SamplesView(Widget):
     async def set_highlighted_sample(self, highlighted: int | None) -> None:
         sample_info = self.query_one(SampleInfo)
+        sample_vnc = self.query_one(SampleVNC)
         transcript_view = self.query_one(TranscriptView)
         sample_toolbar = self.query_one(SampleToolbar)
         if highlighted is not None:
@@ -83,12 +83,14 @@ class SamplesView(Widget):
                 transcript_view.display = True
                 sample_toolbar.display = True
                 await sample_info.sync_sample(sample)
+                await sample_vnc.sync_sample(sample)
                 await transcript_view.sync_sample(sample)
                 await sample_toolbar.sync_sample(sample)
                 return
         # otherwise hide ui
         sample_info.display = False
+        sample_vnc.display = False
         transcript_view.display = False
         sample_toolbar.display = False
@@ -182,10 +184,59 @@ class SamplesList(OptionList):
             return None
-class SampleInfo(Horizontal):
+class SampleVNC(Horizontal):
+    DEFAULT_CSS = """
+    SampleVNC {
+        layout: grid;
+        grid-size: 2 1;
+        grid-columns: auto 1fr;
+    }
+    SampleVNC Static {
+        color: $secondary;
+    }
+    SampleVNC Link {
+        color: $accent;
+    }
+    """
+    def __init__(self) -> None:
+        super().__init__()
+        self._sample: ActiveSample | None = None
+    def compose(self) -> ComposeResult:
+        yield Static("VNC: ")
+        yield Link("")
+    async def sync_sample(self, sample: ActiveSample) -> None:
+        if sample == self._sample:
+            return
+        # defult to hidden (show if we find a vnc connection)
+        self.display = False
+        # is there a vnc connection? if so populate
+        for connection in [c for c in sample.sandboxes.values() if c.ports]:
+            for port in connection.ports or []:
+                service = get_service_by_port(port.container_port, port.protocol)
+                if service == "noVNC" and port.mappings:
+                    host_mappings = port.mappings
+                    link = self.query_one(Link)
+                    vnc_url = get_url(host_mappings[0].host_port, service)
+                    if vnc_url:
+                        link.text = vnc_url
+                        link.url = link.text
+                        self.display = True
+                        break
+class SampleInfo(Vertical):
     DEFAULT_CSS = """
     SampleInfo {
         color: $text-muted;
+        layout: grid;
+        grid-size: 1 2;
+        grid-rows: auto 1;
+        grid-gutter: 1;
     }
     SampleInfo Collapsible {
         padding: 0;
@@ -218,11 +269,13 @@ class SampleInfo(Horizontal):
     def __init__(self) -> None:
         super().__init__()
         self._sample: ActiveSample | None = None
+        self._sandbox_count: int | None = None
     def compose(self) -> ComposeResult:
         with Collapsible(title=""):
             yield SampleLimits()
             yield SandboxesView()
+        yield SampleVNC()
     async def sync_sample(self, sample: ActiveSample | None) -> None:
         if sample is None:
@@ -233,12 +286,14 @@ class SampleInfo(Horizontal):
             limits = self.query_one(SampleLimits)
             await limits.sync_sample(sample)
+            new_sandbox_count = len(sample.sandboxes)
             # bail if we've already processed this sample
-            if self._sample == sample:
+            if self._sample == sample and self._sandbox_count == new_sandbox_count:
                 return
             # set sample
             self._sample = sample
+            self._sandbox_count = new_sandbox_count
             # update UI
             self.display = True
@@ -246,6 +301,7 @@ class SampleInfo(Horizontal):
             self.query_one(Collapsible).title = title
             sandboxes = self.query_one(SandboxesView)
             await sandboxes.sync_sample(sample)
+            await self.query_one(SampleVNC).sync_sample(sample)
 class SampleLimits(Widget):
@@ -295,6 +351,9 @@ class SandboxesView(Vertical):
         background: transparent;
         height: auto;
     }
+    #sandboxes-list {
+        height: auto;
+    }
     SandboxesView Static {
         background: transparent;
     }
@@ -312,16 +371,24 @@ class SandboxesView(Vertical):
     async def sync_sample(self, sample: ActiveSample) -> None:
         if len(sample.sandboxes) > 0:
+            multiple_sandboxes = len(sample.sandboxes) > 1
             self.display = True
             sandboxes_caption = cast(Static, self.query_one("#sandboxes-caption"))
-            sandboxes_caption.update("[bold]sandbox containers:[/bold]")
+            sandboxes_caption.update(
+                f"[bold]sandbox container{'s' if multiple_sandboxes else ''}:[/bold]"
+            )
             sandboxes_list = self.query_one("#sandboxes-list")
             await sandboxes_list.remove_children()
             await sandboxes_list.mount_all(
-                [Static(sandbox.command) for sandbox in sample.sandboxes.values()]
+                [
+                    SandboxView(connection, name if multiple_sandboxes else None)
+                    for name, connection in sample.sandboxes.items()
+                ]
             )
-            sandboxes_list.mount(
+            await sandboxes_list.mount(
                 Static(
                     "[italic]Hold down Alt (or Option) to select text for copying[/italic]",
                     classes="clipboard-message",

inspect_ai/_display/textual/widgets/sandbox.py ADDED Viewed

@@ -0,0 +1,37 @@
+from textual.app import ComposeResult
+from textual.containers import Horizontal, Vertical
+from textual.widgets import Static
+from inspect_ai.util._sandbox.environment import SandboxConnection
+from .port_mappings import PortMappingsView
+class SandboxView(Vertical):
+    DEFAULT_CSS = """
+    .indent {
+        width: 2;
+    }
+    .no_indent {
+        width: 0;
+    }
+    """
+    def __init__(
+        self,
+        connection: SandboxConnection,
+        name: str | None,  # if None, no header or indent
+    ) -> None:
+        super().__init__()
+        self.sandbox_name = name
+        self.connection = connection
+    def compose(self) -> ComposeResult:
+        if self.sandbox_name:
+            yield Static(self.sandbox_name)
+        with Horizontal():
+            yield Static("", classes="indent" if self.sandbox_name else "no_indent")
+            with Vertical():
+                yield Static(self.connection.command)
+                if self.connection.ports:
+                    yield PortMappingsView(self.connection.ports)

inspect-ai 0.3.58__py3-none-any.whl → 0.3.59__py3-none-any.whl

inspect-ai 0.3.58__py3-none-any.whl → 0.3.59__py3-none-any.whl