inspect-ai 0.3.57__py3-none-any.whl → 0.3.59__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only.
- inspect_ai/__init__.py +2 -1
- inspect_ai/_cli/common.py +7 -3
- inspect_ai/_cli/eval.py +17 -2
- inspect_ai/_cli/trace.py +21 -2
- inspect_ai/_display/core/active.py +4 -3
- inspect_ai/_display/core/config.py +3 -3
- inspect_ai/_display/core/panel.py +7 -3
- inspect_ai/_display/plain/__init__.py +0 -0
- inspect_ai/_display/plain/display.py +203 -0
- inspect_ai/_display/rich/display.py +4 -9
- inspect_ai/_display/textual/app.py +4 -1
- inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
- inspect_ai/_display/textual/widgets/samples.py +119 -16
- inspect_ai/_display/textual/widgets/sandbox.py +37 -0
- inspect_ai/_eval/eval.py +32 -20
- inspect_ai/_eval/evalset.py +7 -5
- inspect_ai/_eval/score.py +1 -0
- inspect_ai/_eval/task/__init__.py +2 -2
- inspect_ai/_eval/task/images.py +40 -25
- inspect_ai/_eval/task/results.py +50 -22
- inspect_ai/_eval/task/run.py +180 -124
- inspect_ai/_eval/task/sandbox.py +10 -5
- inspect_ai/_eval/task/task.py +140 -25
- inspect_ai/_util/constants.py +2 -0
- inspect_ai/_util/content.py +23 -1
- inspect_ai/_util/images.py +20 -17
- inspect_ai/_util/kvstore.py +73 -0
- inspect_ai/_util/notgiven.py +18 -0
- inspect_ai/_util/port_names.py +61 -0
- inspect_ai/_util/text.py +23 -0
- inspect_ai/_util/thread.py +5 -0
- inspect_ai/_view/www/App.css +31 -1
- inspect_ai/_view/www/dist/assets/index.css +31 -1
- inspect_ai/_view/www/dist/assets/index.js +25375 -1846
- inspect_ai/_view/www/log-schema.json +129 -15
- inspect_ai/_view/www/package.json +2 -0
- inspect_ai/_view/www/src/App.mjs +8 -10
- inspect_ai/_view/www/src/Types.mjs +0 -1
- inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
- inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
- inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
- inspect_ai/_view/www/src/components/MessageContent.mjs +43 -1
- inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
- inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
- inspect_ai/_view/www/src/index.js +75 -2
- inspect_ai/_view/www/src/navbar/Navbar.mjs +3 -0
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +18 -9
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
- inspect_ai/_view/www/src/samples/SampleList.mjs +18 -48
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +29 -13
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -1
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
- inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
- inspect_ai/_view/www/src/types/log.d.ts +62 -27
- inspect_ai/_view/www/src/utils/Format.mjs +10 -3
- inspect_ai/_view/www/src/utils/Json.mjs +12 -6
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +10 -4
- inspect_ai/_view/www/vite.config.js +7 -0
- inspect_ai/_view/www/yarn.lock +116 -0
- inspect_ai/approval/_human/__init__.py +0 -0
- inspect_ai/approval/_human/util.py +2 -2
- inspect_ai/approval/_policy.py +12 -6
- inspect_ai/dataset/_sources/csv.py +2 -1
- inspect_ai/dataset/_sources/json.py +2 -1
- inspect_ai/dataset/_sources/util.py +15 -7
- inspect_ai/log/_condense.py +11 -1
- inspect_ai/log/_log.py +3 -6
- inspect_ai/log/_recorders/eval.py +19 -8
- inspect_ai/log/_samples.py +26 -5
- inspect_ai/log/_transcript.py +32 -2
- inspect_ai/model/__init__.py +10 -2
- inspect_ai/model/_call_tools.py +59 -12
- inspect_ai/model/_chat_message.py +2 -4
- inspect_ai/model/_conversation.py +61 -0
- inspect_ai/model/_generate_config.py +10 -4
- inspect_ai/model/_model.py +117 -18
- inspect_ai/model/_model_output.py +7 -2
- inspect_ai/model/_providers/anthropic.py +109 -51
- inspect_ai/model/_providers/azureai.py +26 -24
- inspect_ai/model/_providers/bedrock.py +43 -44
- inspect_ai/model/_providers/google.py +121 -58
- inspect_ai/model/_providers/groq.py +7 -5
- inspect_ai/model/_providers/hf.py +11 -6
- inspect_ai/model/_providers/mistral.py +17 -20
- inspect_ai/model/_providers/openai.py +32 -21
- inspect_ai/model/_providers/openai_o1.py +9 -8
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/together.py +8 -8
- inspect_ai/model/_providers/vertex.py +18 -8
- inspect_ai/scorer/__init__.py +13 -2
- inspect_ai/scorer/_metrics/__init__.py +2 -2
- inspect_ai/scorer/_metrics/std.py +3 -3
- inspect_ai/scorer/_reducer/reducer.py +1 -1
- inspect_ai/scorer/_scorer.py +2 -2
- inspect_ai/solver/__init__.py +2 -5
- inspect_ai/solver/_prompt.py +35 -5
- inspect_ai/solver/_task_state.py +80 -38
- inspect_ai/tool/__init__.py +11 -1
- inspect_ai/tool/_tool.py +21 -3
- inspect_ai/tool/_tool_call.py +10 -0
- inspect_ai/tool/_tool_def.py +16 -5
- inspect_ai/tool/_tool_with.py +21 -4
- inspect_ai/tool/beta/__init__.py +5 -0
- inspect_ai/tool/beta/_computer/__init__.py +3 -0
- inspect_ai/tool/beta/_computer/_common.py +133 -0
- inspect_ai/tool/beta/_computer/_computer.py +155 -0
- inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
- inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
- inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
- inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
- inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
- inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
- inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
- inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/util/__init__.py +2 -3
- inspect_ai/util/{_trace.py → _conversation.py} +3 -17
- inspect_ai/util/_display.py +14 -4
- inspect_ai/util/_limit.py +26 -0
- inspect_ai/util/_sandbox/context.py +12 -13
- inspect_ai/util/_sandbox/docker/compose.py +24 -11
- inspect_ai/util/_sandbox/docker/docker.py +84 -14
- inspect_ai/util/_sandbox/docker/internal.py +3 -1
- inspect_ai/util/_sandbox/environment.py +27 -1
- inspect_ai/util/_sandbox/local.py +1 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/RECORD +159 -128
- inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
- inspect_ai/model/_trace.py +0 -48
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.57.dist-info → inspect_ai-0.3.59.dist-info}/top_level.txt +0 -0
inspect_ai/__init__.py
CHANGED
@@ -7,7 +7,7 @@ from inspect_ai._eval.evalset import eval_set
 from inspect_ai._eval.list import list_tasks
 from inspect_ai._eval.registry import task
 from inspect_ai._eval.score import score, score_async
-from inspect_ai._eval.task import Epochs, Task, TaskInfo, Tasks
+from inspect_ai._eval.task import Epochs, Task, TaskInfo, Tasks, task_with
 from inspect_ai._util.constants import PKG_NAME
 from inspect_ai.solver._human_agent.agent import human_agent
 
@@ -29,4 +29,5 @@ __all__ = [
     "TaskInfo",
     "Tasks",
     "task",
+    "task_with",
 ]
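Note: the newly exported task_with derives a variant of an existing task without redefining it. A minimal sketch, assuming task_with(task, ...) accepts the same keyword attributes as Task (e.g. name, solver) and inherits anything unspecified from the original:

from inspect_ai import Task, task, task_with
from inspect_ai.dataset import example_dataset
from inspect_ai.scorer import model_graded_fact
from inspect_ai.solver import generate, system_message


@task
def security_guide() -> Task:
    return Task(
        dataset=example_dataset("security_guide"),
        solver=[system_message("You are a security expert."), generate()],
        scorer=model_graded_fact(),
    )


# derive a variant, overriding only the name and solver (the keyword
# names here are assumptions, not confirmed by this diff)
verbose_guide = task_with(
    security_guide(),
    name="security_guide_verbose",
    solver=[system_message("Answer in exhaustive detail."), generate()],
)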
inspect_ai/_cli/common.py
CHANGED
@@ -2,6 +2,7 @@ import functools
 from typing import Any, Callable, Literal, cast
 
 import click
+import rich
 from typing_extensions import TypedDict
 
 from inspect_ai._util.constants import (
@@ -17,7 +18,7 @@ class CommonOptions(TypedDict):
     log_level: str
     log_level_transcript: str
     log_dir: str
-    display: Literal["full", "rich", "plain", "none"]
+    display: Literal["full", "conversation", "rich", "plain", "none"]
     no_ansi: bool | None
     debug: bool
     debug_port: int
@@ -64,7 +65,9 @@ def common_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
     )
     @click.option(
         "--display",
-        type=click.Choice(["full", "rich", "plain", "none"], case_sensitive=False),
+        type=click.Choice(
+            ["full", "conversation", "rich", "plain", "none"], case_sensitive=False
+        ),
         default=DEFAULT_DISPLAY,
         envvar="INSPECT_DISPLAY",
         help="Set the display type (defaults to 'full')",
@@ -103,7 +106,8 @@ def common_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
 def process_common_options(options: CommonOptions) -> None:
     # propagate display
     if options["no_ansi"]:
-        display = "plain"
+        display = "rich"
+        rich.reconfigure(no_color=True)
     else:
         display = options["display"].lower().strip()
     init_display_type(display)
inspect_ai/_cli/eval.py
CHANGED
@@ -118,6 +118,7 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         "--trace",
         type=bool,
         is_flag=True,
+        hidden=True,
         envvar="INSPECT_EVAL_TRACE",
         help="Trace message interactions with evaluated model to terminal.",
     )
@@ -347,13 +348,13 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         "--logprobs",
         type=bool,
         is_flag=True,
-        help="Return log probabilities of the output tokens. OpenAI, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
+        help="Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
        envvar="INSPECT_EVAL_LOGPROBS",
     )
     @click.option(
         "--top-logprobs",
         type=int,
-        help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, TogetherAI, Huggingface, and vLLM only.",
+        help="Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, TogetherAI, Huggingface, and vLLM only.",
         envvar="INSPECT_EVAL_TOP_LOGPROBS",
     )
     @click.option(
@@ -364,6 +365,14 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         help="Whether to enable parallel function calling during tool use (defaults to True) OpenAI and Groq only.",
         envvar="INSPECT_EVAL_PARALLEL_TOOL_CALLS",
     )
+    @click.option(
+        "--internal-tools/--no-internal-tools",
+        type=bool,
+        is_flag=True,
+        default=True,
+        help="Whether to automatically map tools to model internal implementations (e.g. 'computer' for anthropic).",
+        envvar="INSPECT_EVAL_INTERNAL_TOOLS",
+    )
     @click.option(
         "--max-tool-output",
         type=int,
@@ -438,6 +447,7 @@ def eval_command(
     logprobs: bool | None,
     top_logprobs: int | None,
     parallel_tool_calls: bool | None,
+    internal_tools: bool | None,
     max_tool_output: int | None,
     cache_prompt: str | None,
     reasoning_effort: str | None,
@@ -597,6 +607,7 @@ def eval_set_command(
     logprobs: bool | None,
     top_logprobs: int | None,
     parallel_tool_calls: bool | None,
+    internal_tools: bool | None,
     max_tool_output: int | None,
     cache_prompt: str | None,
     reasoning_effort: str | None,
@@ -835,6 +846,9 @@ def config_from_locals(locals: dict[str, Any]) -> GenerateConfigArgs:
         if key == "parallel_tool_calls":
             if value is not False:
                 value = None
+        if key == "internal_tools":
+            if value is not False:
+                value = None
         config[key] = value  # type: ignore
     return config
 
@@ -886,6 +900,7 @@ def parse_comma_separated(value: str | None) -> list[str] | None:
     "--trace",
     type=bool,
     is_flag=True,
+    hidden=True,
     help="Trace message interactions with evaluated model to terminal.",
     envvar="INSPECT_EVAL_TRACE",
 )
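Note: the new --internal-tools/--no-internal-tools flag is plumbed into GenerateConfigArgs above, so the same setting should be reachable from the Python API. A hedged sketch, assuming the flag surfaces as an internal_tools field on GenerateConfig (the diff to _generate_config.py suggests this, but the Python-side field name is inferred):

from inspect_ai.model import GenerateConfig, get_model

# disable automatic mapping of tools (e.g. 'computer') onto a model's
# native implementation; field name inferred from the CLI plumbing above
model = get_model(
    "anthropic/claude-3-5-sonnet-latest",
    config=GenerateConfig(internal_tools=False),
)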
inspect_ai/_cli/trace.py
CHANGED
@@ -62,11 +62,21 @@ def list_command(json: bool) -> None:
 
 @trace_command.command("dump")
 @click.argument("trace-file", type=str, required=False)
-def dump_command(trace_file: str | None) -> None:
+@click.option(
+    "--filter",
+    type=str,
+    help="Filter (applied to trace message field).",
+)
+def dump_command(trace_file: str | None, filter: str | None) -> None:
     """Dump a trace file to stdout (as a JSON array of log records)."""
     trace_file_path = _resolve_trace_file_path(trace_file)
 
     traces = read_trace_file(trace_file_path)
+
+    if filter:
+        filter = filter.lower()
+        traces = [trace for trace in traces if filter in trace.message.lower()]
+
     print(
         to_json(traces, indent=2, exclude_none=True, fallback=lambda _: None).decode()
     )
@@ -74,17 +84,26 @@ def dump_command(trace_file: str | None) -> None:
 
 @trace_command.command("anomalies")
 @click.argument("trace-file", type=str, required=False)
+@click.option(
+    "--filter",
+    type=str,
+    help="Filter (applied to trace message field).",
+)
 @click.option(
     "--all",
     is_flag=True,
     default=False,
     help="Show all anomolies including errors and timeouts (by default only still running and cancelled actions are shown).",
 )
-def anomolies_command(trace_file: str | None, all: bool) -> None:
+def anomolies_command(trace_file: str | None, filter: str | None, all: bool) -> None:
     """Look for anomalies in a trace file (never completed or cancelled actions)."""
     trace_file_path = _resolve_trace_file_path(trace_file)
     traces = read_trace_file(trace_file_path)
 
+    if filter:
+        filter = filter.lower()
+        traces = [trace for trace in traces if filter in trace.message.lower()]
+
     # Track started actions
     running_actions: dict[str, ActionTraceRecord] = {}
     canceled_actions: dict[str, ActionTraceRecord] = {}
inspect_ai/_display/core/active.py
CHANGED
@@ -4,8 +4,8 @@ from contextvars import ContextVar
 import rich
 
 from inspect_ai.util._display import display_type
-from inspect_ai.util._trace import trace_enabled
 
+from ..plain.display import PlainDisplay
 from ..rich.display import RichDisplay
 from ..textual.display import TextualDisplay
 from .display import Display, TaskScreen
@@ -14,10 +14,11 @@ from .display import Display, TaskScreen
 def display() -> Display:
     global _active_display
     if _active_display is None:
-        if (
+        if display_type() == "plain":
+            _active_display = PlainDisplay()
+        elif (
             display_type() == "full"
             and sys.stdout.isatty()
-            and not trace_enabled()
             and not rich.get_console().is_jupyter
         ):
             _active_display = TextualDisplay()
inspect_ai/_display/core/config.py
CHANGED
@@ -13,14 +13,14 @@ def task_config(
         value = task_args[key]
         if is_registry_dict(value):
             task_args[key] = value["name"]
-    config = profile.eval_config.model_dump(exclude_none=True) | task_args
+    config = dict(profile.eval_config.model_dump(exclude_none=True)) | task_args
     if generate_config:
-        config = profile.generate_config.model_dump(exclude_none=True) | config
+        config = dict(profile.generate_config.model_dump(exclude_none=True)) | config
     if profile.tags:
         config["tags"] = ",".join(profile.tags)
     config_print: list[str] = []
     for name, value in config.items():
-        if name == "approval":
+        if name == "approval" and isinstance(value, dict):
             config_print.append(
                 f"{name}: {','.join([approver['name'] for approver in value['approvers']])}"
             )
inspect_ai/_display/core/panel.py
CHANGED
@@ -50,9 +50,13 @@
     table.add_row(subtitle_table)
 
     # main progress and task info
-    table.add_row()
-    table.add_row(body)
-    table.add_row()
+    if body:
+        table.add_row()
+        table.add_row(body)
+
+    # spacing if there is more ocontent
+    if footer or log_location:
+        table.add_row()
 
     # footer if specified
     if footer:
inspect_ai/_display/plain/__init__.py
ADDED
File without changes
inspect_ai/_display/plain/display.py
ADDED
@@ -0,0 +1,203 @@
+import asyncio
+import contextlib
+from typing import Any, AsyncIterator, Coroutine, Iterator
+
+import rich
+
+from inspect_ai._display.core.rich import rich_initialise
+from inspect_ai._util.text import truncate
+from inspect_ai._util.throttle import throttle
+
+from ...util._concurrency import concurrency_status
+from ..core.config import task_config
+from ..core.display import (
+    TR,
+    Display,
+    Progress,
+    TaskDisplay,
+    TaskDisplayMetric,
+    TaskProfile,
+    TaskResult,
+    TaskScreen,
+    TaskSpec,
+    TaskWithResult,
+)
+from ..core.footer import task_http_rate_limits
+from ..core.panel import task_panel, task_targets
+from ..core.results import task_metric, tasks_results
+
+
+class PlainDisplay(Display):
+    def __init__(self) -> None:
+        self.total_tasks: int = 0
+        self.tasks: list[TaskWithResult] = []
+        self.parallel = False
+        rich_initialise()
+
+    def print(self, message: str) -> None:
+        print(message)
+
+    @contextlib.contextmanager
+    def progress(self, total: int) -> Iterator[Progress]:
+        yield PlainProgress(total)
+
+    def run_task_app(self, main: Coroutine[Any, Any, TR]) -> TR:
+        return asyncio.run(main)
+
+    @contextlib.contextmanager
+    def suspend_task_app(self) -> Iterator[None]:
+        yield
+
+    @contextlib.asynccontextmanager
+    async def task_screen(
+        self, tasks: list[TaskSpec], parallel: bool
+    ) -> AsyncIterator[TaskScreen]:
+        self.total_tasks = len(tasks)
+        self.multiple_task_names = len({task.name for task in tasks}) > 1
+        self.multiple_model_names = len({str(task.model) for task in tasks}) > 1
+        self.tasks = []
+        self.parallel = parallel
+        try:
+            # Print header for task(s)
+            if parallel:
+                print(f"Running {self.total_tasks} tasks...")
+            yield TaskScreen()
+        finally:
+            # Print final results
+            if self.tasks:
+                self._print_results()
+
+    @contextlib.contextmanager
+    def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
+        # Print initial task information using a rich panel
+        panel = task_panel(
+            profile=profile,
+            show_model=True,
+            body="",  # Empty body since we haven't started yet
+            subtitle=(task_config(profile), task_targets(profile)),
+            footer=None,
+            log_location=None,
+        )
+        rich.print(panel)
+
+        # Create and yield task display
+        task = TaskWithResult(profile, None)
+        self.tasks.append(task)
+        yield PlainTaskDisplay(
+            task,
+            show_task_names=self.multiple_task_names,
+            show_model_names=self.multiple_model_names,
+        )
+
+    def _print_results(self) -> None:
+        """Print final results using rich panels"""
+        panels = tasks_results(self.tasks)
+        rich.print(panels)
+
+
+class PlainProgress(Progress):
+    def __init__(self, total: int):
+        self.total = total
+        self.current = 0
+
+    def update(self, n: int = 1) -> None:
+        self.current += n
+        # No direct printing - PlainTaskDisplay handles it
+
+    def complete(self) -> None:
+        self.current = self.total
+
+
+class PlainTaskDisplay(TaskDisplay):
+    def __init__(
+        self, task: TaskWithResult, *, show_task_names: bool, show_model_names: bool
+    ):
+        self.task = task
+        self.show_task_names = show_task_names
+        self.show_model_names = show_model_names
+        self.progress_display: PlainProgress | None = None
+        self.samples_complete = 0
+        self.samples_total = 0
+        self.current_metrics: list[TaskDisplayMetric] | None = None
+        self.last_progress = 0  # Track last progress percentage
+
+    @contextlib.contextmanager
+    def progress(self) -> Iterator[Progress]:
+        self.progress_display = PlainProgress(self.task.profile.steps)
+        yield self.progress_display
+
+    @throttle(1)
+    def _print_status_throttled(self) -> None:
+        self._print_status()
+
+    def _print_status(self) -> None:
+        """Print status updates on new lines when there's meaningful progress"""
+        if not self.progress_display:
+            return
+
+        # Calculate current progress percentage
+        current_progress = int(
+            self.progress_display.current / self.progress_display.total * 100
+        )
+
+        # Only print on percentage changes to avoid too much output
+        if current_progress != self.last_progress:
+            status_parts: list[str] = []
+
+            # if this is parallel print task and model to distinguish (limit both to 12 chars)
+            MAX_NAME_WIDTH = 12
+            if self.show_task_names:
+                status_parts.append(truncate(self.task.profile.name, MAX_NAME_WIDTH))
+            if self.show_model_names:
+                status_parts.append(
+                    truncate(str(self.task.profile.model), MAX_NAME_WIDTH)
+                )
+
+            # Add step progress
+            status_parts.append(
+                f"Steps: {self.progress_display.current:3d}/{self.progress_display.total} {current_progress:3d}%"
+            )
+
+            # Add sample progress
+            status_parts.append(
+                f"Samples: {self.samples_complete:3d}/{self.samples_total:3d}"
+            )
+
+            # Add metrics
+            if self.current_metrics:
+                metric_str = task_metric(self.current_metrics)
+                status_parts.append(metric_str)
+
+            # Add resource usage
+            # Very similar to ``inspect_ai._display.core.footer.task_resources``, but without
+            # the rich formatting added in the ``task_dict`` call
+            resources_dict: dict[str, str] = {}
+            for model, resource in concurrency_status().items():
+                resources_dict[model] = f"{resource[0]:2d}/{resource[1]:2d}"
+            resources = ", ".join(
+                [f"{key}: {value}" for key, value in resources_dict.items()]
+            )
+            status_parts.append(resources)
+
+            # Add rate limits
+            rate_limits = task_http_rate_limits()
+            if rate_limits:
+                status_parts.append(rate_limits)
+
+            # Print on new line
+            print(" | ".join(status_parts))
+
+            self.last_progress = current_progress
+
+    def sample_complete(self, complete: int, total: int) -> None:
+        self.samples_complete = complete
+        self.samples_total = total
+        self._print_status_throttled()
+
+    def update_metrics(self, metrics: list[TaskDisplayMetric]) -> None:
+        self.current_metrics = metrics
+        self._print_status_throttled()
+
+    def complete(self, result: TaskResult) -> None:
+        self.task.result = result
+        self._print_status()
inspect_ai/_display/rich/display.py
CHANGED
@@ -15,7 +15,6 @@ from inspect_ai._util.constants import CONSOLE_DISPLAY_WIDTH
 from inspect_ai.log._transcript import InputEvent, transcript
 from inspect_ai.util._display import display_type
 from inspect_ai.util._throttle import throttle
-from inspect_ai.util._trace import trace_enabled
 
 from ..core.config import task_config
 from ..core.display import (
@@ -130,11 +129,6 @@ class RichDisplay(Display):
     @override
     @contextlib.contextmanager
     def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
-        # if there is no ansi display than all of the below will
-        # be a no-op, so we print a simple text message for the task
-        if display_type() == "plain":
-            rich.get_console().print(task_no_ansi(profile))
-
         # for typechekcer
         if self.tasks is None:
             self.tasks = []
@@ -151,7 +145,8 @@
     @throttle(1)
     def _update_display(self) -> None:
         if (
-            self.tasks is not None
+            display_type() != "conversation"
+            and self.tasks is not None
             and self.tasks
             and self.progress_ui is not None
             and self.live is not None
@@ -170,7 +165,7 @@ class RichTaskScreen(TaskScreen):
     def __init__(self, live: Live) -> None:
         self.theme = rich_theme()
         self.live = live
-        status_text = "Working" if trace_enabled() else "Task running"
+        status_text = "Working" if display_type() == "conversation" else "Task running"
         self.status = self.live.console.status(
             f"[{self.theme.meta} bold]{status_text}...[/{self.theme.meta} bold]",
             spinner="clock",
@@ -189,7 +184,7 @@
     ) -> Iterator[Console]:
         # determine transient based on trace mode
         if transient is None:
-            transient = not trace_enabled()
+            transient = display_type() != "conversation"
 
         # clear live task status and transient status
         self.live.update("", refresh=True)
inspect_ai/_display/textual/app.py
CHANGED
@@ -284,7 +284,10 @@ class TaskScreenApp(App[TR]):
 
     def update_samples(self) -> None:
         samples_view = self.query_one(SamplesView)
-        samples_view.set_samples(active_samples())
+        active_and_started_samples = [
+            sample for sample in active_samples() if sample.started is not None
+        ]
+        samples_view.set_samples(active_and_started_samples)
 
     def update_footer(self) -> None:
         left, right = task_footer()
inspect_ai/_display/textual/widgets/port_mappings.py
ADDED
@@ -0,0 +1,110 @@
+from typing import Literal
+
+from textual.app import ComposeResult
+from textual.containers import HorizontalScroll
+from textual.widget import Widget
+from textual.widgets import Link, Static
+
+from inspect_ai._util.port_names import get_service_by_port
+from inspect_ai.util._sandbox.environment import PortMapping
+
+
+class PortMappingsView(HorizontalScroll):
+    DEFAULT_CSS = """
+    PortMappingsView {
+        layout: grid;
+        height: auto;
+        grid-size: 4 3;
+        grid-columns: auto auto auto auto;
+        grid-gutter: 0 1;
+    }
+    """
+
+    def __init__(self, ports: list[PortMapping] | None) -> None:
+        super().__init__()
+        self.ports = ports
+
+    def compose(self) -> ComposeResult:
+        if not self.ports:
+            return
+        yield Static("service")
+        yield Static("sandbox")
+        yield Static("client")
+        yield Static("endpoint")
+        mappings_and_services = [
+            (mapping, get_service_by_port(mapping.container_port, mapping.protocol))
+            for mapping in self.ports
+        ]
+        remaining_widgets = [
+            widget
+            for mapping_and_service in mappings_and_services
+            for widget in widgets_from_port_mapping(mapping_and_service)
+        ]
+        for widget in remaining_widgets:
+            yield widget
+
+
+def widgets_for_port_mappings(
+    port_mappings: list[PortMapping] | None,
+) -> list[Widget]:
+    if port_mappings is None:
+        return []
+    return [
+        static
+        for mapping in [
+            (mapping, get_service_by_port(mapping.container_port, mapping.protocol))
+            for mapping in port_mappings
+        ]
+        for static in widgets_from_port_mapping(mapping)
+    ]
+
+
+def widgets_from_port_mapping(
+    mapping_service_tuple: tuple[PortMapping, str | None],
+) -> list[Widget]:
+    port_mapping, service = mapping_service_tuple
+    return [
+        widget
+        for host_mapping in port_mapping.mappings
+        for widget in get_row_widgets(
+            port_mapping.protocol,
+            host_mapping.host_port,
+            port_mapping.container_port,
+            service,
+        )
+    ]
+
+
+def get_row_widgets(
+    protocol: Literal["tcp", "udp"],
+    host_port: int,
+    container_port: int,
+    service: str | None,
+) -> list[Widget]:
+    url = get_url(
+        host_port,
+        service,
+    )
+    return [
+        Static(service if service is not None else protocol),
+        Static(str(container_port)),
+        Static(str(host_port)),
+        Link(url) if url is not None else Static("asdf"),
+    ]
+
+
+def get_url(
+    host_port: int,
+    service: str | None,
+) -> str | None:
+    if service is not None:
+        if service == "noVNC":
+            return f"http://localhost:{host_port}?view_only=true&autoconnect=true&resize=scale"
+
+        if service.startswith("HTTP"):
+            return f"https://localhost:{host_port}"
+
+        if service.startswith("VNC"):
+            return f"vnc://localhost:{host_port}"
+
+    return None