inspect-ai 0.3.52__py3-none-any.whl → 0.3.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. inspect_ai/_cli/eval.py +55 -1
  2. inspect_ai/_cli/main.py +2 -0
  3. inspect_ai/_cli/trace.py +244 -0
  4. inspect_ai/_display/core/progress.py +9 -3
  5. inspect_ai/_display/core/results.py +8 -4
  6. inspect_ai/_display/textual/app.py +5 -1
  7. inspect_ai/_display/textual/widgets/task_detail.py +3 -0
  8. inspect_ai/_display/textual/widgets/tasks.py +97 -6
  9. inspect_ai/_eval/eval.py +33 -0
  10. inspect_ai/_eval/evalset.py +4 -0
  11. inspect_ai/_eval/registry.py +2 -2
  12. inspect_ai/_eval/task/images.py +4 -14
  13. inspect_ai/_eval/task/results.py +22 -4
  14. inspect_ai/_eval/task/run.py +40 -20
  15. inspect_ai/_eval/task/sandbox.py +72 -43
  16. inspect_ai/_eval/task/task.py +4 -0
  17. inspect_ai/_eval/task/util.py +2 -0
  18. inspect_ai/_util/constants.py +3 -3
  19. inspect_ai/_util/display.py +1 -0
  20. inspect_ai/_util/logger.py +34 -8
  21. inspect_ai/_util/trace.py +275 -0
  22. inspect_ai/_view/www/App.css +13 -0
  23. inspect_ai/_view/www/dist/assets/index.css +13 -0
  24. inspect_ai/_view/www/dist/assets/index.js +80 -43
  25. inspect_ai/_view/www/src/App.mjs +31 -6
  26. inspect_ai/_view/www/src/Types.mjs +6 -0
  27. inspect_ai/_view/www/src/components/JsonPanel.mjs +11 -17
  28. inspect_ai/_view/www/src/components/MessageContent.mjs +9 -2
  29. inspect_ai/_view/www/src/components/Tools.mjs +46 -18
  30. inspect_ai/_view/www/src/navbar/Navbar.mjs +12 -0
  31. inspect_ai/_view/www/src/samples/SampleList.mjs +2 -2
  32. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +2 -2
  33. inspect_ai/log/_log.py +6 -0
  34. inspect_ai/log/_message.py +2 -2
  35. inspect_ai/log/_recorders/eval.py +8 -18
  36. inspect_ai/log/_recorders/json.py +19 -17
  37. inspect_ai/model/_cache.py +22 -16
  38. inspect_ai/model/_call_tools.py +9 -1
  39. inspect_ai/model/_generate_config.py +8 -2
  40. inspect_ai/model/_model.py +11 -12
  41. inspect_ai/model/_providers/azureai.py +1 -1
  42. inspect_ai/model/_providers/bedrock.py +18 -2
  43. inspect_ai/model/_providers/hf.py +1 -1
  44. inspect_ai/model/_providers/openai.py +32 -8
  45. inspect_ai/model/_providers/providers.py +1 -1
  46. inspect_ai/model/_providers/vllm.py +1 -1
  47. inspect_ai/tool/_tools/_web_browser/_web_browser.py +1 -1
  48. inspect_ai/util/_sandbox/context.py +7 -3
  49. inspect_ai/util/_sandbox/docker/compose.py +58 -19
  50. inspect_ai/util/_sandbox/docker/config.py +8 -10
  51. inspect_ai/util/_sandbox/docker/docker.py +20 -16
  52. inspect_ai/util/_sandbox/docker/util.py +3 -9
  53. inspect_ai/util/_sandbox/environment.py +7 -2
  54. inspect_ai/util/_sandbox/limits.py +1 -1
  55. inspect_ai/util/_sandbox/local.py +8 -9
  56. inspect_ai/util/_sandbox/service.py +17 -7
  57. inspect_ai/util/_subprocess.py +6 -1
  58. inspect_ai/util/_subtask.py +8 -2
  59. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/METADATA +6 -8
  60. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/RECORD +64 -62
  61. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/LICENSE +0 -0
  62. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/WHEEL +0 -0
  63. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/entry_points.txt +0 -0
  64. {inspect_ai-0.3.52.dist-info → inspect_ai-0.3.54.dist-info}/top_level.txt +0 -0
inspect_ai/_cli/eval.py CHANGED
@@ -30,6 +30,7 @@ MAX_TASKS_HELP = "Maximum number of tasks to run in parallel (default is 1)"
 MAX_SUBPROCESSES_HELP = (
     "Maximum number of subprocesses to run in parallel (default is os.cpu_count())"
 )
+MAX_SANDBOXES_HELP = "Maximum number of sandboxes (per-provider) to run in parallel."
 NO_SANDBOX_CLEANUP_HELP = "Do not cleanup sandbox environments after task completes"
 FAIL_ON_ERROR_HELP = "Threshold of sample errors to tolerage (by default, evals fail when any error occurs). Value between 0 to 1 to set a proportion; value greater than 1 to set a count."
 NO_LOG_SAMPLES_HELP = "Do not include samples in the log file."
@@ -41,6 +42,7 @@ LOG_BUFFER_HELP = "Number of samples to buffer before writing log file. If not s
 NO_SCORE_HELP = (
     "Do not score model output (use the inspect score command to score output later)"
 )
+NO_SCORE_DISPLAY = "Do not display scoring metrics in realtime."
 MAX_CONNECTIONS_HELP = f"Maximum number of concurrent connections to Model API (defaults to {DEFAULT_MAX_CONNECTIONS})"
 MAX_RETRIES_HELP = (
     f"Maximum number of times to retry request (defaults to {DEFAULT_MAX_RETRIES})"
@@ -192,6 +194,12 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         help=MAX_SUBPROCESSES_HELP,
         envvar="INSPECT_EVAL_MAX_SUBPROCESSES",
     )
+    @click.option(
+        "--max-sandboxes",
+        type=int,
+        help=MAX_SANDBOXES_HELP,
+        envvar="INSPECT_EVAL_MAX_SANDBOXES",
+    )
     @click.option(
         "--message-limit",
         type=int,
@@ -250,6 +258,13 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         help=NO_SCORE_HELP,
         envvar="INSPECT_EVAL_NO_SCORE",
     )
+    @click.option(
+        "--no-score-display",
+        type=bool,
+        is_flag=True,
+        help=NO_SCORE_HELP,
+        envvar="INSPECT_EVAL_SCORE_DISPLAY",
+    )
     @click.option(
         "--max-tokens",
         type=int,
@@ -332,7 +347,7 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         "--logprobs",
         type=bool,
         is_flag=True,
-        help="Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, vLLM only.",
+        help="Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only.",
         envvar="INSPECT_EVAL_LOGPROBS",
     )
     @click.option(
@@ -361,6 +376,12 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         help='Cache prompt prefix (Anthropic only). Defaults to "auto", which will enable caching for requests with tools.',
         envvar="INSPECT_EVAL_CACHE_PROMPT",
     )
+    @click.option(
+        "--reasoning-effort",
+        type=click.Choice(["low", "medium", "high"]),
+        help="Constrains effort on reasoning for reasoning models. Open AI o1 models only.",
+        envvar="INSPECT_EVAL_REASONING_EFFORT",
+    )
     @click.option(
         "--log-format",
         type=click.Choice(["eval", "json"], case_sensitive=False),
@@ -419,18 +440,21 @@ def eval_command(
     parallel_tool_calls: bool | None,
     max_tool_output: int | None,
     cache_prompt: str | None,
+    reasoning_effort: str | None,
     message_limit: int | None,
     token_limit: int | None,
     time_limit: int | None,
     max_samples: int | None,
     max_tasks: int | None,
     max_subprocesses: int | None,
+    max_sandboxes: int | None,
     fail_on_error: bool | float | None,
     no_fail_on_error: bool | None,
     no_log_samples: bool | None,
     log_images: bool | None,
     log_buffer: int | None,
     no_score: bool | None,
+    no_score_display: bool | None,
     log_format: Literal["eval", "json"] | None,
     **common: Unpack[CommonOptions],
 ) -> None:
@@ -472,6 +496,7 @@
         max_samples=max_samples,
         max_tasks=max_tasks,
         max_subprocesses=max_subprocesses,
+        max_sandboxes=max_sandboxes,
         fail_on_error=fail_on_error,
         no_fail_on_error=no_fail_on_error,
         debug_errors=common["debug_errors"],
@@ -479,6 +504,7 @@
         log_images=log_images,
         log_buffer=log_buffer,
         no_score=no_score,
+        no_score_display=no_score_display,
         is_eval_set=False,
         **config,
     )
@@ -573,18 +599,21 @@ def eval_set_command(
     parallel_tool_calls: bool | None,
     max_tool_output: int | None,
     cache_prompt: str | None,
+    reasoning_effort: str | None,
     message_limit: int | None,
     token_limit: int | None,
     time_limit: int | None,
     max_samples: int | None,
     max_tasks: int | None,
     max_subprocesses: int | None,
+    max_sandboxes: int | None,
     fail_on_error: bool | float | None,
     no_fail_on_error: bool | None,
     no_log_samples: bool | None,
     log_images: bool | None,
     log_buffer: int | None,
     no_score: bool | None,
+    no_score_display: bool | None,
     bundle_dir: str | None,
     bundle_overwrite: bool | None,
     log_format: Literal["eval", "json"] | None,
@@ -628,6 +657,7 @@
         max_samples=max_samples,
         max_tasks=max_tasks,
         max_subprocesses=max_subprocesses,
+        max_sandboxes=max_sandboxes,
         fail_on_error=fail_on_error,
         no_fail_on_error=no_fail_on_error,
         debug_errors=common["debug_errors"],
@@ -635,6 +665,7 @@
         log_images=log_images,
         log_buffer=log_buffer,
         no_score=no_score,
+        no_score_display=no_score_display,
         is_eval_set=True,
         retry_attempts=retry_attempts,
         retry_wait=retry_wait,
@@ -679,6 +710,7 @@ def eval_exec(
     max_samples: int | None,
     max_tasks: int | None,
     max_subprocesses: int | None,
+    max_sandboxes: int | None,
     fail_on_error: bool | float | None,
     no_fail_on_error: bool | None,
     debug_errors: bool | None,
@@ -686,6 +718,7 @@ def eval_exec(
     log_images: bool | None,
     log_buffer: int | None,
     no_score: bool | None,
+    no_score_display: bool | None,
     is_eval_set: bool = False,
     retry_attempts: int | None = None,
     retry_wait: int | None = None,
@@ -726,6 +759,7 @@
     log_images = False if log_images is False else None
     trace = True if trace else None
    score = False if no_score else True
+    score_display = False if no_score_display else None
 
     # build params
     params: dict[str, Any] = (
@@ -756,10 +790,12 @@
             max_samples=max_samples,
             max_tasks=max_tasks,
             max_subprocesses=max_subprocesses,
+            max_sandboxes=max_sandboxes,
             log_samples=log_samples,
             log_images=log_images,
             log_buffer=log_buffer,
             score=score,
+            score_display=score_display,
         )
         | kwargs
     )
@@ -834,6 +870,12 @@ def parse_comma_separated(value: str | None) -> list[str] | None:
     help=MAX_SUBPROCESSES_HELP,
     envvar="INSPECT_EVAL_MAX_SUBPROCESSES",
 )
+@click.option(
+    "--max-sandboxes",
+    type=int,
+    help=MAX_SANDBOXES_HELP,
+    envvar="INSPECT_EVAL_MAX_SANDBOXES",
+)
 @click.option(
     "--no-sandbox-cleanup",
     type=bool,
@@ -888,6 +930,13 @@
     help=NO_SCORE_HELP,
     envvar="INSPECT_EVAL_SCORE",
 )
+@click.option(
+    "--no-score-display",
+    type=bool,
+    is_flag=True,
+    help=NO_SCORE_HELP,
+    envvar="INSPECT_EVAL_SCORE_DISPLAY",
+)
 @click.option(
     "--max-connections",
     type=int,
@@ -904,6 +953,7 @@ def eval_retry_command(
     max_samples: int | None,
     max_tasks: int | None,
    max_subprocesses: int | None,
+    max_sandboxes: int | None,
     no_sandbox_cleanup: bool | None,
     trace: bool | None,
     fail_on_error: bool | float | None,
@@ -912,6 +962,7 @@
     log_images: bool | None,
     log_buffer: int | None,
     no_score: bool | None,
+    no_score_display: bool | None,
     max_connections: int | None,
     max_retries: int | None,
     timeout: int | None,
@@ -926,6 +977,7 @@
     log_samples = False if no_log_samples else None
     log_images = False if log_images is False else None
     score = False if no_score else True
+    score_display = False if no_score_display else None
 
     # resolve fail_on_error
     if no_fail_on_error is True:
@@ -947,6 +999,7 @@
         max_samples=max_samples,
         max_tasks=max_tasks,
         max_subprocesses=max_subprocesses,
+        max_sandboxes=max_sandboxes,
         sandbox_cleanup=sandbox_cleanup,
         trace=trace,
         fail_on_error=fail_on_error,
@@ -955,6 +1008,7 @@
         log_images=log_images,
         log_buffer=log_buffer,
         score=score,
+        score_display=score_display,
         max_retries=max_retries,
         timeout=timeout,
         max_connections=max_connections,
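For orientation (not part of the diff): the new flags above map to the environment variables shown (INSPECT_EVAL_MAX_SANDBOXES, INSPECT_EVAL_SCORE_DISPLAY, INSPECT_EVAL_REASONING_EFFORT), and eval_exec() forwards max_sandboxes and score_display into the params dict it passes to the eval/eval-set call. A minimal Python sketch of the equivalent usage follows; it assumes eval() accepts the same keyword arguments that eval_exec() assembles, and that reasoning_effort is a GenerateConfig field (suggested by the inspect_ai/model/_generate_config.py entry in the file list). The task file name is hypothetical.

from inspect_ai import eval

logs = eval(
    "theory_of_mind.py",      # hypothetical task file
    model="openai/o1",
    max_sandboxes=10,         # new in 0.3.54: per-provider cap on concurrent sandboxes
    score_display=False,      # new in 0.3.54: suppress realtime scoring metrics
    reasoning_effort="high",  # new in 0.3.54: generate-config option for o1-style models
)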
inspect_ai/_cli/main.py CHANGED
@@ -11,6 +11,7 @@ from .list import list_command
 from .log import log_command
 from .sandbox import sandbox_command
 from .score import score_command
+from .trace import trace_command
 from .view import view_command
 
 
@@ -46,6 +47,7 @@ inspect.add_command(log_command)
 inspect.add_command(score_command)
 inspect.add_command(view_command)
 inspect.add_command(sandbox_command)
+inspect.add_command(trace_command)
 
 
 def main() -> None:
inspect_ai/_cli/trace.py ADDED
@@ -0,0 +1,244 @@
+import os
+import shlex
+import time
+from datetime import datetime
+from json import dumps
+from pathlib import Path
+from typing import Callable, cast
+
+import click
+from pydantic_core import to_json
+from rich import print as r_print
+from rich.console import Console, RenderableType
+from rich.table import Column, Table
+
+from inspect_ai._util.error import PrerequisiteError
+from inspect_ai._util.logger import TRACE_FILE_NAME
+from inspect_ai._util.trace import ActionTraceRecord, inspect_trace_dir, read_trace_file
+
+
+@click.group("trace")
+def trace_command() -> None:
+    """List and read execution traces.
+
+    Inspect includes a TRACE log-level which is right below the HTTP and INFO log levels (so not written to the console by default). However, TRACE logs are always recorded to a separate file, and the last 10 TRACE logs are preserved. The 'trace' command provides ways to list and read these traces.
+    """
+    return None
+
+
+@trace_command.command("list")
+@click.option(
+    "--json",
+    type=bool,
+    is_flag=True,
+    default=False,
+    help="Output listing as JSON",
+)
+def list_command(json: bool) -> None:
+    """List all trace files."""
+    trace_dir = inspect_trace_dir()
+    trace_files: list[dict[str, float | str]] = [
+        {"mtime": f.lstat().st_mtime, "file": f.absolute().as_posix()}
+        for f in trace_dir.iterdir()
+        if f.is_file()
+    ]
+    trace_files.sort(key=lambda f: cast(float, f["mtime"]), reverse=True)
+    if json:
+        print(dumps(trace_files, indent=2))
+    else:
+        table = Table(box=None, show_header=True, pad_edge=False)
+        table.add_column("Time")
+        table.add_column("Trace File")
+        for file in trace_files:
+            mtime = datetime.fromtimestamp(cast(float, file["mtime"])).astimezone()
+            table.add_row(
+                mtime.strftime("%d-%b %H:%M:%S %Z"), shlex.quote(str(file["file"]))
+            )
+        r_print(table)
+
+
+@trace_command.command("dump")
+@click.argument("trace-file", type=str, required=False, default=TRACE_FILE_NAME)
+def read_command(trace_file: str) -> None:
+    """Dump a trace file to stdout (as a JSON array of log records)."""
+    trace_file_path = resolve_trace_file_path(trace_file)
+
+    traces = read_trace_file(trace_file_path)
+    print(
+        to_json(traces, indent=2, exclude_none=True, fallback=lambda _: None).decode()
+    )
+
+
+@trace_command.command("anomalies")
+@click.argument("trace-file", type=str, required=False, default=TRACE_FILE_NAME)
+@click.option(
+    "--all",
+    is_flag=True,
+    default=False,
+    help="Show all anomolies including errors and timeouts (by default only still running and cancelled actions are shown).",
+)
+def anomolies_command(trace_file: str, all: bool) -> None:
+    """Look for anomalies in a trace file (never completed or cancelled actions)."""
+    trace_file_path = resolve_trace_file_path(trace_file)
+    traces = read_trace_file(trace_file_path)
+
+    # Track started actions
+    running_actions: dict[str, ActionTraceRecord] = {}
+    canceled_actions: dict[str, ActionTraceRecord] = {}
+    error_actions: dict[str, ActionTraceRecord] = {}
+    timeout_actions: dict[str, ActionTraceRecord] = {}
+
+    def action_started(trace: ActionTraceRecord) -> None:
+        running_actions[trace.trace_id] = trace
+
+    def action_completed(trace: ActionTraceRecord) -> ActionTraceRecord:
+        start_trace = running_actions.get(trace.trace_id)
+        if start_trace:
+            del running_actions[trace.trace_id]
+            return start_trace
+        else:
+            raise RuntimeError(f"Expected {trace.trace_id} in action dictionary.")
+
+    def action_failed(trace: ActionTraceRecord) -> None:
+        if all:
+            error_actions[start_trace.trace_id] = trace
+
+    def action_canceled(trace: ActionTraceRecord) -> None:
+        canceled_actions[start_trace.trace_id] = trace
+
+    def action_timeout(trace: ActionTraceRecord) -> None:
+        if all:
+            timeout_actions[start_trace.trace_id] = trace
+
+    for trace in traces:
+        if isinstance(trace, ActionTraceRecord):
+            match trace.event:
+                case "enter":
+                    action_started(trace)
+                case "exit":
+                    action_completed(trace)
+                case "cancel":
+                    start_trace = action_completed(trace)
+                    trace.start_time = start_trace.start_time
+                    action_canceled(trace)
+                case "error":
+                    start_trace = action_completed(trace)
+                    trace.start_time = start_trace.start_time
+                    action_failed(trace)
+                case "timeout":
+                    start_trace = action_completed(trace)
+                    trace.start_time = start_trace.start_time
+                    action_timeout(trace)
+                case _:
+                    print(f"Unknown event type: {trace.event}")
+
+    # do we have any traces?
+    if (
+        len(running_actions)
+        + len(canceled_actions)
+        + len(error_actions)
+        + len(timeout_actions)
+        == 0
+    ):
+        print(f"TRACE: {shlex.quote(trace_file_path.as_posix())}\n")
+        if all:
+            print("No anomalies found in trace log.")
+        else:
+            print(
+                "No running or cancelled actions found in trace log (pass --all to see errors and timeouts)."
+            )
+        return
+
+    with open(os.devnull, "w") as f:
+        # generate output
+        console = Console(record=True, file=f)
+
+        def print_fn(o: RenderableType) -> None:
+            console.print(o, highlight=False)
+
+        print_fn(f"[bold]TRACE: {shlex.quote(trace_file_path.as_posix())}[bold]")
+
+        _print_bucket(print_fn, "Running Actions", running_actions)
+        _print_bucket(print_fn, "Cancelled Actions", canceled_actions)
+        _print_bucket(print_fn, "Error Actions", error_actions)
+        _print_bucket(print_fn, "Timeout Actions", timeout_actions)
+
+        # print
+        print(console.export_text(styles=True).strip())
+
+
+def _print_bucket(
+    print_fn: Callable[[RenderableType], None],
+    label: str,
+    bucket: dict[str, ActionTraceRecord],
+) -> None:
+    if len(bucket) > 0:
+        # Sort the items in chronological order of when
+        # they finished so the first finished item is at the top
+        sorted_actions = sorted(
+            bucket.values(),
+            key=lambda record: (record.start_time or 0) + (record.duration or 0),
+            reverse=True,
+        )
+
+        # create table
+        table = Table(
+            Column(""),
+            Column("", justify="right"),
+            Column(""),
+            Column("", width=22),
+            box=None,
+            title=label,
+            title_justify="left",
+            title_style="bold",
+            pad_edge=False,
+            padding=(0, 1),
+        )
+
+        for action in sorted_actions:
+            # Compute duration (use the event duration or time since started)
+            duration = (
+                action.duration
+                if action.duration is not None
+                else time.time() - action.start_time
+                if action.start_time is not None
+                else 0.0
+            )
+
+            # The event start time
+            start_time = formatTime(action.start_time) if action.start_time else "None"
+
+            # Event detail
+            detail = (
+                f"{action.detail or action.message} {action.error}"
+                if action.event == "error"
+                else (action.detail or action.message)
+            )
+
+            table.add_row(
+                action.action,
+                f"{round(duration, 2):.2f}s".rjust(8),
+                f" {detail}",
+                start_time,
+            )
+
+        print_fn("")
+        print_fn(table)
+
+
+def resolve_trace_file_path(trace_file: str) -> Path:
+    trace_file_path = Path(trace_file)
+    if not trace_file_path.is_absolute():
+        trace_file_path = inspect_trace_dir() / trace_file_path
+
+    if not trace_file_path.exists():
+        raise PrerequisiteError(
+            f"The specified trace file '{trace_file_path}' does not exist."
+        )
+
+    return trace_file_path
+
+
+def formatTime(timestamp: float) -> str:
+    dt = datetime.fromtimestamp(timestamp).astimezone()
+    return dt.strftime("%H:%M:%S %Z")
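For orientation (not part of the diff): the new command is a thin wrapper over helpers in inspect_ai._util.trace, also added in this release (see inspect_ai/_util/trace.py in the file list). The sketch below drives those helpers directly from Python; it reuses only the names imported above (inspect_trace_dir, read_trace_file, ActionTraceRecord) and mirrors the enter/exit matching in the anomalies command, so treat it as illustrative rather than a documented API.

from inspect_ai._util.trace import ActionTraceRecord, inspect_trace_dir, read_trace_file

# most recently modified trace file (the same ordering `inspect trace list` uses)
latest = max(
    (f for f in inspect_trace_dir().iterdir() if f.is_file()),
    key=lambda f: f.lstat().st_mtime,
)

# actions that entered but never exited (the core of `inspect trace anomalies`)
open_actions: dict[str, ActionTraceRecord] = {}
for record in read_trace_file(latest):
    if isinstance(record, ActionTraceRecord):
        if record.event == "enter":
            open_actions[record.trace_id] = record
        else:
            open_actions.pop(record.trace_id, None)

for action in open_actions.values():
    print(action.action, action.message)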
inspect_ai/_display/core/progress.py CHANGED
@@ -130,9 +130,15 @@ def progress_time(time: float) -> str:
     return f"{hours:2.0f}:{minutes:02.0f}:{seconds:02.0f}"
 
 
-def progress_count(complete: int, total: int) -> str:
-    # Pad the display to keep it stable
+def progress_count(complete: int, total: int, width: int | None = None) -> str:
+    # Pad the display to keep it stable as the
+    # complete metrics
     total_str = f"{total:,}"
     complete_str = f"{complete:,}"
     padding = max(0, len(total_str) - len(complete_str))
-    return " " * padding + f"[{complete_str}/{total_str}]"
+    padded = " " * padding + f"[{complete_str}/{total_str}]"
+
+    # If a width has ben specified, pad up to this width as well
+    if width is not None:
+        padded = padded.rjust(width)
+    return padded
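To make the padding behaviour concrete, a hand-worked sketch of the new width parameter follows (expected strings derived from the code above, not captured output; the import path is the private module shown in this diff):

from inspect_ai._display.core.progress import progress_count

progress_count(5, 1000)            # "    [5/1,000]"  (4 pad spaces keep the count column stable)
progress_count(5, 1000, width=15)  # "      [5/1,000]" (additionally right-justified to 15 chars)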
inspect_ai/_display/core/results.py CHANGED
@@ -166,7 +166,7 @@ def task_interrupted(profile: TaskProfile, samples_completed: int) -> Renderable
     return message
 
 
-def task_metric(metrics: list[TaskDisplayMetric]) -> str:
+def task_metric(metrics: list[TaskDisplayMetric], width: int | None = None) -> str:
     reducer_names: Set[str] = {
         metric.reducer for metric in metrics if metric.reducer is not None
     }
@@ -180,10 +180,14 @@ def task_metric(metrics: list[TaskDisplayMetric]) -> str:
     else:
         value = f"{metric.value:.2f}"
 
-    if show_reducer:
-        return f"{metric.name}/{metric.reducer}: {value}"
+    if show_reducer and metric.reducer is not None:
+        metric_str = f"{metric.name}/{metric.reducer}: {value}"
     else:
-        return f"{metric.name}: {value}"
+        metric_str = f"{metric.name}: {value}"
+
+    if width is not None:
+        metric_str = metric_str.rjust(width)
+    return metric_str
 
 
 def task_metrics(scores: list[EvalScore]) -> str:
inspect_ai/_display/textual/app.py CHANGED
@@ -197,7 +197,11 @@ class TaskScreenApp(App[TR]):
 
         # add task
         try:
-            yield self.query_one(TasksView).add_task(task)
+            task_view = self.query_one(TasksView)
+            task_view.set_display_metrics(
+                profile.eval_config.score_display is not False
+            )
+            yield task_view.add_task(task)
         finally:
             pass
 
inspect_ai/_display/textual/widgets/task_detail.py CHANGED
@@ -224,6 +224,9 @@ class TaskMetrics(Widget):
         self.recompute_grid()
 
     def recompute_grid(self) -> None:
+        if not self.is_mounted:
+            return
+
         grid = self.query_one(f"#{self.grid_id()}")
 
         grid.remove_children()