inspect-ai 0.3.51__py3-none-any.whl → 0.3.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +44 -2
- inspect_ai/_display/core/config.py +4 -0
- inspect_ai/_display/core/panel.py +1 -1
- inspect_ai/_display/core/progress.py +9 -3
- inspect_ai/_display/core/results.py +8 -4
- inspect_ai/_display/textual/widgets/task_detail.py +45 -13
- inspect_ai/_display/textual/widgets/tasks.py +86 -5
- inspect_ai/_display/textual/widgets/transcript.py +4 -17
- inspect_ai/_eval/eval.py +29 -1
- inspect_ai/_eval/evalset.py +7 -0
- inspect_ai/_eval/registry.py +2 -2
- inspect_ai/_eval/task/log.py +6 -1
- inspect_ai/_eval/task/results.py +22 -4
- inspect_ai/_eval/task/run.py +18 -12
- inspect_ai/_eval/task/sandbox.py +72 -43
- inspect_ai/_eval/task/task.py +4 -0
- inspect_ai/_eval/task/util.py +17 -6
- inspect_ai/_util/logger.py +10 -2
- inspect_ai/_util/samples.py +7 -0
- inspect_ai/_util/transcript.py +8 -0
- inspect_ai/_view/www/App.css +13 -0
- inspect_ai/_view/www/dist/assets/index.css +13 -0
- inspect_ai/_view/www/dist/assets/index.js +105 -55
- inspect_ai/_view/www/src/App.mjs +31 -6
- inspect_ai/_view/www/src/Types.mjs +6 -0
- inspect_ai/_view/www/src/components/JsonPanel.mjs +11 -17
- inspect_ai/_view/www/src/components/MessageContent.mjs +9 -2
- inspect_ai/_view/www/src/components/Tools.mjs +46 -18
- inspect_ai/_view/www/src/navbar/Navbar.mjs +12 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +18 -5
- inspect_ai/_view/www/src/samples/SampleList.mjs +2 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +2 -2
- inspect_ai/log/_log.py +6 -0
- inspect_ai/log/_recorders/eval.py +8 -7
- inspect_ai/model/_call_tools.py +2 -6
- inspect_ai/model/_generate_config.py +6 -0
- inspect_ai/model/_model.py +18 -4
- inspect_ai/model/_providers/azureai.py +22 -2
- inspect_ai/model/_providers/bedrock.py +17 -1
- inspect_ai/model/_providers/hf.py +1 -1
- inspect_ai/model/_providers/openai.py +32 -8
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/model/_providers/vllm.py +1 -1
- inspect_ai/model/_render.py +7 -6
- inspect_ai/model/_trace.py +1 -1
- inspect_ai/solver/_basic_agent.py +8 -1
- inspect_ai/tool/_tool_transcript.py +28 -0
- inspect_ai/util/_sandbox/context.py +1 -2
- inspect_ai/util/_sandbox/docker/config.py +8 -10
- inspect_ai/util/_sandbox/docker/docker.py +9 -5
- inspect_ai/util/_sandbox/docker/util.py +3 -3
- inspect_ai/util/_sandbox/environment.py +7 -2
- inspect_ai/util/_sandbox/limits.py +1 -1
- inspect_ai/util/_sandbox/local.py +8 -9
- {inspect_ai-0.3.51.dist-info → inspect_ai-0.3.53.dist-info}/METADATA +2 -4
- {inspect_ai-0.3.51.dist-info → inspect_ai-0.3.53.dist-info}/RECORD +60 -59
- {inspect_ai-0.3.51.dist-info → inspect_ai-0.3.53.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.51.dist-info → inspect_ai-0.3.53.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.51.dist-info → inspect_ai-0.3.53.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.51.dist-info → inspect_ai-0.3.53.dist-info}/top_level.txt +0 -0
inspect_ai/_cli/eval.py
CHANGED
@@ -12,7 +12,7 @@ from inspect_ai._util.constants import (
     DEFAULT_MAX_RETRIES,
 )
 from inspect_ai._util.file import filesystem
-from inspect_ai._util.samples import parse_samples_limit
+from inspect_ai._util.samples import parse_sample_id, parse_samples_limit
 from inspect_ai.log._file import log_file_info
 from inspect_ai.model import GenerateConfigArgs
 from inspect_ai.scorer._reducer import create_reducers
@@ -30,6 +30,7 @@ MAX_TASKS_HELP = "Maximum number of tasks to run in parallel (default is 1)"
 MAX_SUBPROCESSES_HELP = (
     "Maximum number of subprocesses to run in parallel (default is os.cpu_count())"
 )
+MAX_SANDBOXES_HELP = "Maximum number of sandboxes (per-provider) to run in parallel."
 NO_SANDBOX_CLEANUP_HELP = "Do not cleanup sandbox environments after task completes"
 FAIL_ON_ERROR_HELP = "Threshold of sample errors to tolerage (by default, evals fail when any error occurs). Value between 0 to 1 to set a proportion; value greater than 1 to set a count."
 NO_LOG_SAMPLES_HELP = "Do not include samples in the log file."
@@ -144,6 +145,12 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
     help="Limit samples to evaluate e.g. 10 or 10-20",
     envvar="INSPECT_EVAL_LIMIT",
 )
+@click.option(
+    "--sample-id",
+    type=str,
+    help="Evaluate specific sample(s) (comma separated list of ids)",
+    envvar="INSPECT_EVAL_SAMPLE_ID",
+)
 @click.option(
     "--epochs",
     type=int,
@@ -186,6 +193,12 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
     help=MAX_SUBPROCESSES_HELP,
     envvar="INSPECT_EVAL_MAX_SUBPROCESSES",
 )
+@click.option(
+    "--max-sandboxes",
+    type=int,
+    help=MAX_SANDBOXES_HELP,
+    envvar="INSPECT_EVAL_MAX_SANDBOXES",
+)
 @click.option(
     "--message-limit",
     type=int,
@@ -355,6 +368,12 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
     help='Cache prompt prefix (Anthropic only). Defaults to "auto", which will enable caching for requests with tools.',
     envvar="INSPECT_EVAL_CACHE_PROMPT",
 )
+@click.option(
+    "--reasoning-effort",
+    type=click.Choice(["low", "medium", "high"]),
+    help="Constrains effort on reasoning for reasoning models. Open AI o1 models only.",
+    envvar="INSPECT_EVAL_REASONING_EFFORT",
+)
 @click.option(
     "--log-format",
     type=click.Choice(["eval", "json"], case_sensitive=False),
@@ -391,6 +410,7 @@ def eval_command(
     epochs: int | None,
     epochs_reducer: str | None,
     limit: str | None,
+    sample_id: str | None,
     max_retries: int | None,
     timeout: int | None,
     max_connections: int | None,
@@ -412,12 +432,14 @@
     parallel_tool_calls: bool | None,
     max_tool_output: int | None,
     cache_prompt: str | None,
+    reasoning_effort: str | None,
     message_limit: int | None,
     token_limit: int | None,
     time_limit: int | None,
     max_samples: int | None,
     max_tasks: int | None,
     max_subprocesses: int | None,
+    max_sandboxes: int | None,
     fail_on_error: bool | float | None,
     no_fail_on_error: bool | None,
     no_log_samples: bool | None,
@@ -458,12 +480,14 @@
         epochs=epochs,
         epochs_reducer=epochs_reducer,
         limit=limit,
+        sample_id=sample_id,
         message_limit=message_limit,
         token_limit=token_limit,
         time_limit=time_limit,
         max_samples=max_samples,
         max_tasks=max_tasks,
         max_subprocesses=max_subprocesses,
+        max_sandboxes=max_sandboxes,
         fail_on_error=fail_on_error,
         no_fail_on_error=no_fail_on_error,
         debug_errors=common["debug_errors"],
@@ -543,6 +567,7 @@ def eval_set_command(
     epochs: int | None,
     epochs_reducer: str | None,
     limit: str | None,
+    sample_id: str | None,
     max_retries: int | None,
     timeout: int | None,
     max_connections: int | None,
@@ -564,12 +589,14 @@
     parallel_tool_calls: bool | None,
     max_tool_output: int | None,
     cache_prompt: str | None,
+    reasoning_effort: str | None,
     message_limit: int | None,
     token_limit: int | None,
     time_limit: int | None,
     max_samples: int | None,
     max_tasks: int | None,
     max_subprocesses: int | None,
+    max_sandboxes: int | None,
     fail_on_error: bool | float | None,
     no_fail_on_error: bool | None,
     no_log_samples: bool | None,
@@ -612,12 +639,14 @@
         epochs=epochs,
         epochs_reducer=epochs_reducer,
         limit=limit,
+        sample_id=sample_id,
         message_limit=message_limit,
         token_limit=token_limit,
         time_limit=time_limit,
         max_samples=max_samples,
         max_tasks=max_tasks,
         max_subprocesses=max_subprocesses,
+        max_sandboxes=max_sandboxes,
         fail_on_error=fail_on_error,
         no_fail_on_error=no_fail_on_error,
         debug_errors=common["debug_errors"],
@@ -662,12 +691,14 @@ def eval_exec(
     epochs: int | None,
     epochs_reducer: str | None,
     limit: str | None,
+    sample_id: str | None,
    message_limit: int | None,
     token_limit: int | None,
     time_limit: int | None,
     max_samples: int | None,
     max_tasks: int | None,
     max_subprocesses: int | None,
+    max_sandboxes: int | None,
     fail_on_error: bool | float | None,
     no_fail_on_error: bool | None,
     debug_errors: bool | None,
@@ -699,8 +730,9 @@
         else None
     )

-    # resolve range
+    # resolve range and sample id
     eval_limit = parse_samples_limit(limit)
+    eval_sample_id = parse_sample_id(sample_id)

     # resolve fail_on_error
     if no_fail_on_error is True:
@@ -734,6 +766,7 @@
         log_dir=log_dir,
         log_format=log_format,
         limit=eval_limit,
+        sample_id=eval_sample_id,
         epochs=eval_epochs,
         fail_on_error=fail_on_error,
         debug_errors=debug_errors,
@@ -743,6 +776,7 @@
         max_samples=max_samples,
         max_tasks=max_tasks,
         max_subprocesses=max_subprocesses,
+        max_sandboxes=max_sandboxes,
         log_samples=log_samples,
         log_images=log_images,
         log_buffer=log_buffer,
@@ -821,6 +855,12 @@ def parse_comma_separated(value: str | None) -> list[str] | None:
     help=MAX_SUBPROCESSES_HELP,
     envvar="INSPECT_EVAL_MAX_SUBPROCESSES",
 )
+@click.option(
+    "--max-sandboxes",
+    type=int,
+    help=MAX_SANDBOXES_HELP,
+    envvar="INSPECT_EVAL_MAX_SANDBOXES",
+)
 @click.option(
     "--no-sandbox-cleanup",
     type=bool,
@@ -891,6 +931,7 @@ def eval_retry_command(
     max_samples: int | None,
     max_tasks: int | None,
     max_subprocesses: int | None,
+    max_sandboxes: int | None,
     no_sandbox_cleanup: bool | None,
     trace: bool | None,
     fail_on_error: bool | float | None,
@@ -934,6 +975,7 @@
         max_samples=max_samples,
         max_tasks=max_tasks,
         max_subprocesses=max_subprocesses,
+        max_sandboxes=max_sandboxes,
         sandbox_cleanup=sandbox_cleanup,
         trace=trace,
         fail_on_error=fail_on_error,
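The new `--sample-id`, `--max-sandboxes`, and `--reasoning-effort` options are threaded through `eval_command`, `eval_set_command`, and `eval_exec` above. A minimal sketch of how the first two might be exercised from the Python API, assuming `inspect_ai.eval()` in 0.3.53 accepts the same `sample_id` and `max_sandboxes` keywords that `eval_exec` forwards here; the task file and sample ids are hypothetical placeholders:

```python
# Sketch only: exercising the new options from Python rather than the CLI.
# Assumes inspect_ai.eval() accepts sample_id and max_sandboxes keyword
# arguments (as the eval_exec() call above suggests); "my_task.py" and the
# sample ids below are hypothetical placeholders.
from inspect_ai import eval

logs = eval(
    "my_task.py",
    sample_id=["sample-1", "sample-2"],  # evaluate only these samples
    max_sandboxes=10,                    # cap concurrent sandboxes per provider
)
```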
inspect_ai/_display/core/config.py
CHANGED
@@ -24,6 +24,10 @@ def task_config(
             config_print.append(
                 f"{name}: {','.join([approver['name'] for approver in value['approvers']])}"
             )
+        elif name == "sample_id":
+            value = value if isinstance(value, list) else [value]
+            value = [str(v) for v in value]
+            config_print.append(f"{name}: {','.join(value)}")
         elif name not in ["limit", "model"]:
             config_print.append(f"{name}: {value}")
     values = ", ".join(config_print)
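The new branch normalizes `sample_id` to a list of strings before joining, so a single id and a list of ids render the same way in the task config header. A standalone sketch of that behavior (the helper name is local to this example):

```python
# Standalone illustration of the sample_id formatting added above.
def format_config_entry(name: str, value: object) -> str:
    if name == "sample_id":
        values = value if isinstance(value, list) else [value]
        return f"{name}: {','.join(str(v) for v in values)}"
    return f"{name}: {value}"

assert format_config_entry("sample_id", 7) == "sample_id: 7"
assert format_config_entry("sample_id", ["a", "b"]) == "sample_id: a,b"
```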
inspect_ai/_display/core/panel.py
CHANGED
@@ -112,7 +112,7 @@ def tasks_title(completed: int, total: int) -> str:
 def task_title(profile: TaskProfile, show_model: bool) -> str:
     eval_epochs = profile.eval_config.epochs or 1
     epochs = f" x {profile.eval_config.epochs}" if eval_epochs > 1 else ""
-    samples = f"{profile.samples//eval_epochs:,}{epochs} sample{'s' if profile.samples
+    samples = f"{profile.samples//eval_epochs:,}{epochs} sample{'s' if profile.samples != 1 else ''}"
     title = f"{registry_unqualified_name(profile.name)} ({samples})"
     if show_model:
         title = f"{title}: {profile.model}"
inspect_ai/_display/core/progress.py
CHANGED
@@ -130,9 +130,15 @@ def progress_time(time: float) -> str:
     return f"{hours:2.0f}:{minutes:02.0f}:{seconds:02.0f}"


-def progress_count(complete: int, total: int) -> str:
-    # Pad the display to keep it stable
+def progress_count(complete: int, total: int, width: int | None = None) -> str:
+    # Pad the display to keep it stable as the
+    # complete metrics
     total_str = f"{total:,}"
     complete_str = f"{complete:,}"
     padding = max(0, len(total_str) - len(complete_str))
-
+    padded = " " * padding + f"[{complete_str}/{total_str}]"
+
+    # If a width has ben specified, pad up to this width as well
+    if width is not None:
+        padded = padded.rjust(width)
+    return padded
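The new `width` parameter lets callers right-align the `[complete/total]` counter against a shared column width. Reproducing the changed function locally (for illustration only) shows the effect:

```python
# Local copy of progress_count() as changed above, for illustration only.
def progress_count(complete: int, total: int, width: int | None = None) -> str:
    total_str = f"{total:,}"
    complete_str = f"{complete:,}"
    padding = max(0, len(total_str) - len(complete_str))
    padded = " " * padding + f"[{complete_str}/{total_str}]"
    if width is not None:
        padded = padded.rjust(width)
    return padded

print(progress_count(7, 1000))      # '    [7/1,000]'
print(progress_count(7, 1000, 15))  # same string right-aligned to 15 characters
```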
inspect_ai/_display/core/results.py
CHANGED
@@ -166,7 +166,7 @@ def task_interrupted(profile: TaskProfile, samples_completed: int) -> Renderable
     return message


-def task_metric(metrics: list[TaskDisplayMetric]) -> str:
+def task_metric(metrics: list[TaskDisplayMetric], width: int | None = None) -> str:
     reducer_names: Set[str] = {
         metric.reducer for metric in metrics if metric.reducer is not None
     }
@@ -180,10 +180,14 @@ def task_metric(metrics: list[TaskDisplayMetric]) -> str:
     else:
         value = f"{metric.value:.2f}"

-    if show_reducer:
-
+    if show_reducer and metric.reducer is not None:
+        metric_str = f"{metric.name}/{metric.reducer}: {value}"
     else:
-
+        metric_str = f"{metric.name}: {value}"
+
+    if width is not None:
+        metric_str = metric_str.rjust(width)
+    return metric_str


 def task_metrics(scores: list[EvalScore]) -> str:
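When more than one reducer is in play, `task_metric` now qualifies the label with the reducer name and can right-align it to a shared width. A small stand-in showing the label shapes (the dataclass below is only a stand-in for `TaskDisplayMetric`):

```python
# Illustration of the label formats produced by task_metric() above.
from dataclasses import dataclass


@dataclass
class Metric:  # stand-in for TaskDisplayMetric (name, value, reducer)
    name: str
    value: float
    reducer: str | None


def metric_label(m: Metric, show_reducer: bool, width: int | None = None) -> str:
    value = f"{m.value:.2f}"
    if show_reducer and m.reducer is not None:
        label = f"{m.name}/{m.reducer}: {value}"
    else:
        label = f"{m.name}: {value}"
    return label.rjust(width) if width is not None else label


print(metric_label(Metric("accuracy", 0.8542, "mean"), show_reducer=True))
# accuracy/mean: 0.85
print(metric_label(Metric("accuracy", 0.8542, None), show_reducer=False, width=25))
# '           accuracy: 0.85'
```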
inspect_ai/_display/textual/widgets/task_detail.py
CHANGED
@@ -63,6 +63,9 @@ class TaskDetail(Widget):
     def update_metrics(self, metrics: list[TaskDisplayMetric]) -> None:
         # Group by reducer then scorer within reducers
         self.metrics = metrics
+
+        # clear the existing computed reducers
+        self.by_reducer = {}
         for metric in metrics:
             reducer_group = (
                 self.by_reducer[metric.reducer]
@@ -117,6 +120,7 @@ class TaskDetail(Widget):
         for remove in to_remove:
             task_metric = self.existing_metrics[remove]
             task_metric.remove()
+            del self.existing_metrics[remove]

         # add or update widgets with metrics
         for reducer, scorers in self.by_reducer.items():
@@ -187,24 +191,52 @@ class TaskMetrics(Widget):
         self.grid: Grid = Grid()
         self.value_widgets: dict[str, Static] = {}

+    def grid_id(self) -> str:
+        return f"{self.id}-grid"
+
     def compose(self) -> ComposeResult:
-        #
+        # Yield the title and base grid
         yield Center(self._title())
-
-        for metric in self.metrics:
-            # Add the value static but keep it around
-            # for future updates
-            self.value_widgets[metric.name] = Static(
-                self._metric_value(metric.value)
-            )
-
-            yield Static(metric.name)
-            yield self.value_widgets[metric.name]
+        yield Grid(id=self.grid_id())

     def update(self, metrics: list[TaskMetric]) -> None:
+        self.metrics = metrics
+
+        # We assume that generally the initial metric names will
+        # always match future updates (so we can just update values in line)
+        # but if an unrecognized metric appears on the scene, just
+        # recompute the whole grid
+        need_recompute = False
         for metric in metrics:
-            widget = self.value_widgets
-            widget
+            widget = self.value_widgets.get(metric.name)
+            if widget:
+                # Just update the values themselves
+                widget.update(content=f"{metric.value:,.3f}")
+            else:
+                # Don't have a widget for this, recompute the whole grid
+                need_recompute = True
+                break
+
+        if need_recompute:
+            self.recompute_grid()
+
+    def on_mount(self) -> None:
+        self.recompute_grid()
+
+    def recompute_grid(self) -> None:
+        if not self.is_mounted:
+            return
+
+        grid = self.query_one(f"#{self.grid_id()}")
+
+        grid.remove_children()
+        for metric in self.metrics:
+            # Add the value static but keep it around
+            # for future updates
+            self.value_widgets[metric.name] = Static(self._metric_value(metric.value))
+
+            grid.mount(Static(metric.name))
+            grid.mount(self.value_widgets[metric.name])

     def _title(self) -> Widget:
         if self.scorer is None:
inspect_ai/_display/textual/widgets/tasks.py
CHANGED
@@ -36,6 +36,9 @@ from ...core.progress import (
     progress_model_name,
 )

+MAX_METRIC_WIDTH = 25
+MAX_COUNT_WIDTH = 15
+

 class TasksView(Container):
     DEFAULT_CSS = """
@@ -68,6 +71,7 @@ class TasksView(Container):
         super().__init__()
         self.description_width = MAX_DESCRIPTION_WIDTH
         self.model_name_width = MAX_MODEL_NAME_WIDTH
+        self.sample_count_width = 0

     def init_tasks(self, tasks: list[TaskSpec]) -> None:
         # clear existing tasks
@@ -80,15 +84,41 @@ class TasksView(Container):
         self.model_name_width = min(
             max([len(str(task.model)) for task in tasks]), MAX_MODEL_NAME_WIDTH
         )
+        self.update_progress_widths()

     def add_task(self, task: TaskWithResult) -> TaskDisplay:
+        self.update_count_width(task.profile.samples)
         task_display = TaskProgressView(
-            task, self.description_width, self.model_name_width
+            task, self.description_width, self.model_name_width, self.sample_count_width
         )
         self.tasks.mount(task_display)
         self.tasks.scroll_to_widget(task_display)
+        self.update_progress_widths()
+
         return task_display

+    def update_count_width(self, samples: int) -> None:
+        sample_count_str = progress_count(samples, samples, self.sample_count_width)
+        self.sample_count_width = min(
+            max(self.sample_count_width, len(sample_count_str)), MAX_COUNT_WIDTH
+        )
+
+    def update_progress_widths(self) -> None:
+        progress_views = self.tasks.query_children(TaskProgressView)
+        metrics_size = 0
+        for progress_view in progress_views:
+            metrics_size = max(
+                metrics_size,
+                progress_view.metrics_width
+                if progress_view.metrics_width is not None
+                else 0,
+            )
+        metrics_size = min(metrics_size, MAX_METRIC_WIDTH)
+
+        for progress_view in progress_views:
+            progress_view.update_metrics_width(metrics_size)
+            progress_view.update_count_width(self.sample_count_width)
+
     def compose(self) -> ComposeResult:
         yield Static(id="tasks-config")
         yield Static(id="tasks-targets")
@@ -139,13 +169,18 @@ class TaskProgressView(Widget):
     """

     def __init__(
-        self,
+        self,
+        task: TaskWithResult,
+        description_width: int,
+        model_name_width: int,
+        sample_count_width: int,
     ) -> None:
         super().__init__()
         self.t = task

         self.description_width = description_width
         self.model_name_width = model_name_width
+
         self.progress_bar = ProgressBar(total=task.profile.steps, show_eta=False)
         self.count_display = Static()
         self.metrics_display = Static(id="task-metrics")
@@ -154,6 +189,14 @@ class TaskProgressView(Widget):
         self.toggle = Toggle()
         self.task_detail = TaskDetail(id="task-detail", classes="hidden")

+        self.sample_count_width: int = sample_count_width
+
+    metrics: reactive[list[TaskDisplayMetric] | None] = reactive(None)
+    metrics_width: reactive[int | None] = reactive(None)
+    sample_count_width: reactive[int] = reactive(0)
+    samples_complete: reactive[int] = reactive(0)
+    samples_total: reactive[int] = reactive(0)
+
     def compose(self) -> ComposeResult:
         yield self.toggle
         yield TaskStatusIcon()
@@ -191,13 +234,51 @@ class TaskProgressView(Widget):
         self.task_progress.complete()

     def sample_complete(self, complete: int, total: int) -> None:
-        self.
+        self.samples_complete = complete
+        self.samples_total = total

     def update_metrics(self, metrics: list[TaskDisplayMetric]) -> None:
-
-
+        self.metrics = metrics
+
+    def update_metrics_width(self, width: int) -> None:
+        self.metrics_width = width
+
+    def update_count_width(self, width: int) -> None:
+        self.sample_count_width = width
+
+    def _watch_sample_count_width(self, width: int) -> None:
+        self.refresh_count()
+
+    def _watch_samples_complete(self, complete: int) -> None:
+        self.refresh_count()
+
+    def _watch_samples_total(self, total: int) -> None:
+        self.refresh_count()
+
+    def _watch_metrics_width(self, width: int) -> None:
+        self.update_metrics_label()
+
+    def _watch_metrics(self, metrics: list[TaskDisplayMetric] | None) -> None:
+        if metrics is not None and len(metrics) > 0:
+            # update label
+            self.update_metrics_label()
+
+            # update details
             self.task_detail.update_metrics(metrics)

+    def refresh_count(self) -> None:
+        progress_label = progress_count(
+            self.samples_complete, self.samples_total, self.sample_count_width
+        )
+        self.count_display.update(progress_label)
+
+    def update_metrics_label(self) -> None:
+        # compute the label (with a min size)
+        if self.metrics is not None:
+            metric_label = task_metric(self.metrics, self.metrics_width)
+            self.metrics_width = len(metric_label)
+            self.metrics_display.update(metric_label)
+

 class TaskStatusIcon(Static):
     result: reactive[TaskResult | None] = reactive(None)
inspect_ai/_display/textual/widgets/transcript.py
CHANGED
@@ -10,10 +10,10 @@ from textual.widget import Widget
 from textual.widgets import Static

 from inspect_ai._util.content import ContentText
-from inspect_ai._util.format import format_function_call
 from inspect_ai._util.rich import lines_display
 from inspect_ai._util.transcript import (
     set_transcript_markdown_options,
+    transcript_function,
     transcript_markdown,
     transcript_separator,
 )
@@ -36,6 +36,7 @@ from inspect_ai.log._transcript import (
 from inspect_ai.model._chat_message import ChatMessage, ChatMessageUser
 from inspect_ai.model._render import messages_preceding_assistant
 from inspect_ai.tool._tool import ToolResult
+from inspect_ai.tool._tool_transcript import transcript_tool_call


 class TranscriptView(ScrollableContainer):
@@ -195,16 +196,7 @@ def render_tool_event(event: ToolEvent) -> list[EventDisplay]:
         display.extend(render_event(e) or [])

     # render the call
-    content
-    if event.view:
-        if event.view.title:
-            content.append(Text.from_markup(f"[bold]{event.view.title}[/bold]\n"))
-        if event.view.format == "markdown":
-            content.append(transcript_markdown(event.view.content))
-        else:
-            content.append(event.view.content)
-    else:
-        content.append(render_function_call(event.function, event.arguments))
+    content = transcript_tool_call(event)

     # render the output
     if isinstance(event.result, list):
@@ -266,7 +258,7 @@ def render_subtask_event(event: SubtaskEvent) -> list[EventDisplay]:
     for e in event.events:
         display.extend(render_event(e) or [])

-    content: list[RenderableType] = [
+    content: list[RenderableType] = [transcript_function(event.name, event.input)]
     if event.result:
         content.append(Text())
         if isinstance(event.result, str | int | float | bool | None):
@@ -309,11 +301,6 @@ def render_error_event(event: ErrorEvent) -> EventDisplay:
     return EventDisplay("error", event.error.traceback.strip())


-def render_function_call(function: str, arguments: dict[str, Any]) -> RenderableType:
-    call = format_function_call(function, arguments)
-    return transcript_markdown("```python\n" + call + "\n```\n")
-
-
 def render_as_json(json: Any) -> RenderableType:
     return transcript_markdown(
         "```json\n"