PyPI - inspect-ai - Versions diffs - 0.3.51__py3-none-any.whl → 0.3.52__py3-none-any.whl - Mend

inspect-ai 0.3.51__py3-none-any.whl → 0.3.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

inspect_ai/_cli/eval.py +15 -2
inspect_ai/_display/core/config.py +4 -0
inspect_ai/_display/core/panel.py +1 -1
inspect_ai/_display/textual/widgets/task_detail.py +42 -13
inspect_ai/_display/textual/widgets/transcript.py +4 -17
inspect_ai/_eval/eval.py +13 -1
inspect_ai/_eval/evalset.py +3 -0
inspect_ai/_eval/task/log.py +6 -1
inspect_ai/_eval/task/run.py +4 -2
inspect_ai/_eval/task/util.py +15 -6
inspect_ai/_util/logger.py +10 -2
inspect_ai/_util/samples.py +7 -0
inspect_ai/_util/transcript.py +8 -0
inspect_ai/_view/www/dist/assets/index.js +25 -12
inspect_ai/_view/www/src/samples/SampleDisplay.mjs +18 -5
inspect_ai/log/_log.py +3 -0
inspect_ai/model/_call_tools.py +2 -6
inspect_ai/model/_model.py +18 -4
inspect_ai/model/_providers/azureai.py +22 -2
inspect_ai/model/_render.py +7 -6
inspect_ai/model/_trace.py +1 -1
inspect_ai/solver/_basic_agent.py +8 -1
inspect_ai/tool/_tool_transcript.py +28 -0
{inspect_ai-0.3.51.dist-info → inspect_ai-0.3.52.dist-info}/METADATA +2 -2
{inspect_ai-0.3.51.dist-info → inspect_ai-0.3.52.dist-info}/RECORD +29 -28
{inspect_ai-0.3.51.dist-info → inspect_ai-0.3.52.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.51.dist-info → inspect_ai-0.3.52.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.51.dist-info → inspect_ai-0.3.52.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.51.dist-info → inspect_ai-0.3.52.dist-info}/top_level.txt +0 -0

inspect_ai/_cli/eval.py CHANGED Viewed

@@ -12,7 +12,7 @@ from inspect_ai._util.constants import (
     DEFAULT_MAX_RETRIES,
 )
 from inspect_ai._util.file import filesystem
-from inspect_ai._util.samples import parse_samples_limit
+from inspect_ai._util.samples import parse_sample_id, parse_samples_limit
 from inspect_ai.log._file import log_file_info
 from inspect_ai.model import GenerateConfigArgs
 from inspect_ai.scorer._reducer import create_reducers
@@ -144,6 +144,12 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
         help="Limit samples to evaluate e.g. 10 or 10-20",
         envvar="INSPECT_EVAL_LIMIT",
     )
+    @click.option(
+        "--sample-id",
+        type=str,
+        help="Evaluate specific sample(s) (comma separated list of ids)",
+        envvar="INSPECT_EVAL_SAMPLE_ID",
+    )
     @click.option(
         "--epochs",
         type=int,
@@ -391,6 +397,7 @@ def eval_command(
     epochs: int | None,
     epochs_reducer: str | None,
     limit: str | None,
+    sample_id: str | None,
     max_retries: int | None,
     timeout: int | None,
     max_connections: int | None,
@@ -458,6 +465,7 @@ def eval_command(
         epochs=epochs,
         epochs_reducer=epochs_reducer,
         limit=limit,
+        sample_id=sample_id,
         message_limit=message_limit,
         token_limit=token_limit,
         time_limit=time_limit,
@@ -543,6 +551,7 @@ def eval_set_command(
     epochs: int | None,
     epochs_reducer: str | None,
     limit: str | None,
+    sample_id: str | None,
     max_retries: int | None,
     timeout: int | None,
     max_connections: int | None,
@@ -612,6 +621,7 @@ def eval_set_command(
         epochs=epochs,
         epochs_reducer=epochs_reducer,
         limit=limit,
+        sample_id=sample_id,
         message_limit=message_limit,
         token_limit=token_limit,
         time_limit=time_limit,
@@ -662,6 +672,7 @@ def eval_exec(
     epochs: int | None,
     epochs_reducer: str | None,
     limit: str | None,
+    sample_id: str | None,
     message_limit: int | None,
     token_limit: int | None,
     time_limit: int | None,
@@ -699,8 +710,9 @@ def eval_exec(
         else None
     )
-    # resolve range
+    # resolve range and sample id
     eval_limit = parse_samples_limit(limit)
+    eval_sample_id = parse_sample_id(sample_id)
     # resolve fail_on_error
     if no_fail_on_error is True:
@@ -734,6 +746,7 @@ def eval_exec(
             log_dir=log_dir,
             log_format=log_format,
             limit=eval_limit,
+            sample_id=eval_sample_id,
             epochs=eval_epochs,
             fail_on_error=fail_on_error,
             debug_errors=debug_errors,

inspect_ai/_display/core/config.py CHANGED Viewed

@@ -24,6 +24,10 @@ def task_config(
             config_print.append(
                 f"{name}: {','.join([approver['name'] for approver in value['approvers']])}"
             )
+        elif name == "sample_id":
+            value = value if isinstance(value, list) else [value]
+            value = [str(v) for v in value]
+            config_print.append(f"{name}: {','.join(value)}")
         elif name not in ["limit", "model"]:
             config_print.append(f"{name}: {value}")
     values = ", ".join(config_print)

inspect_ai/_display/core/panel.py CHANGED Viewed

@@ -112,7 +112,7 @@ def tasks_title(completed: int, total: int) -> str:
 def task_title(profile: TaskProfile, show_model: bool) -> str:
     eval_epochs = profile.eval_config.epochs or 1
     epochs = f" x {profile.eval_config.epochs}" if eval_epochs > 1 else ""
-    samples = f"{profile.samples//eval_epochs:,}{epochs} sample{'s' if profile.samples > 1 else ''}"
+    samples = f"{profile.samples//eval_epochs:,}{epochs} sample{'s' if profile.samples != 1 else ''}"
     title = f"{registry_unqualified_name(profile.name)} ({samples})"
     if show_model:
         title = f"{title}: {profile.model}"

inspect_ai/_display/textual/widgets/task_detail.py CHANGED Viewed

@@ -63,6 +63,9 @@ class TaskDetail(Widget):
     def update_metrics(self, metrics: list[TaskDisplayMetric]) -> None:
         # Group by reducer then scorer within reducers
         self.metrics = metrics
+        # clear the existing computed reducers
+        self.by_reducer = {}
         for metric in metrics:
             reducer_group = (
                 self.by_reducer[metric.reducer]
@@ -117,6 +120,7 @@ class TaskDetail(Widget):
         for remove in to_remove:
             task_metric = self.existing_metrics[remove]
             task_metric.remove()
+            del self.existing_metrics[remove]
         # add or update widgets with metrics
         for reducer, scorers in self.by_reducer.items():
@@ -187,24 +191,49 @@ class TaskMetrics(Widget):
         self.grid: Grid = Grid()
         self.value_widgets: dict[str, Static] = {}
+    def grid_id(self) -> str:
+        return f"{self.id}-grid"
     def compose(self) -> ComposeResult:
-        # Just yield a single DataTable widget
+        # Yield the title and base grid
         yield Center(self._title())
-        with Grid():
-            for metric in self.metrics:
-                # Add the value static but keep it around
-                # for future updates
-                self.value_widgets[metric.name] = Static(
-                    self._metric_value(metric.value)
-                )
-                yield Static(metric.name)
-                yield self.value_widgets[metric.name]
+        yield Grid(id=self.grid_id())
     def update(self, metrics: list[TaskMetric]) -> None:
+        self.metrics = metrics
+        # We assume that generally the initial metric names will
+        # always match future updates (so we can just update values in line)
+        # but if an unrecognized metric appears on the scene, just
+        # recompute the whole grid
+        need_recompute = False
         for metric in metrics:
-            widget = self.value_widgets[metric.name]
-            widget.update(content=f"{metric.value:,.3f}")
+            widget = self.value_widgets.get(metric.name)
+            if widget:
+                # Just update the values themselves
+                widget.update(content=f"{metric.value:,.3f}")
+            else:
+                # Don't have a widget for this, recompute the whole grid
+                need_recompute = True
+                break
+        if need_recompute:
+            self.recompute_grid()
+    def on_mount(self) -> None:
+        self.recompute_grid()
+    def recompute_grid(self) -> None:
+        grid = self.query_one(f"#{self.grid_id()}")
+        grid.remove_children()
+        for metric in self.metrics:
+            # Add the value static but keep it around
+            # for future updates
+            self.value_widgets[metric.name] = Static(self._metric_value(metric.value))
+            grid.mount(Static(metric.name))
+            grid.mount(self.value_widgets[metric.name])
     def _title(self) -> Widget:
         if self.scorer is None:

inspect_ai/_display/textual/widgets/transcript.py CHANGED Viewed

@@ -10,10 +10,10 @@ from textual.widget import Widget
 from textual.widgets import Static
 from inspect_ai._util.content import ContentText
-from inspect_ai._util.format import format_function_call
 from inspect_ai._util.rich import lines_display
 from inspect_ai._util.transcript import (
     set_transcript_markdown_options,
+    transcript_function,
     transcript_markdown,
     transcript_separator,
 )
@@ -36,6 +36,7 @@ from inspect_ai.log._transcript import (
 from inspect_ai.model._chat_message import ChatMessage, ChatMessageUser
 from inspect_ai.model._render import messages_preceding_assistant
 from inspect_ai.tool._tool import ToolResult
+from inspect_ai.tool._tool_transcript import transcript_tool_call
 class TranscriptView(ScrollableContainer):
@@ -195,16 +196,7 @@ def render_tool_event(event: ToolEvent) -> list[EventDisplay]:
             display.extend(render_event(e) or [])
     # render the call
-    content: list[RenderableType] = []
-    if event.view:
-        if event.view.title:
-            content.append(Text.from_markup(f"[bold]{event.view.title}[/bold]\n"))
-        if event.view.format == "markdown":
-            content.append(transcript_markdown(event.view.content))
-        else:
-            content.append(event.view.content)
-    else:
-        content.append(render_function_call(event.function, event.arguments))
+    content = transcript_tool_call(event)
     # render the output
     if isinstance(event.result, list):
@@ -266,7 +258,7 @@ def render_subtask_event(event: SubtaskEvent) -> list[EventDisplay]:
         for e in event.events:
             display.extend(render_event(e) or [])
-    content: list[RenderableType] = [render_function_call(event.name, event.input)]
+    content: list[RenderableType] = [transcript_function(event.name, event.input)]
     if event.result:
         content.append(Text())
         if isinstance(event.result, str | int | float | bool | None):
@@ -309,11 +301,6 @@ def render_error_event(event: ErrorEvent) -> EventDisplay:
     return EventDisplay("error", event.error.traceback.strip())
-def render_function_call(function: str, arguments: dict[str, Any]) -> RenderableType:
-    call = format_function_call(function, arguments)
-    return transcript_markdown("```python\n" + call + "\n```\n")
 def render_as_json(json: Any) -> RenderableType:
     return transcript_markdown(
         "```json\n"

inspect_ai/_eval/eval.py CHANGED Viewed

@@ -61,6 +61,7 @@ def eval(
     log_dir: str | None = None,
     log_format: Literal["eval", "json"] | None = None,
     limit: int | tuple[int, int] | None = None,
+    sample_id: str | int | list[str | int] | None = None,
     epochs: int | Epochs | None = None,
     fail_on_error: bool | float | None = None,
     debug_errors: bool | None = None,
@@ -110,6 +111,7 @@ def eval(
            to "eval", the native high-performance format).
         limit (int | tuple[int, int] | None): Limit evaluated samples
            (defaults to all samples).
+        sample_id (str | int | list[str | int] | None): Evaluate specific sample(s) from the dataset.
         epochs (int | Epochs | None): Epochs to repeat samples for and optional score
            reducer function(s) used to combine sample scores (defaults to "mean")
         fail_on_error (bool | float | None): `True` to fail on first sample error
@@ -163,6 +165,7 @@ def eval(
             log_dir=log_dir,
             log_format=log_format,
             limit=limit,
+            sample_id=sample_id,
             epochs=epochs,
             fail_on_error=fail_on_error,
             debug_errors=debug_errors,
@@ -198,6 +201,7 @@ async def eval_async(
     log_dir: str | None = None,
     log_format: Literal["eval", "json"] | None = None,
     limit: int | tuple[int, int] | None = None,
+    sample_id: str | int | list[str | int] | None = None,
     epochs: int | Epochs | None = None,
     fail_on_error: bool | float | None = None,
     debug_errors: bool | None = None,
@@ -245,8 +249,9 @@ async def eval_async(
             (defaults to file log in ./logs directory).
         log_format (Literal["eval", "json"] | None): Format for writing log files (defaults
            to "eval", the native high-performance format).
-        limit (int | tuple[int, int] | None): Limit evaluated samples
+        limit (str | int | list[str | int] | None): Limit evaluated samples
             (defaults to all samples).
+        sample_id (str | list[str] | None): Evaluate specific sample(s) from the dataset.
         epochs (int | Epochs | None): Epochs to repeat samples for and optional score
             reducer function(s) used to combine sample scores (defaults to "mean")
         fail_on_error (bool | float | None): `True` to fail on first sample error
@@ -335,6 +340,10 @@ async def eval_async(
         # resolve solver
         solver = chain(solver) if isinstance(solver, list) else solver
+        # ensure consistency of limit and sample_id
+        if sample_id is not None and limit is not None:
+            raise ValueError("You cannot specify both sample_id and limit.")
         # resolve epochs
         if isinstance(epochs, int):
             epochs = Epochs(epochs)
@@ -345,6 +354,7 @@ async def eval_async(
         epochs_reducer = epochs.reducer if epochs else None
         eval_config = EvalConfig(
             limit=limit,
+            sample_id=sample_id,
             epochs=epochs.epochs if epochs else None,
             epochs_reducer=reducer_log_names(epochs_reducer)
             if epochs_reducer
@@ -642,6 +652,7 @@ async def eval_retry_async(
         task_args = eval_log.eval.task_args
         tags = eval_log.eval.tags
         limit = eval_log.eval.config.limit
+        sample_id = eval_log.eval.config.sample_id
         epochs = (
             Epochs(eval_log.eval.config.epochs, eval_log.eval.config.epochs_reducer)
             if eval_log.eval.config.epochs
@@ -699,6 +710,7 @@ async def eval_retry_async(
                 log_dir=log_dir,
                 log_format=log_format,
                 limit=limit,
+                sample_id=sample_id,
                 epochs=epochs,
                 fail_on_error=fail_on_error,
                 debug_errors=debug_errors,

inspect_ai/_eval/evalset.py CHANGED Viewed

@@ -65,6 +65,7 @@ def eval_set(
     log_level_transcript: str | None = None,
     log_format: Literal["eval", "json"] | None = None,
     limit: int | tuple[int, int] | None = None,
+    sample_id: str | int | list[str | int] | None = None,
     epochs: int | Epochs | None = None,
     fail_on_error: bool | float | None = None,
     debug_errors: bool | None = None,
@@ -125,6 +126,7 @@ def eval_set(
           log files (defaults to "eval", the native high-performance format).
         limit (int | tuple[int, int] | None): Limit evaluated samples
            (defaults to all samples).
+        sample_id (str | int | list[str | int] | None): Evaluate specific sample(s) from the dataset.
         epochs (int | Epochs | None): Epochs to repeat samples for and optional score
            reducer function(s) used to combine sample scores (defaults to "mean")
         fail_on_error (bool | float | None): `True` to fail on first sample error
@@ -181,6 +183,7 @@ def eval_set(
             log_dir=log_dir,
             log_format=log_format,
             limit=limit,
+            sample_id=sample_id,
             epochs=epochs,
             fail_on_error=fail_on_error,
             debug_errors=debug_errors,

inspect_ai/_eval/task/log.py CHANGED Viewed

@@ -83,7 +83,12 @@ class TaskLogger:
         # ensure that the dataset has sample ids and record them
         sample_ids = cast(
             list[int | str],
-            [sample.id for sample in slice_dataset(dataset, eval_config.limit)],
+            [
+                sample.id
+                for sample in slice_dataset(
+                    dataset, eval_config.limit, eval_config.sample_id
+                )
+            ],
         )
         # create eval spec

inspect_ai/_eval/task/run.py CHANGED Viewed

@@ -162,6 +162,7 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
             dataset=task.dataset,
             model_name=model_name,
             limit=config.limit,
+            sample_id=config.sample_id,
             epochs=epochs,
             log_images=log_images,
             message_limit=config.message_limit,
@@ -748,13 +749,14 @@ async def resolve_dataset(
     dataset: Dataset,
     model_name: ModelName,
     limit: int | tuple[int, int] | None,
+    sample_id: str | int | list[str | int] | None,
     epochs: int,
     log_images: bool,
     message_limit: int | None,
     token_limit: int | None,
 ) -> tuple[Dataset, list[Sample], list[TaskState]]:
-    # apply limit to dataset
-    dataset = slice_dataset(dataset, limit)
+    # slice dataset
+    dataset = slice_dataset(dataset, limit, sample_id)
     # apply epochs (deepcopy the samples so they remain independent)
     samples: list[Sample] = []

inspect_ai/_eval/task/util.py CHANGED Viewed

@@ -39,10 +39,19 @@ def task_file(task: Task, relative: bool = False) -> str | None:
 def slice_dataset(
     dataset: Dataset,
     limit: int | tuple[int, int] | None,
+    sample_id: str | int | list[str | int] | None,
 ) -> Dataset:
-    dataset_limit = (
-        slice(0, len(dataset))
-        if limit is None
-        else (slice(*limit) if isinstance(limit, tuple) else slice(0, limit))
-    )
-    return dataset[dataset_limit]
+    def normalise(id: str | int | None) -> str:
+        return id if isinstance(id, str) else str(id).zfill(20)
+    if sample_id is not None:
+        sample_id = sample_id if isinstance(sample_id, list) else [sample_id]
+        sample_id = [normalise(id) for id in sample_id]
+        return dataset.filter(lambda sample: normalise(sample.id) in sample_id)
+    else:
+        dataset_limit = (
+            slice(0, len(dataset))
+            if limit is None
+            else (slice(*limit) if isinstance(limit, tuple) else slice(0, limit))
+        )
+        return dataset[dataset_limit]

inspect_ai/_util/logger.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
 from logging import (
+    DEBUG,
     INFO,
     WARNING,
     FileHandler,
@@ -129,7 +130,7 @@ def init_logger(
     # init logging handler on demand
     global _logHandler
     if not _logHandler:
-        _logHandler = LogHandler(min(HTTP, levelno), transcript_levelno)
+        _logHandler = LogHandler(min(DEBUG, levelno), transcript_levelno)
         getLogger().addHandler(_logHandler)
     # establish default capture level
@@ -139,6 +140,7 @@ def init_logger(
     getLogger().setLevel(capture_level)
     getLogger(PKG_NAME).setLevel(capture_level)
     getLogger("httpx").setLevel(capture_level)
+    getLogger("botocore").setLevel(DEBUG)
     # set the levelno on the global handler
     _logHandler.display_level = levelno
@@ -154,7 +156,13 @@ def notify_logger_record(record: LogRecord, write: bool) -> None:
     if write:
         transcript()._event(LoggerEvent(message=LoggingMessage.from_log_record(record)))
     global _rate_limit_count
-    if record.levelno <= INFO and "429" in record.getMessage():
+    if (record.levelno <= INFO and "429" in record.getMessage()) or (
+        record.levelno == DEBUG
+        # See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html#validating-retry-attempts
+        # for boto retry logic / log messages (this is tracking standard or adapative retries)
+        and "botocore.retries.standard" in record.name
+        and "Retry needed, retrying request after delay of:" in record.getMessage()
+    ):
         _rate_limit_count = _rate_limit_count + 1

inspect_ai/_util/samples.py CHANGED Viewed

@@ -7,3 +7,10 @@ def parse_samples_limit(limit: str | None) -> int | tuple[int, int] | None:
             return (limit_split[0] - 1, limit_split[1])
     else:
         return None
+def parse_sample_id(sample_id: str | None) -> list[str] | None:
+    if sample_id is not None:
+        return [id.strip() for id in sample_id.split(",")]
+    else:
+        return None

inspect_ai/_util/transcript.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import html
+from typing import Any
 from rich.align import AlignMethod
 from rich.box import ROUNDED, Box
@@ -8,6 +9,8 @@ from rich.panel import Panel
 from rich.rule import Rule
 from rich.text import Text
+from .format import format_function_call
 def transcript_code_theme() -> str:
     return "github-dark"
@@ -81,6 +84,11 @@ def transcript_separator(title: str, color: str) -> RenderableType:
     return Rule(title=title, style=f"{color} bold", align="center", end="\n\n")
+def transcript_function(function: str, arguments: dict[str, Any]) -> RenderableType:
+    call = format_function_call(function, arguments)
+    return transcript_markdown("```python\n" + call + "\n```\n")
 LINE = Box(" ── \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n")
 DOTTED = Box(" ·· \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n")

inspect_ai/_view/www/dist/assets/index.js CHANGED Viewed

@@ -20225,7 +20225,7 @@ const metadataViewsForSample = (id, sample) => {
   }
   return sampleMetadatas;
 };
-const SampleSummary = ({ id, sample, style, sampleDescriptor }) => {
+const SampleSummary = ({ parent_id, sample, style, sampleDescriptor }) => {
   const input = (sampleDescriptor == null ? void 0 : sampleDescriptor.messageShape.normalized.input) > 0 ? Math.max(0.15, sampleDescriptor.messageShape.normalized.input) : 0;
   const target = (sampleDescriptor == null ? void 0 : sampleDescriptor.messageShape.normalized.target) > 0 ? Math.max(0.15, sampleDescriptor.messageShape.normalized.target) : 0;
   const answer = (sampleDescriptor == null ? void 0 : sampleDescriptor.messageShape.normalized.answer) > 0 ? Math.max(0.15, sampleDescriptor.messageShape.normalized.answer) : 0;
@@ -20246,7 +20246,7 @@ const SampleSummary = ({ id, sample, style, sampleDescriptor }) => {
   const columns = [];
   columns.push({
     label: "Id",
-    value: id,
+    value: sample.id,
     size: `${idSize}em`
   });
   columns.push({
@@ -20299,7 +20299,7 @@ const SampleSummary = ({ id, sample, style, sampleDescriptor }) => {
   });
   return m$1`
     <div
-      id=${`sample-heading-${id}`}
+      id=${`sample-heading-${parent_id}`}
       style=${{
     display: "grid",
     gridTemplateColumns: `${columns.map((col) => {
@@ -25449,22 +25449,35 @@ const createsSamplesDescriptor = (scorers, samples, epochs, selectedScore) => {
         if (!sample || !sample.scores) {
           return [];
         }
-        scorers.map((score2) => {
+        const scoreNames = scorers.map((score2) => {
           return score2.name;
         });
         const sampleScorer = sample.scores[scorer];
         const scoreVal = sampleScorer.value;
         if (typeof scoreVal === "object") {
           const names = Object.keys(scoreVal);
-          const scores = names.map((name) => {
-            return {
-              name,
-              rendered: () => {
-                return scoreDescriptor.render(scoreVal[name]);
+          if (names.find((name) => {
+            return scoreNames.includes(name);
+          })) {
+            const scores = names.map((name) => {
+              return {
+                name,
+                rendered: () => {
+                  return scoreDescriptor.render(scoreVal[name]);
+                }
+              };
+            });
+            return scores;
+          } else {
+            return [
+              {
+                name: scorer,
+                rendered: () => {
+                  return scoreDescriptor.render(scoreVal);
+                }
               }
-            };
-          });
-          return scores;
+            ];
+          }
         } else {
           return [
             {

inspect_ai/_view/www/src/samples/SampleDisplay.mjs CHANGED Viewed

@@ -350,7 +350,17 @@ const metadataViewsForSample = (id, sample) => {
   return sampleMetadatas;
 };
-const SampleSummary = ({ id, sample, style, sampleDescriptor }) => {
+/**
+ * Component to display a sample with relevant context and visibility control.
+ *
+ * @param {Object} props - The properties passed to the component.
+ * @param {string} props.parent_id - The id of the parent com
+ * @param {import("../types/log").EvalSample} [props.sample] - the sample
+ * @param {Object} [props.style] - Inline styles for the table element.
+ * @param {import("../samples/SamplesDescriptor.mjs").SamplesDescriptor} props.sampleDescriptor - the sample descriptor
+ * @returns {import("preact").JSX.Element} The TranscriptView component.
+ */
+const SampleSummary = ({ parent_id, sample, style, sampleDescriptor }) => {
   const input =
     sampleDescriptor?.messageShape.normalized.input > 0
       ? Math.max(0.15, sampleDescriptor.messageShape.normalized.input)
@@ -386,7 +396,7 @@ const SampleSummary = ({ id, sample, style, sampleDescriptor }) => {
   const columns = [];
   columns.push({
     label: "Id",
-    value: id,
+    value: sample.id,
     size: `${idSize}em`,
   });
@@ -412,7 +422,8 @@ const SampleSummary = ({ id, sample, style, sampleDescriptor }) => {
   const fullAnswer =
     sample && sampleDescriptor
-      ? sampleDescriptor.selectedScorer(sample).answer()
+      ? // @ts-ignore
+        sampleDescriptor.selectedScorer(sample).answer()
       : undefined;
   if (fullAnswer) {
     columns.push({
@@ -445,14 +456,16 @@ const SampleSummary = ({ id, sample, style, sampleDescriptor }) => {
           message=${sample.error.message}
           style=${{ marginTop: "0.4rem" }}
         />`
-      : sampleDescriptor?.selectedScore(sample).render(),
+      : // TODO: Cleanup once the PR lands which makes sample / sample summary share common interface
+        // @ts-ignore
+        sampleDescriptor?.selectedScore(sample).render(),
     size: "minmax(2em, auto)",
     center: true,
   });
   return html`
     <div
-      id=${`sample-heading-${id}`}
+      id=${`sample-heading-${parent_id}`}
       style=${{
         display: "grid",
         gridTemplateColumns: `${columns

inspect_ai/log/_log.py CHANGED Viewed

@@ -37,6 +37,9 @@ class EvalConfig(BaseModel):
     limit: int | tuple[int, int] | None = Field(default=None)
     """Sample limit (number of samples or range of samples)."""
+    sample_id: str | int | list[str | int] | None = Field(default=None)
+    """Evaluate specific sample(s)."""
     epochs: int | None = Field(default=None)
     """Number of epochs to run samples over."""

inspect_ai/model/_call_tools.py CHANGED Viewed

@@ -68,10 +68,6 @@ async def call_tools(
             # create a transript for this call
             init_transcript(Transcript(name=call.function))
-            # Amend the tool call with a custom view
-            view = tool_call_view(call, tdefs)
-            call.view = view
             result: Any = ""
             tool_error: ToolCallError | None = None
             try:
@@ -142,7 +138,7 @@ async def call_tools(
                 arguments=call.arguments,
                 result=content,
                 truncated=truncated,
-                view=view,
+                view=call.view,
                 error=tool_error,
                 events=list(transcript().events),
             )
@@ -163,7 +159,7 @@ async def call_tools(
                 id=call.id,
                 function=call.function,
                 arguments=call.arguments,
-                view=tool_call_view(call, tdefs),
+                view=call.view,
                 pending=True,
             )
             transcript()._event(event)

inspect_ai/model/_model.py CHANGED Viewed

@@ -31,11 +31,11 @@ from inspect_ai._util.registry import (
 )
 from inspect_ai._util.retry import log_rate_limit_retry
 from inspect_ai.tool import Tool, ToolChoice, ToolFunction, ToolInfo
-from inspect_ai.tool._tool_def import ToolDef
+from inspect_ai.tool._tool_def import ToolDef, tool_defs
 from inspect_ai.util import concurrency
 from ._cache import CacheEntry, CachePolicy, cache_fetch, cache_store
-from ._call_tools import disable_parallel_tools, tools_info
+from ._call_tools import disable_parallel_tools, tool_call_view, tools_info
 from ._chat_message import (
     ChatMessage,
     ChatMessageAssistant,
@@ -248,7 +248,7 @@ class Model:
         async with self._connection_concurrency(config):
             return await self._generate(
                 input=input,
-                tools=tools_info(tools),
+                tools=tools,
                 tool_choice=tool_choice,
                 config=config,
                 cache=cache,
@@ -257,7 +257,10 @@ class Model:
     async def _generate(
         self,
         input: list[ChatMessage],
-        tools: list[ToolInfo],
+        tools: list[Tool]
+        | list[ToolDef]
+        | list[ToolInfo]
+        | list[Tool | ToolDef | ToolInfo],
         tool_choice: ToolChoice | None,
         config: GenerateConfig,
         cache: bool | CachePolicy = False,
@@ -265,6 +268,12 @@ class Model:
         # default to 'auto' for tool_choice (same as underlying model apis)
         tool_choice = tool_choice if tool_choice else "auto"
+        # extract tool defs if we can
+        tdefs = tool_defs([tool for tool in tools if not isinstance(tool, ToolInfo)])
+        # resolve all tools into tool_info
+        tools = tools_info(tools)
         # if we have a specific tool selected then filter out the others
         if isinstance(tool_choice, ToolFunction):
             tools = [tool for tool in tools if tool.name == tool_choice.name]
@@ -374,6 +383,11 @@ class Model:
             # update output with time elapsed
             output.time = time_elapsed
+            # add views to tool calls
+            for choice in output.choices:
+                for tool_call in choice.message.tool_calls or []:
+                    tool_call.view = tool_call_view(tool_call, tdefs)
             # complete the transcript event
             complete(output, call)

inspect_ai/model/_providers/azureai.py CHANGED Viewed

@@ -89,6 +89,19 @@ class AzureAIAPI(ModelAPI):
             config=config,
         )
+        # collect known model_args (then delete them so we can pass the rest on)
+        def collect_model_arg(name: str) -> Any | None:
+            nonlocal model_args
+            value = model_args.get(name, None)
+            if value:
+                model_args.pop(name)
+            return value
+        emulate_tools = collect_model_arg("emulate_tools")
+        self.emulate_tools = (
+            not not emulate_tools if emulate_tools is not None else None
+        )
         # resolve api_key
         if not self.api_key:
             self.api_key = os.environ.get(
@@ -118,8 +131,15 @@ class AzureAIAPI(ModelAPI):
         tool_choice: ToolChoice,
         config: GenerateConfig,
     ) -> ModelOutput | tuple[ModelOutput, ModelCall]:
-        # if its llama then do fake tool calls
-        handler: ChatAPIHandler | None = Llama31Handler() if self.is_llama() else None
+        # emulate tools (auto for llama, opt-in for others)
+        if self.emulate_tools is None and self.is_llama():
+            handler: ChatAPIHandler | None = Llama31Handler()
+        elif self.emulate_tools:
+            handler = Llama31Handler()
+        else:
+            handler = None
+        # resolve input
         if handler:
             input = handler.input_with_tools(input, tools)

inspect_ai/model/_render.py CHANGED Viewed

@@ -1,8 +1,7 @@
 from rich.console import RenderableType
-from inspect_ai._util.format import format_function_call
-from inspect_ai._util.transcript import transcript_markdown
 from inspect_ai.tool._tool_call import ToolCall
+from inspect_ai.tool._tool_transcript import transcript_tool_call
 from ._chat_message import ChatMessage, ChatMessageAssistant, ChatMessageTool
@@ -17,8 +16,10 @@ def messages_preceding_assistant(messages: list[ChatMessage]) -> list[ChatMessag
     return list(reversed(preceding))
-def render_tool_calls(tool_calls: list[ToolCall]) -> RenderableType:
-    formatted_calls: list[str] = []
+def render_tool_calls(tool_calls: list[ToolCall]) -> list[RenderableType]:
+    formatted_calls: list[RenderableType] = []
     for call in tool_calls:
-        formatted_calls.append(format_function_call(call.function, call.arguments))
-    return transcript_markdown("```python\n" + "\n\n".join(formatted_calls) + "\n```\n")
+        formatted_calls.extend(transcript_tool_call(call))
+    return formatted_calls

inspect_ai/model/_trace.py CHANGED Viewed

@@ -42,7 +42,7 @@ def trace_assistant_message(
         # print tool calls
         if message.tool_calls:
             content.append(Text())
-            content.append(render_tool_calls(message.tool_calls))
+            content.extend(render_tool_calls(message.tool_calls))
         # print the assistant message
         trace_panel(title="Assistant", content=content)

inspect_ai/solver/_basic_agent.py CHANGED Viewed

@@ -54,6 +54,7 @@ def basic_agent(
     max_attempts: int = 1,
     message_limit: int | None = None,
     token_limit: int | None = None,
+    max_tool_output: int | None = None,
     score_value: ValueToFloat | None = None,
     incorrect_message: str
     | Callable[[TaskState, list[Score]], str] = DEFAULT_INCORRECT_MESSAGE,
@@ -87,6 +88,8 @@ def basic_agent(
           If not specified, will use limit_messages defined for the task. If there is none
           defined for the task, 50 will be used as a default.
        token_limit (int | None): Limit on tokens used in sample before terminating agent.
+       max_tool_output (int | None): Maximum output length (in bytes).
+          Defaults to max_tool_output from active GenerateConfig.
        score_value (ValueToFloat): Function used to extract float from scores (defaults
          to standard value_to_float())
        incorrect_message (str | Callable[[TaskState, list[Score]], str]): User message reply for an
@@ -182,7 +185,9 @@ def basic_agent(
                 # resolve tools calls (if any)
                 if state.output.message.tool_calls:
                     # call tool functions
-                    tool_results = await call_tools(state.output.message, state.tools)
+                    tool_results = await call_tools(
+                        state.output.message, state.tools, max_output=max_tool_output
+                    )
                     state.messages.extend(tool_results)
                     # was an answer submitted?
@@ -194,11 +199,13 @@ def basic_agent(
                         # exit if we are at max_attempts
                         attempts += 1
                         if attempts >= max_attempts:
+                            state.completed = True
                             break
                         # exit if the submission is successful
                         answer_scores = await score(state)
                         if score_value_fn(answer_scores[0].value) == 1.0:
+                            state.completed = True
                             break
                         # otherwise notify the model that it was incorrect and continue

inspect_ai/tool/_tool_transcript.py ADDED Viewed

@@ -0,0 +1,28 @@
+from pydantic import JsonValue
+from rich.console import RenderableType
+from rich.text import Text
+from typing_extensions import Protocol
+from inspect_ai._util.transcript import transcript_function, transcript_markdown
+from ._tool_call import ToolCallContent
+class TranscriptToolCall(Protocol):
+    function: str
+    arguments: dict[str, JsonValue]
+    view: ToolCallContent | None
+def transcript_tool_call(call: TranscriptToolCall) -> list[RenderableType]:
+    content: list[RenderableType] = []
+    if call.view:
+        if call.view.title:
+            content.append(Text.from_markup(f"[bold]{call.view.title}[/bold]\n"))
+        if call.view.format == "markdown":
+            content.append(transcript_markdown(call.view.content))
+        else:
+            content.append(call.view.content)
+    else:
+        content.append(transcript_function(call.function, call.arguments))
+    return content

{inspect_ai-0.3.51.dist-info → inspect_ai-0.3.52.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: inspect_ai
-Version: 0.3.51
+Version: 0.3.52
 Summary: Framework for large language model evaluations
 Author: UK AI Safety Institute
 License: MIT License
@@ -68,7 +68,7 @@ Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"
 Requires-Dist: pytest-dotenv; extra == "dev"
 Requires-Dist: pytest-xdist; extra == "dev"
-Requires-Dist: ruff==0.8.2; extra == "dev"
+Requires-Dist: ruff==0.8.3; extra == "dev"
 Requires-Dist: textual-dev>=0.86.2; extra == "dev"
 Requires-Dist: types-PyYAML; extra == "dev"
 Requires-Dist: types-aiofiles; extra == "dev"

{inspect_ai-0.3.51.dist-info → inspect_ai-0.3.52.dist-info}/RECORD RENAMED Viewed

@@ -3,7 +3,7 @@ inspect_ai/__main__.py,sha256=oWX4YwDZDg3GS3-IG0yPGoSEOfSzWihELg7QmrUlxjM,67
 inspect_ai/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 inspect_ai/_cli/cache.py,sha256=nOX9ysB3rZ-V8b_ryTpkgpoAynNlq4Op_fIqAIl4EVg,3910
 inspect_ai/_cli/common.py,sha256=f6OGE63OoN5Y4tk0i8pllfSdmhGlf8teZY8NCvWexGY,3573
-inspect_ai/_cli/eval.py,sha256=N0n2WZSUyoKhi-1sahmN5xP25lmjrjziPslU04lM16Y,29503
+inspect_ai/_cli/eval.py,sha256=5rxtpJw-px4ugIJDaZOJCrSWNGc9Hs9DgDyvhJLv0Wo,29940
 inspect_ai/_cli/info.py,sha256=d5W7VA5buinGcsdQjWqlsMM6iSNNGRUHQrr4JS2k8nk,1749
 inspect_ai/_cli/list.py,sha256=GreVEhJRpagiCpzVc3FSGhcdpTq9B8Jh--mfgs4ueFQ,2454
 inspect_ai/_cli/log.py,sha256=boSzHZkiabhnYWHLRVsZVENCryG-MDaNHIIbpSp0Mcs,5729
@@ -14,11 +14,11 @@ inspect_ai/_cli/util.py,sha256=rOyKR5p08-04IwJdcjakNXD1Gm-dGFtzaTTx7hyArPE,1402
 inspect_ai/_cli/view.py,sha256=9UstZ5a1upMZlVKfQCK8L_HvTR1WgLpBp3zpSgTyrLo,2786
 inspect_ai/_display/__init__.py,sha256=t9Xj8FbxvdBNsalnr16U0r3jSTFX9w4yXcUJwb06_6k,405
 inspect_ai/_display/core/active.py,sha256=ZXH9KzuXr9ViAGcAZRWtegbO_YqApvgBjr_7TGAxDFQ,1219
-inspect_ai/_display/core/config.py,sha256=1wdzgzFmMsPOtk6_AVH3xY4qp5y-44NexTSxxHWbX8c,1458
+inspect_ai/_display/core/config.py,sha256=zvF8jW3NhDGhVSCAqjXsblNjvKGUtMUdn2YMor3cFC4,1664
 inspect_ai/_display/core/display.py,sha256=3Tu6XO5v1oMhG9ZgmMUz11PncwmO_bK5W2q67IhQkl0,3064
 inspect_ai/_display/core/footer.py,sha256=2e5kimg8drlJgdhHYuHUiP-B8U6LtYLyORWB4a1kXx8,805
 inspect_ai/_display/core/group.py,sha256=z8CIwQ-8Mm9adQ8JDuMjw94ih9GfymU5s-1qnbKoEPs,2871
-inspect_ai/_display/core/panel.py,sha256=dGwhkYl7JOXsug-Spd0oLaOBNZyXJEq1TTnaWpq6fQA,3753
+inspect_ai/_display/core/panel.py,sha256=G2sb1uayFmFb0qiU37OziRXaxZF2t3APXadPbQcsZqY,3754
 inspect_ai/_display/core/progress.py,sha256=lqpMSjpvVwewqcLH5U5PzgCjFHniiZfjz30WXFpThR4,4176
 inspect_ai/_display/core/results.py,sha256=slA7cRUmGCenofc7rcfCx8dcwx5I6QRERbmJl2W0P8s,7192
 inspect_ai/_display/core/rich.py,sha256=ve4y1O_P9E1py6s4jGq1lHa7t6IlF5Brgue2-xikJOg,2729
@@ -33,14 +33,14 @@ inspect_ai/_display/textual/widgets/clock.py,sha256=8NijI2cqgTY1DJF8-nvK_QUJXxcn
 inspect_ai/_display/textual/widgets/console.py,sha256=lp5lbT9erPjxE1NWzvuJ5Bj8mN2ZZSBTgKQWHinMKgA,1590
 inspect_ai/_display/textual/widgets/footer.py,sha256=_4Dlzp9kriUVELBEk6HQzgiwLigHe5mH0dZqWr6sGi4,1078
 inspect_ai/_display/textual/widgets/samples.py,sha256=KPL2UMnKQe2zpFdVnTGOaIgebKLEdSXeZ59j2Yn1js0,14767
-inspect_ai/_display/textual/widgets/task_detail.py,sha256=vF16J-AOaTAJ8hZB-_d6DbIF66ynA2E9bH8quIrMIao,7301
+inspect_ai/_display/textual/widgets/task_detail.py,sha256=AC_GxBxphTO6b7XiWA9Xf6ZoJH4ndMCJ4PxahfKr4QQ,8237
 inspect_ai/_display/textual/widgets/tasks.py,sha256=vtPEP93rSlUdfJGo4pIkX3rtdrjhc_AG3qkBBRhdAVs,8018
 inspect_ai/_display/textual/widgets/titlebar.py,sha256=Gh_vnsco_1lStPb34TXM9MZJffjy83-1ekoRzUQF_6w,2144
 inspect_ai/_display/textual/widgets/toggle.py,sha256=ToYs-S4n90yuxWcAW2OTg6AbRf0GhSz61XxfhE6XZ3Y,895
-inspect_ai/_display/textual/widgets/transcript.py,sha256=qYbs_gQwQ64-yPkybWcFpjDqufkicDMejOK_oKn09Ww,11561
+inspect_ai/_display/textual/widgets/transcript.py,sha256=rRTM9aSNHau89mSmlH6Ni9qIckYS9_Han0RtuO6QZmk,10999
 inspect_ai/_eval/context.py,sha256=YdbaFJfzYEa-iF1fP8BEpB3l6ZnlOJZ8ntPfIrhdacQ,1080
-inspect_ai/_eval/eval.py,sha256=DPqjOT0cbMPnl1a93QMZxhM8-z324HlRsz2gc-xjh8o,34422
-inspect_ai/_eval/evalset.py,sha256=qScAjunjDVn3EJD8Wox84c6NGXn1WbVu3MxmQIN89es,26646
+inspect_ai/_eval/eval.py,sha256=SbgF53XwvzzuP-qRKcow4rMahKknzcgvo6x1KGz-dYM,35076
+inspect_ai/_eval/evalset.py,sha256=G7nuwRkHfraKaKKmOst6oCMxjAtNO41pEGd6NSqk19g,26839
 inspect_ai/_eval/list.py,sha256=VbZ-2EI6MqrXvCN7VTz21TQSoU5K5_Q0hqhxmj5A_m0,3744
 inspect_ai/_eval/loader.py,sha256=d_7PcajwAF8sG-sMeYOpkrSt5TcUz_tewgWocIIIlKQ,16267
 inspect_ai/_eval/registry.py,sha256=j-HBhwgg-3GsOqEZHmtALZnBqgJLSQWuEzwe0hsfug4,5422
@@ -52,13 +52,13 @@ inspect_ai/_eval/task/epochs.py,sha256=Ci7T6CQniSOTChv5Im2dCdSDrP-5hq19rV6iJ2uBc
 inspect_ai/_eval/task/error.py,sha256=gJnd8X7LHpPz5zcOq_az6ONZICGJ0_VpSz9yhF0yRyY,1233
 inspect_ai/_eval/task/generate.py,sha256=Edm-_6Wp1mkb7XpGkfTAqobWPfjJeWB0sb8-76UjNrc,1999
 inspect_ai/_eval/task/images.py,sha256=gnhOK97qFBnZYFisv1lgD4hLTaK83cHcJ5D9zA-NoLE,3524
-inspect_ai/_eval/task/log.py,sha256=kJI7XyyrkGBU9qfCacNwSM7LExaqMxgk28qig12AHUs,5961
+inspect_ai/_eval/task/log.py,sha256=wxA7rc-YGY7eri_IgJXIBPo4i6hy6j7-8FJlEO0TxRg,6068
 inspect_ai/_eval/task/results.py,sha256=eRT_nBEZrUEOA4tw0n2Zy04K9g_OyihXoXuyR1sODUk,13724
-inspect_ai/_eval/task/run.py,sha256=bnu6o5QemLsRtsJdsOQAWDAsqo5GWhXI7p-SOPMyUSE,31085
+inspect_ai/_eval/task/run.py,sha256=EpNrTru4u103aHRy63H9BBIOCWc5aFLn3hciaGDMBcM,31178
 inspect_ai/_eval/task/rundir.py,sha256=QXetLfqi1lRo-PcIuu7maQpVO57c2ebnsjfZk0lsAFk,2001
 inspect_ai/_eval/task/sandbox.py,sha256=k1QmGZAvObwQLCJmtmpSUqOODNHbBU3Kpevbn6q-4yM,4536
 inspect_ai/_eval/task/task.py,sha256=6TKyKrhjlTTZvG5bWAhqQM4gKO7x4yedrfhtnO_1ZW4,7813
-inspect_ai/_eval/task/util.py,sha256=qpgZnLWJrijfIZKo63KGkpUxV4r2Odel_muycOhtTcg,1321
+inspect_ai/_eval/task/util.py,sha256=9OEE4k-jESyTOpOEcHk_4cvCZrd1sUJ_00fI6Ubt6o8,1763
 inspect_ai/_util/_async.py,sha256=K5lVPKwl25JkLkcXfb0m3aZ-RJ4O3fog5HQm5EqbjM4,981
 inspect_ai/_util/appdirs.py,sha256=lhURbDS9xT2YBzWOe0jjxsdK4ZdiVAv_WwXQC83V_jw,563
 inspect_ai/_util/config.py,sha256=nuWVZbShE8IPnotDfRJx0uBZJxwbV36M0qKVYsQDEEI,848
@@ -84,7 +84,7 @@ inspect_ai/_util/http.py,sha256=c4yvH48ZkETZ7sNDuNzBR0NUS4r-6WzCaolW9my13ns,3628
 inspect_ai/_util/images.py,sha256=pCaL_GwWGQsnawu4WmbubY7eHZzgqXvHrmK5-GJikn8,1245
 inspect_ai/_util/json.py,sha256=1mgVURVVPo34hQcGmX6u25hlbRdRY4hmNHXZGVwYaKc,3342
 inspect_ai/_util/list.py,sha256=6_5r5jI5RKK34kCmIqqVQ5hYG-G8v0F5H7L-DmQQ2E4,279
-inspect_ai/_util/logger.py,sha256=i5vl-ahfGk9pzXJPXljeCswqYLu6p9Hq2XJ64aYLfX0,5707
+inspect_ai/_util/logger.py,sha256=erX7YLYwrtZ1Z1SI2sun70f40zn7EU38DlUKB5Sgt9E,6155
 inspect_ai/_util/notebook.py,sha256=Mgz3J4uBh-MqVBRmpiJqDHRpn2hd7HIOBeJBwLG-bbk,2998
 inspect_ai/_util/package.py,sha256=2ntItRYaYBaVWI5eDaB4FdpI1IUBiBWNRxq7FChvk1I,2729
 inspect_ai/_util/path.py,sha256=fLfKXOXuQstwVfl2i1Gzxix8TZLxLqtZIHqyqK3MnvY,3795
@@ -93,12 +93,12 @@ inspect_ai/_util/platform.py,sha256=knsijYYaif5rgkGuYRwr_p7DlsD1VD-yfbt08dKOrGM,
 inspect_ai/_util/registry.py,sha256=yoajCn16miznXpFiFNOT-1TTrS5ZuZbly5mt7T4UJbM,11689
 inspect_ai/_util/retry.py,sha256=fAupOVgGJ0ImnmtXmCHBKRa3AQC7jDA-Zf_zilSCRl0,1899
 inspect_ai/_util/rich.py,sha256=sNWEsGlGmkkZZLo4AcEv-_yJI1bI0HcpZVt7wNJFsXg,692
-inspect_ai/_util/samples.py,sha256=UfjZUnHwKnkIMVw14jkaEnNtSQxbU2AUFhEZfkckh_w,322
+inspect_ai/_util/samples.py,sha256=uobAN2i1U-6YBxCBvaW6z1-xFufQIuFXHnnnK-oTDKc,507
 inspect_ai/_util/terminal.py,sha256=I4NDy7Ln5YSCzxbd0O9OPslEHQMBVKZfqJl3TOCegTg,4166
 inspect_ai/_util/text.py,sha256=1Q5tNqB-61yXXo_bQxzxJCEXCMU7aVs3qpXQir2xKu0,3174
 inspect_ai/_util/throttle.py,sha256=Yoz-qnGULR88QFrJfeo4TQgfQ9AHsVNVKqdDlxgSipc,663
 inspect_ai/_util/timeouts.py,sha256=-iC1LjpNqB6Hx-i36MfSrLy819RVhKNo4KXjZDuskZQ,5193
-inspect_ai/_util/transcript.py,sha256=SqGCfsH5mCk41MndwRK46BspBq5djcK3dTPx4INp23Q,2474
+inspect_ai/_util/transcript.py,sha256=KG6Vz57fTYDinrscd-iT8K2JfYPZ7SJrpedrlLKK9Lo,2744
 inspect_ai/_util/url.py,sha256=Ci9I1T7hSRuA1CvgQmN9TWTSzUPO4ILMkd_8maZlO6k,622
 inspect_ai/_util/version.py,sha256=PRh1HSHD_EgRW_VYIPquOPj-gL9DM164ePQ9LL3d9lU,739
 inspect_ai/_view/notify.py,sha256=6lI42tz4Ui7ThvwsJgBamRcCczSTWGXGWtciP3M8VaA,1394
@@ -123,7 +123,7 @@ inspect_ai/_view/www/yarn.lock,sha256=UsPRVYxWj1teJWLag7l8znvHprucro96dR3xLkmGZw
 inspect_ai/_view/www/dist/index.html,sha256=ErXXXs-OUDOAZexWCmn_u-frxXROpg4kBt6Yvjj0a0U,998
 inspect_ai/_view/www/dist/assets/favicon.svg,sha256=b9AHYZaO2zBzeKH6G4PwXZMGGW_UxY0omKHam-c9MAs,1508
 inspect_ai/_view/www/dist/assets/index.css,sha256=KEUcz_Eijk8JxCiFDRpkc8X1fWm6xLq2gmkKXOXZDQk,813685
-inspect_ai/_view/www/dist/assets/index.js,sha256=OSaMvUDbT66h9MLMMesBdZgVnAvRuGGsjBVzhC_dpzc,923612
+inspect_ai/_view/www/dist/assets/index.js,sha256=UuWrNHF1GZdpQ8M_5_HEY7wjsTzlVLeZy7O-rbTUXhM,924009
 inspect_ai/_view/www/src/App.mjs,sha256=7uCBYgClLj804nK_5Nt0imusq7PqYTuaW7WwxYlKDkc,28679
 inspect_ai/_view/www/src/Register.mjs,sha256=jpIqov7fyyLPW9Ouhc9KOy__o8s-P5h-VF9S1RDsIDU,116
 inspect_ai/_view/www/src/Types.mjs,sha256=-GHM7V0ememrEteUSvA1-sh7eucDLgVpOgJ5pEDYVfs,623
@@ -183,7 +183,7 @@ inspect_ai/_view/www/src/navbar/Navbar.mjs,sha256=y9WZXlvVk7aubkE5IhF190IuF7FG6q
 inspect_ai/_view/www/src/navbar/SecondaryBar.mjs,sha256=5Sn8jh_rdD0s1CT1qW3fsl-_MX-woYuDDSFDJWkVlmc,4424
 inspect_ai/_view/www/src/plan/PlanCard.mjs,sha256=2JGBPFiGTJmf9DR9IuwgcrACO_3mPFpap2mwokCzwTk,10326
 inspect_ai/_view/www/src/samples/SampleDialog.mjs,sha256=5cBb2UIRe7Z6BfFAJ4Q1djDJSZZdia-2E0rC6X2a5m0,3709
-inspect_ai/_view/www/src/samples/SampleDisplay.mjs,sha256=RoktTmo_lq4G7NGqE-UmlUW4fcvempMRBKE0WsURFHw,15031
+inspect_ai/_view/www/src/samples/SampleDisplay.mjs,sha256=wSn0vPR5KGwywK7AKefy1zEDp2tZpSckRvrB8vIOBL4,15756
 inspect_ai/_view/www/src/samples/SampleError.mjs,sha256=48in4mqQpe2KxQK9__3rBfK5mEKIqPrIscWeCW1r_Hk,2674
 inspect_ai/_view/www/src/samples/SampleLimit.mjs,sha256=_IT5kKng8L3A26fHjG0vANAn2y73Nb6j5A_90osQz2E,627
 inspect_ai/_view/www/src/samples/SampleList.mjs,sha256=39dNnMlIay8qTFC_jBU4kz-VSDUrK8TXEd4RQ8BUaqs,11149
@@ -274,7 +274,7 @@ inspect_ai/log/_bundle.py,sha256=5Uy-s64_SFokZ7WRzti9mD7yoKrd2sOzdvqKyahoiC4,804
 inspect_ai/log/_condense.py,sha256=zWSvdq3bQ6Pe8yiOWHVj5Gc--he9oogQ3SI_RXJ5z3o,9568
 inspect_ai/log/_convert.py,sha256=62cwEY5zhz76GaTB5cwkRUgWQz5kehxQ82o7ItBcX2U,3461
 inspect_ai/log/_file.py,sha256=RqadF_vNbhgx7PxghVmg-1WiJkpz2vwfq3AJ1C_umOM,17685
-inspect_ai/log/_log.py,sha256=natE9pn0ykJ9cmXkug3y65Ra2-EdyrKSiQ99K3u80tg,18620
+inspect_ai/log/_log.py,sha256=jmjS2k75i4iftPsYjjrUH6UBPeyKZMX97TgRqrfWckk,18732
 inspect_ai/log/_message.py,sha256=VCKvYcTOH6M9AvYykqx8D7fgXnvti0yDS7vY5To3zOI,1927
 inspect_ai/log/_retry.py,sha256=e7a2hjl3Ncl8b8sU7CsDpvK8DV0b1uSRLeokRX1mt34,2109
 inspect_ai/log/_samples.py,sha256=b-5-wT1rD5KQTfVqsYlrQPSabj9glmHM9SmZg9mDk-c,3814
@@ -287,18 +287,18 @@ inspect_ai/log/_recorders/json.py,sha256=edsglc71m-RUqddv5D_N4g24XsGBVOK6CoVXPZ_
 inspect_ai/log/_recorders/recorder.py,sha256=yvW_D99QxUQmnF5EiGsWIVetBXdssMUcsq5us9oRzx4,1551
 inspect_ai/model/__init__.py,sha256=mBs6hmdWu6KhTQCXFh_NHrCw9oQ9-mn6wDLTDqjguN8,2028
 inspect_ai/model/_cache.py,sha256=VlMQGPgFxJGPXktqWy_wMpA3iJWmvA8siSdmX71MEHQ,13517
-inspect_ai/model/_call_tools.py,sha256=ptYAPtlS4Wh6YbFHVVqx2B7u-gFciVNPWKjcdky74A8,13611
+inspect_ai/model/_call_tools.py,sha256=IxykjNqUAxBpPSJvW_0GuJRJX8jvkjlu51T63sNIhHI,13468
 inspect_ai/model/_chat_message.py,sha256=Zc2hHMLbWONuhLx-XYPOekDD20mF4uTU62WRTsry3c4,4355
 inspect_ai/model/_generate_config.py,sha256=0bk3FKFrwgEBkjrdtwzHOPDBIJG9VIzTdgh2xrvTaZI,8360
 inspect_ai/model/_image.py,sha256=kpO2Bn_-c-dK80HuPOPH1eSNmcoc39kofwf4yTTiTFE,477
-inspect_ai/model/_model.py,sha256=ZUvOhHUYutJ5QwBrhOCYt58O52k8WHFXYBp17IjRvF0,28662
+inspect_ai/model/_model.py,sha256=pI7nwBio0Kx4dYz5GSV1o5h31gSI-61dOBo5Zp-qAH8,29184
 inspect_ai/model/_model_call.py,sha256=r6ObHZwm7jy1qX2qxvAP6iPV3BhdGThL-VH-QEawQhA,2017
 inspect_ai/model/_model_output.py,sha256=iv0bk6U5jRnhqeULIMAVrC57j2Be-EjOhe7kvxAzCcg,6537
 inspect_ai/model/_registry.py,sha256=Cr2y32EqLnOqLbSWoXHVK4ivTTzCUhJuACxoTyPt8kY,2032
-inspect_ai/model/_render.py,sha256=aawzq3KZMXbM5xwJXQRgE3V3fRO0ymFQ8J4Abqvl0nc,921
-inspect_ai/model/_trace.py,sha256=uw_RraVxWPWVpBuUcXwAl9ZgLuw0RG1FwjWznGlhNfw,1601
+inspect_ai/model/_render.py,sha256=bGtGdFFWvNdeBjs60Junk0I8xVnBOj4Oe3a-86BjPtc,802
+inspect_ai/model/_trace.py,sha256=Zr4cZGICQO85L0tRqW5oehuvPFk1EO5NBUtaJtLKBwk,1601
 inspect_ai/model/_providers/anthropic.py,sha256=lgWQDhr4d7EjPJYwj021YRc8VZAtPRmTwzJthVyMqGg,20801
-inspect_ai/model/_providers/azureai.py,sha256=TBnavnhwyi-RUjWHQkyCDkdABeXKsK3nIRZ-i9U_SIY,13267
+inspect_ai/model/_providers/azureai.py,sha256=ZTNmFd0PePFjJjGA5o3JYud5qdfJvrLDp3ah7XOkvp4,13927
 inspect_ai/model/_providers/bedrock.py,sha256=AVej7e6OoYGW-C2jR8H4jBn3aKwTj5RwPOMzVxO_2XE,22702
 inspect_ai/model/_providers/cloudflare.py,sha256=h6ubjf0kxyMM7Aj2tm68tWa-2R7RAXNGp1O6KMvi0Gw,4143
 inspect_ai/model/_providers/google.py,sha256=WA1eRIjMwJoOrnaPq1TiodIIGLBURPaBBRwQaOHM4VU,20281
@@ -342,7 +342,7 @@ inspect_ai/scorer/_reducer/reducer.py,sha256=dRFIW9_gi30i64g-AZWxDTHla7mJfqyF0dJ
 inspect_ai/scorer/_reducer/registry.py,sha256=J2tvuuxf4jBC09_SCBZg99Qb2qQUWG8STEsw7ASWpXQ,5388
 inspect_ai/scorer/_reducer/types.py,sha256=uimvzIBRK7x1Dof77gsHYe9PU3hekB1opm9DTAa4sL4,340
 inspect_ai/solver/__init__.py,sha256=LLemW4Szs7uJuJFjp0KLlVS1V7M-2_PpkYS_17GGtaE,3234
-inspect_ai/solver/_basic_agent.py,sha256=cQUzpYJmhEEa7mgLl8ZY3yHJFwLapLT3UuPacalRD78,9241
+inspect_ai/solver/_basic_agent.py,sha256=vvC0v4g-7hLzGCsmb_I0xa_Z3RCLwp4ak7Qk-EVl7F8,9594
 inspect_ai/solver/_chain.py,sha256=F-2ZHE2KOlDAIgH1_Q23whUMH5TjYGvCHhcOgbRxe7I,2234
 inspect_ai/solver/_critique.py,sha256=ddO8J7VqSEsT6kofawpn3PrcUpLjLUMFmJi0hocDZpI,3504
 inspect_ai/solver/_fork.py,sha256=Ge1PwpCHjeZhm2CLAWKss2uFuQd9BGzVinLOW6UOnfE,2880
@@ -362,6 +362,7 @@ inspect_ai/tool/_tool_def.py,sha256=OQo6jhtDfrj2uDDKeoT4g8Ju1r8uroK6DqQkpyUikEA,
 inspect_ai/tool/_tool_description.py,sha256=SZTQzehReNNKwQ0iUL6v4pPfEptgf3UOP4J888JV18M,524
 inspect_ai/tool/_tool_info.py,sha256=zoAUkA99VbgSc5bLPGwkYRT5u8rzS9NjrrxHR24A214,7865
 inspect_ai/tool/_tool_params.py,sha256=oLYlxcyKtIbMxZh5yowSynNrBR5sWj4nrdel6pFSIbc,1158
+inspect_ai/tool/_tool_transcript.py,sha256=rMibJoBN5Nn41RwInqk45h9RDPxZGu81saDf4SkpqTs,904
 inspect_ai/tool/_tool_with.py,sha256=iZYVhuZSL0Q9PFKz-ob_923E77WzuQ2U1Qo4DfdWuBo,1881
 inspect_ai/tool/_tools/_execute.py,sha256=DkFlvUTvI595H1zH5IKArhbyBo8YZWqq9tvoUMdvlaw,2823
 inspect_ai/tool/_tools/_web_search.py,sha256=YqZ3E65ssdq1X2NSH9Mqt5INXdPVQOdKa3PbKi7XjAY,7828
@@ -406,9 +407,9 @@ inspect_ai/util/_sandbox/docker/docker.py,sha256=eb8yJQbG-3ZWT2zvzhAd7l3ejrdxyv7
 inspect_ai/util/_sandbox/docker/internal.py,sha256=pwK3xl-fx_5FVmVmvautE8R7op3XCjgiA-1JqlRcHII,1311
 inspect_ai/util/_sandbox/docker/prereqs.py,sha256=LKWt2T5CJ8hZ25SFDGX9FTGXFAtHzvQoolOffjQKlm8,3341
 inspect_ai/util/_sandbox/docker/util.py,sha256=VyB9_Aab8vmgs6dJkAyt3oO_jejRPAppyaibZdetayc,2836
-inspect_ai-0.3.51.dist-info/LICENSE,sha256=aYPffOl9TwBXDQ8g33Jh6AsBhobb3A76qNm7r2HZsps,1079
-inspect_ai-0.3.51.dist-info/METADATA,sha256=gyhqUPXr7sIz01JUJ4O8Rcv0Mo6WPeUJydphTrsNfMk,4564
-inspect_ai-0.3.51.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-inspect_ai-0.3.51.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
-inspect_ai-0.3.51.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
-inspect_ai-0.3.51.dist-info/RECORD,,
+inspect_ai-0.3.52.dist-info/LICENSE,sha256=aYPffOl9TwBXDQ8g33Jh6AsBhobb3A76qNm7r2HZsps,1079
+inspect_ai-0.3.52.dist-info/METADATA,sha256=zUCf5WJGaR7f7rNipmHuDY7HKygcKAJICduIbX_Dyso,4564
+inspect_ai-0.3.52.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+inspect_ai-0.3.52.dist-info/entry_points.txt,sha256=WGGLmzTzDWLzYfiyovSY6oEKuf-gqzSDNOb5V-hk3fM,54
+inspect_ai-0.3.52.dist-info/top_level.txt,sha256=Tp3za30CHXJEKLk8xLe9qGsW4pBzJpEIOMHOHNCXiVo,11
+inspect_ai-0.3.52.dist-info/RECORD,,

{inspect_ai-0.3.51.dist-info → inspect_ai-0.3.52.dist-info}/LICENSE RENAMED Viewed

File without changes

{inspect_ai-0.3.51.dist-info → inspect_ai-0.3.52.dist-info}/WHEEL RENAMED Viewed

File without changes

{inspect_ai-0.3.51.dist-info → inspect_ai-0.3.52.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{inspect_ai-0.3.51.dist-info → inspect_ai-0.3.52.dist-info}/top_level.txt RENAMED Viewed

File without changes

inspect-ai 0.3.51__py3-none-any.whl → 0.3.52__py3-none-any.whl

inspect-ai 0.3.51__py3-none-any.whl → 0.3.52__py3-none-any.whl