PyPI - inspect-ai - Versions diffs - 0.3.68__py3-none-any.whl → 0.3.69__py3-none-any.whl - Mend

inspect-ai 0.3.68__py3-none-any.whl → 0.3.69__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

inspect_ai/_display/plain/display.py +9 -11
inspect_ai/_display/textual/app.py +3 -4
inspect_ai/_display/textual/widgets/samples.py +43 -8
inspect_ai/_util/interrupt.py +9 -0
inspect_ai/_util/logger.py +4 -0
inspect_ai/_util/text.py +288 -1
inspect_ai/_view/www/dist/assets/index.js +1 -1
inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +1 -1
inspect_ai/log/_samples.py +0 -4
inspect_ai/model/_model.py +3 -0
inspect_ai/model/_providers/google.py +356 -302
inspect_ai/model/_providers/mistral.py +10 -8
inspect_ai/model/_providers/providers.py +5 -5
inspect_ai/solver/_plan.py +3 -0
inspect_ai/solver/_solver.py +3 -0
inspect_ai/solver/_task_state.py +3 -1
inspect_ai/util/_sandbox/docker/cleanup.py +8 -3
inspect_ai/util/_sandbox/docker/compose.py +5 -9
inspect_ai/util/_sandbox/docker/docker.py +14 -2
inspect_ai/util/_sandbox/docker/util.py +10 -1
inspect_ai/util/_sandbox/self_check.py +2 -1
inspect_ai/util/_subprocess.py +4 -1
{inspect_ai-0.3.68.dist-info → inspect_ai-0.3.69.dist-info}/METADATA +3 -3
{inspect_ai-0.3.68.dist-info → inspect_ai-0.3.69.dist-info}/RECORD +28 -27
{inspect_ai-0.3.68.dist-info → inspect_ai-0.3.69.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.68.dist-info → inspect_ai-0.3.69.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.68.dist-info → inspect_ai-0.3.69.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.68.dist-info → inspect_ai-0.3.69.dist-info}/top_level.txt +0 -0

inspect_ai/_display/plain/display.py CHANGED Viewed

@@ -119,14 +119,14 @@ class PlainTaskDisplay(TaskDisplay):
         self.samples_complete = 0
         self.samples_total = 0
         self.current_metrics: list[TaskDisplayMetric] | None = None
-        self.last_progress = 0  # Track last progress percentage
+        self.last_progress = 0
     @contextlib.contextmanager
     def progress(self) -> Iterator[Progress]:
         self.progress_display = PlainProgress(self.task.profile.steps)
         yield self.progress_display
-    @throttle(1)
+    @throttle(5)
     def _print_status_throttled(self) -> None:
         self._print_status()
@@ -135,13 +135,8 @@ class PlainTaskDisplay(TaskDisplay):
         if not self.progress_display:
             return
-        # Calculate current progress percentage
-        current_progress = int(
-            self.progress_display.current / self.progress_display.total * 100
-        )
-        # Only print on percentage changes to avoid too much output
-        if current_progress != self.last_progress:
+        # Only print when step count changes to avoid too much output
+        if self.progress_display.current != self.last_progress:
             status_parts: list[str] = []
             # if this is parallel print task and model to distinguish (limit both to 12 chars)
@@ -154,8 +149,11 @@ class PlainTaskDisplay(TaskDisplay):
                 )
             # Add step progress
+            progress_percent = int(
+                self.progress_display.current / self.progress_display.total * 100
+            )
             status_parts.append(
-                f"Steps: {self.progress_display.current:3d}/{self.progress_display.total} {current_progress:3d}%"
+                f"Steps: {self.progress_display.current:3d}/{self.progress_display.total} {progress_percent:3d}%"
             )
             # Add sample progress
@@ -187,7 +185,7 @@ class PlainTaskDisplay(TaskDisplay):
             # Print on new line
             print(" | ".join(status_parts))
-            self.last_progress = current_progress
+            self.last_progress = self.progress_display.current
     def sample_complete(self, complete: int, total: int) -> None:
         self.samples_complete = complete

inspect_ai/_display/textual/app.py CHANGED Viewed

@@ -13,7 +13,6 @@ from typing import (
 import rich
 from rich.console import Console
-from rich.text import Text
 from textual.app import App, ComposeResult
 from textual.binding import Binding, BindingType
 from textual.css.query import NoMatches
@@ -316,9 +315,9 @@ class TaskScreenApp(App[TR]):
         def set_unread(unread: int | None) -> None:
             if unread is not None:
-                console_tab.label = Text(f"Console ({unread}")
+                console_tab.label = f"Console ({unread}"  # type: ignore[assignment]
             else:
-                console_tab.label = Text("Console")
+                console_tab.label = "Console"  # type: ignore[assignment]
         self.watch(console_view, "unread", set_unread)
@@ -385,7 +384,7 @@ class TaskScreenApp(App[TR]):
         def set_title(self, title: str) -> None:
             tabs = self.app.query_one(TabbedContent)
             tab = tabs.get_tab(self.tab_id)
-            tab.label = Text(title)
+            tab.label = title  # type: ignore[assignment]
         def activate(self) -> None:
             # show the tab

inspect_ai/_display/textual/widgets/samples.py CHANGED Viewed

@@ -6,6 +6,7 @@ from rich.table import Table
 from rich.text import Text
 from textual.app import ComposeResult
 from textual.containers import Horizontal, HorizontalGroup, Vertical, VerticalGroup
+from textual.css.query import NoMatches
 from textual.reactive import reactive
 from textual.widget import Widget
 from textual.widgets import (
@@ -61,7 +62,10 @@ class SamplesView(Widget):
         )
     async def notify_active(self, active: bool) -> None:
-        await self.query_one(TranscriptView).notify_active(active)
+        try:
+            await self.query_one(TranscriptView).notify_active(active)
+        except NoMatches:
+            pass
     def set_samples(self, samples: list[ActiveSample]) -> None:
         # throttle to no more than 1 second per 100 samples
@@ -408,6 +412,16 @@ class SampleToolbar(Horizontal):
     PENDING_STATUS = "pending_status"
     PENDING_CAPTION = "pending_caption"
+    TIMEOUT_TOOL_CALL_ENABLED = (
+        "Cancel the tool call and report a timeout to the model."
+    )
+    TIMEOUT_TOOL_CALL_DISABLED = "Cancelling tool call..."
+    CANCEL_SCORE_OUTPUT_ENABLED = (
+        "Cancel the sample and score whatever output has been generated so far."
+    )
+    CANCEL_RAISE_ERROR_ENABLED = "Cancel the sample and raise an error"
+    CANCEL_DISABLED = "Cancelling sample..."
     DEFAULT_CSS = f"""
     SampleToolbar {{
         grid-size: 5 1;
@@ -445,18 +459,18 @@ class SampleToolbar(Horizontal):
         yield Button(
             Text("Timeout Tool"),
             id=self.TIMEOUT_TOOL_CALL,
-            tooltip="Cancel the tool call and report a timeout to the model.",
+            tooltip=self.TIMEOUT_TOOL_CALL_ENABLED,
         )
         yield Horizontal()
         yield Button(
             Text("Cancel (Score)"),
             id=self.CANCEL_SCORE_OUTPUT,
-            tooltip="Cancel the sample and score whatever output has been generated so far.",
+            tooltip=self.CANCEL_SCORE_OUTPUT_ENABLED,
         )
         yield Button(
             Text("Cancel (Error)"),
             id=self.CANCEL_RAISE_ERROR,
-            tooltip="Cancel the sample and raise an error (task will exit unless fail_on_error is set)",
+            tooltip=self.CANCEL_RAISE_ERROR_ENABLED,
         )
     def on_mount(self) -> None:
@@ -475,14 +489,26 @@ class SampleToolbar(Horizontal):
                 )
                 if isinstance(last_event, ToolEvent):
                     last_event._cancel()
-            elif event.button.id == self.CANCEL_SCORE_OUTPUT:
-                self.sample.interrupt("score")
-            elif event.button.id == self.CANCEL_RAISE_ERROR:
-                self.sample.interrupt("error")
+                    event.button.disabled = True
+                    event.button.tooltip = self.TIMEOUT_TOOL_CALL_DISABLED
+            else:
+                if event.button.id == self.CANCEL_SCORE_OUTPUT:
+                    self.sample.interrupt("score")
+                elif event.button.id == self.CANCEL_RAISE_ERROR:
+                    self.sample.interrupt("error")
+                cancel_score_output = self.query_one("#" + self.CANCEL_SCORE_OUTPUT)
+                cancel_score_output.disabled = True
+                cancel_score_output.tooltip = self.CANCEL_DISABLED
+                cancel_with_error = self.query_one("#" + self.CANCEL_RAISE_ERROR)
+                cancel_with_error.disabled = True
+                cancel_with_error.tooltip = self.CANCEL_DISABLED
     async def sync_sample(self, sample: ActiveSample | None) -> None:
         from inspect_ai.log._transcript import ModelEvent
+        # is it a new sample?
+        new_sample = sample != self.sample
         # track the sample
         self.sample = sample
@@ -499,6 +525,13 @@ class SampleToolbar(Horizontal):
             cancel_score_output.display = True
             cancel_with_error.display = not sample.fails_on_error
+            # if its a new sample then reset enabled states
+            if new_sample:
+                cancel_score_output.disabled = False
+                cancel_score_output.tooltip = self.CANCEL_SCORE_OUTPUT_ENABLED
+                cancel_with_error.disabled = False
+                cancel_with_error.tooltip = self.CANCEL_RAISE_ERROR_ENABLED
             # if we have a pending event then start the clock and show pending status
             last_event = (
                 sample.transcript.events[-1]
@@ -520,6 +553,8 @@ class SampleToolbar(Horizontal):
                 )
                 timeout_tool.display = isinstance(last_event, ToolEvent)
+                timeout_tool.disabled = False
+                timeout_tool.tooltip = self.TIMEOUT_TOOL_CALL_ENABLED
                 clock.start(last_event.timestamp.timestamp())
             else:

inspect_ai/_util/interrupt.py ADDED Viewed

@@ -0,0 +1,9 @@
+import asyncio
+def check_sample_interrupt() -> None:
+    from inspect_ai.log._samples import sample_active
+    sample = sample_active()
+    if sample and sample.interrupt_action:
+        raise asyncio.CancelledError()

inspect_ai/_util/logger.py CHANGED Viewed

@@ -90,6 +90,10 @@ class LogHandler(RichHandler):
         if "Event loop is closed" in record.getMessage():
             return
+        # skip google-genai AFC message
+        if "AFC is enabled with max remote calls" in record.getMessage():
+            return
         # write to stderr if we are at or above the threshold
         if record.levelno >= self.display_level:
             super().emit(record)

inspect_ai/_util/text.py CHANGED Viewed

@@ -1,7 +1,8 @@
+import random
 import re
 import string
 from logging import getLogger
-from typing import NamedTuple
+from typing import List, NamedTuple
 logger = getLogger(__name__)
@@ -131,3 +132,289 @@ def truncate(text: str, length: int, overflow: str = "...", pad: bool = True) ->
     truncated = text[: length - overflow_length] + overflow
     return truncated
+def generate_large_text(target_tokens: int) -> str:
+    """Generate a large amount of text with approximately the target number of tokens"""
+    generated_text = []
+    estimated_tokens = 0
+    while estimated_tokens < target_tokens:
+        sentence = generate_sentence()
+        # Add paragraph breaks occasionally
+        if random.random() < 0.1:
+            sentence += "\n\n"
+        generated_text.append(sentence)
+        # Rough estimate of tokens (words + punctuation)
+        estimated_tokens += len(sentence.split()) + 2
+    return " ".join(generated_text)
+def generate_sentence() -> str:
+    """Generate a random sentence using predefined templates"""
+    adjectives, nouns, verbs = create_word_lists()
+    templates = [
+        f"The {random.choice(adjectives)} {random.choice(nouns)} {random.choice(verbs)} the {random.choice(adjectives)} {random.choice(nouns)}.",
+        f"A {random.choice(adjectives)} {random.choice(nouns)} {random.choice(verbs)} near the {random.choice(nouns)}.",
+        f"In the {random.choice(adjectives)} {random.choice(nouns)}, the {random.choice(nouns)} {random.choice(verbs)} {random.choice(adjectives)}.",
+        f"When the {random.choice(nouns)} {random.choice(verbs)}, a {random.choice(adjectives)} {random.choice(nouns)} {random.choice(verbs)}.",
+        f"The {random.choice(nouns)} {random.choice(verbs)} while the {random.choice(adjectives)} {random.choice(nouns)} {random.choice(verbs)}.",
+    ]
+    return random.choice(templates)
+def create_word_lists() -> tuple[List[str], List[str], List[str]]:
+    """Create basic word lists for sentence generation"""
+    # Common adjectives
+    adjectives = [
+        "red",
+        "blue",
+        "green",
+        "dark",
+        "bright",
+        "quiet",
+        "loud",
+        "small",
+        "large",
+        "quick",
+        "slow",
+        "happy",
+        "sad",
+        "clever",
+        "wise",
+        "ancient",
+        "modern",
+        "complex",
+        "simple",
+        "elegant",
+        "rough",
+        "smooth",
+        "sharp",
+        "dull",
+        "fresh",
+        "stale",
+        "clean",
+        "dirty",
+        "heavy",
+        "light",
+        "hot",
+        "cold",
+        "dry",
+        "wet",
+        "rich",
+        "poor",
+        "thick",
+        "thin",
+        "strong",
+        "weak",
+        "early",
+        "late",
+        "young",
+        "old",
+        "good",
+        "bad",
+        "high",
+        "low",
+        "long",
+        "short",
+        "deep",
+        "shallow",
+        "hard",
+        "soft",
+        "near",
+        "far",
+        "wide",
+        "narrow",
+        "big",
+        "little",
+        "fast",
+        "slow",
+        "busy",
+        "lazy",
+        "new",
+        "old",
+        "full",
+        "empty",
+        "loud",
+        "quiet",
+        "sweet",
+        "sour",
+        "brave",
+        "scared",
+    ]
+    # Common nouns
+    nouns = [
+        "time",
+        "person",
+        "year",
+        "way",
+        "day",
+        "thing",
+        "man",
+        "world",
+        "life",
+        "hand",
+        "part",
+        "child",
+        "eye",
+        "woman",
+        "place",
+        "work",
+        "week",
+        "case",
+        "point",
+        "group",
+        "number",
+        "room",
+        "fact",
+        "idea",
+        "water",
+        "money",
+        "month",
+        "book",
+        "line",
+        "city",
+        "business",
+        "night",
+        "question",
+        "story",
+        "job",
+        "word",
+        "house",
+        "power",
+        "game",
+        "country",
+        "plant",
+        "animal",
+        "tree",
+        "stone",
+        "river",
+        "fire",
+        "problem",
+        "theory",
+        "street",
+        "family",
+        "history",
+        "mind",
+        "car",
+        "music",
+        "art",
+        "nation",
+        "science",
+        "nature",
+        "truth",
+        "peace",
+        "voice",
+        "class",
+        "paper",
+        "space",
+        "ground",
+        "market",
+        "court",
+        "force",
+        "price",
+        "action",
+        "reason",
+        "love",
+        "law",
+        "bird",
+        "literature",
+        "knowledge",
+        "society",
+        "valley",
+        "ocean",
+        "machine",
+        "energy",
+        "metal",
+        "mountain",
+    ]
+    # Common verbs (present tense)
+    verbs = [
+        "run",
+        "walk",
+        "jump",
+        "sing",
+        "dance",
+        "write",
+        "read",
+        "speak",
+        "listen",
+        "watch",
+        "think",
+        "grow",
+        "live",
+        "play",
+        "work",
+        "move",
+        "stop",
+        "start",
+        "create",
+        "destroy",
+        "build",
+        "break",
+        "push",
+        "pull",
+        "open",
+        "close",
+        "rise",
+        "fall",
+        "increase",
+        "decrease",
+        "begin",
+        "end",
+        "love",
+        "hate",
+        "help",
+        "hurt",
+        "make",
+        "take",
+        "give",
+        "receive",
+        "buy",
+        "sell",
+        "eat",
+        "drink",
+        "sleep",
+        "wake",
+        "laugh",
+        "cry",
+        "learn",
+        "teach",
+        "change",
+        "stay",
+        "come",
+        "go",
+        "arrive",
+        "leave",
+        "enter",
+        "exit",
+        "succeed",
+        "fail",
+        "win",
+        "lose",
+        "fight",
+        "defend",
+        "attack",
+        "protect",
+        "save",
+        "waste",
+        "gather",
+        "scatter",
+        "collect",
+        "distribute",
+        "join",
+        "separate",
+        "unite",
+        "divide",
+        "share",
+    ]
+    return adjectives, nouns, verbs

inspect_ai/_view/www/dist/assets/index.js CHANGED Viewed

@@ -14470,7 +14470,7 @@ var require_assets = __commonJS({
             const value2 = score2[key2];
             const formattedValue = value2 && isNumeric(value2) ? formatPrettyDecimal(
               typeof value2 === "number" ? value2 : parseFloat(value2 === true ? "1" : value2)
-            ) : value2;
+            ) : String(value2);
             scores2.push(
               /* @__PURE__ */ jsxRuntimeExports.jsxs(
                 "div",

inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx CHANGED Viewed

@@ -49,7 +49,7 @@ export const objectScoreDescriptor = (values: Value2[]): ScoreDescriptor => {
                   ? value
                   : parseFloat(value === true ? "1" : value),
               )
-            : value;
+            : String(value);
         scores.push(
           <div
             className={clsx(

inspect_ai/log/_samples.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import asyncio
 import contextlib
 from contextvars import ContextVar
 from datetime import datetime
@@ -43,7 +42,6 @@ class ActiveSample:
         self.total_tokens = 0
         self.transcript = transcript
         self.sandboxes = sandboxes
-        self._sample_task = asyncio.current_task()
         self._interrupt_action: Literal["score", "error"] | None = None
     @property
@@ -60,8 +58,6 @@ class ActiveSample:
     def interrupt(self, action: Literal["score", "error"]) -> None:
         self._interrupt_action = action
-        assert self._sample_task
-        self._sample_task.cancel()
     @property
     def interrupt_action(self) -> Literal["score", "error"] | None:

inspect_ai/model/_model.py CHANGED Viewed

@@ -23,6 +23,7 @@ from tenacity import (
 from inspect_ai._util.constants import DEFAULT_MAX_CONNECTIONS
 from inspect_ai._util.content import Content, ContentImage, ContentText
 from inspect_ai._util.hooks import init_hooks, override_api_key, send_telemetry
+from inspect_ai._util.interrupt import check_sample_interrupt
 from inspect_ai._util.platform import platform_init
 from inspect_ai._util.registry import (
     RegistryInfo,
@@ -390,6 +391,8 @@ class Model:
             before_sleep=functools.partial(log_rate_limit_retry, self.api.model_name),
         )
         async def generate() -> ModelOutput:
+            check_sample_interrupt()
             if cache:
                 if isinstance(cache, CachePolicy):
                     policy = cache

inspect-ai 0.3.68__py3-none-any.whl → 0.3.69__py3-none-any.whl

inspect-ai 0.3.68__py3-none-any.whl → 0.3.69__py3-none-any.whl