PyPI - inspect-ai - Versions diffs - 0.3.94__py3-none-any.whl → 0.3.96__py3-none-any.whl - Mend

inspect-ai 0.3.94__py3-none-any.whl → 0.3.96__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (75) hide show

inspect_ai/_eval/loader.py +1 -1
inspect_ai/_eval/task/run.py +12 -6
inspect_ai/_util/exception.py +4 -0
inspect_ai/_util/hash.py +39 -0
inspect_ai/_util/local_server.py +16 -0
inspect_ai/_util/path.py +22 -0
inspect_ai/_util/trace.py +1 -1
inspect_ai/_util/working.py +4 -0
inspect_ai/_view/www/dist/assets/index.css +9 -9
inspect_ai/_view/www/dist/assets/index.js +117 -120
inspect_ai/_view/www/package.json +1 -1
inspect_ai/_view/www/src/app/log-view/navbar/SecondaryBar.tsx +2 -2
inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +1 -4
inspect_ai/_view/www/src/app/samples/SamplesTools.tsx +3 -13
inspect_ai/_view/www/src/app/samples/sample-tools/SelectScorer.tsx +45 -48
inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +16 -15
inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +47 -75
inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +9 -9
inspect_ai/_view/www/src/app/types.ts +12 -2
inspect_ai/_view/www/src/components/ExpandablePanel.module.css +1 -1
inspect_ai/_view/www/src/components/ExpandablePanel.tsx +5 -5
inspect_ai/_view/www/src/state/hooks.ts +19 -3
inspect_ai/_view/www/src/state/logSlice.ts +23 -5
inspect_ai/_view/www/yarn.lock +9 -9
inspect_ai/agent/_bridge/patch.py +1 -3
inspect_ai/agent/_types.py +1 -1
inspect_ai/analysis/__init__.py +0 -0
inspect_ai/analysis/beta/__init__.py +67 -0
inspect_ai/analysis/beta/_dataframe/__init__.py +0 -0
inspect_ai/analysis/beta/_dataframe/columns.py +145 -0
inspect_ai/analysis/beta/_dataframe/evals/__init__.py +0 -0
inspect_ai/analysis/beta/_dataframe/evals/columns.py +132 -0
inspect_ai/analysis/beta/_dataframe/evals/extract.py +23 -0
inspect_ai/analysis/beta/_dataframe/evals/table.py +177 -0
inspect_ai/analysis/beta/_dataframe/events/__init__.py +0 -0
inspect_ai/analysis/beta/_dataframe/events/columns.py +87 -0
inspect_ai/analysis/beta/_dataframe/events/extract.py +26 -0
inspect_ai/analysis/beta/_dataframe/events/table.py +100 -0
inspect_ai/analysis/beta/_dataframe/extract.py +73 -0
inspect_ai/analysis/beta/_dataframe/messages/__init__.py +0 -0
inspect_ai/analysis/beta/_dataframe/messages/columns.py +60 -0
inspect_ai/analysis/beta/_dataframe/messages/extract.py +21 -0
inspect_ai/analysis/beta/_dataframe/messages/table.py +79 -0
inspect_ai/analysis/beta/_dataframe/progress.py +26 -0
inspect_ai/analysis/beta/_dataframe/record.py +377 -0
inspect_ai/analysis/beta/_dataframe/samples/__init__.py +0 -0
inspect_ai/analysis/beta/_dataframe/samples/columns.py +77 -0
inspect_ai/analysis/beta/_dataframe/samples/extract.py +54 -0
inspect_ai/analysis/beta/_dataframe/samples/table.py +370 -0
inspect_ai/analysis/beta/_dataframe/util.py +160 -0
inspect_ai/analysis/beta/_dataframe/validate.py +171 -0
inspect_ai/log/_file.py +10 -3
inspect_ai/log/_log.py +21 -1
inspect_ai/model/_call_tools.py +2 -1
inspect_ai/model/_model.py +6 -4
inspect_ai/model/_openai_responses.py +17 -18
inspect_ai/model/_providers/anthropic.py +30 -5
inspect_ai/model/_providers/providers.py +1 -1
inspect_ai/solver/_multiple_choice.py +4 -1
inspect_ai/solver/_task_state.py +8 -4
inspect_ai/tool/_mcp/_context.py +3 -5
inspect_ai/tool/_mcp/_sandbox.py +17 -14
inspect_ai/tool/_mcp/server.py +1 -1
inspect_ai/tool/_tools/_think.py +1 -1
inspect_ai/tool/_tools/_web_search/__init__.py +3 -0
inspect_ai/tool/_tools/{_web_search.py → _web_search/_google.py} +56 -103
inspect_ai/tool/_tools/_web_search/_tavily.py +77 -0
inspect_ai/tool/_tools/_web_search/_web_search.py +85 -0
inspect_ai/util/_sandbox/events.py +3 -2
{inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/METADATA +9 -2
{inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/RECORD +75 -46
{inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/WHEEL +1 -1
{inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/licenses/LICENSE +0 -0
{inspect_ai-0.3.94.dist-info → inspect_ai-0.3.96.dist-info}/top_level.txt +0 -0

inspect_ai/_eval/loader.py CHANGED Viewed

@@ -428,7 +428,7 @@ def solver_from_spec(spec: SolverSpec) -> Solver:
                 return as_solver(agent)
             else:
                 raise ValueError(
-                    f"Unkonwn solver {solver_name} (not registered as a @solver or @agent)"
+                    f"Unknown solver {solver_name} (not registered as a @solver or @agent)"
                 )
         # we do have a solver file

inspect_ai/_eval/task/run.py CHANGED Viewed

@@ -27,6 +27,7 @@ from inspect_ai._util.constants import (
 )
 from inspect_ai._util.datetime import iso_now
 from inspect_ai._util.error import exception_message
+from inspect_ai._util.exception import TerminateSampleError
 from inspect_ai._util.hooks import send_telemetry
 from inspect_ai._util.json import to_json_str_safe
 from inspect_ai._util.registry import (
@@ -35,6 +36,7 @@ from inspect_ai._util.registry import (
     registry_unqualified_name,
 )
 from inspect_ai._util.working import (
+    end_sample_working_limit,
     init_sample_working_limit,
     sample_waiting_time,
 )
@@ -639,10 +641,11 @@ async def task_run_sample(
                 ) = contextlib.nullcontext()
                 try:
                     # update active sample with sandboxes now that we are initialised
-                    active.sandboxes = await sandbox_connections()
-                    # end init
-                    await init_span.__aexit__(None, None, None)
+                    # (ensure that we still exit init context in presence of sandbox error)
+                    try:
+                        active.sandboxes = await sandbox_connections()
+                    finally:
+                        await init_span.__aexit__(None, None, None)
                     # initialise timeout context manager
                     timeout_cm = (
@@ -674,6 +677,9 @@ async def task_run_sample(
                         # set progress for plan then run it
                         state = await plan(state, generate)
+                    # disable sample working limit after execution
+                    end_sample_working_limit()
                 except TimeoutError:
                     if time_limit is not None:
                         transcript()._event(
@@ -715,7 +721,7 @@ async def task_run_sample(
                         # handle the cancel exception
                         raise
-                except LimitExceededError:
+                except (LimitExceededError, TerminateSampleError):
                     # capture most recent state for scoring
                     state = sample_state() or state
@@ -925,7 +931,7 @@ async def log_sample(
         input=sample.input,
         choices=sample.choices,
         target=sample.target,
-        metadata=sample.metadata or {},
+        metadata=state.metadata or {},
         sandbox=sample.sandbox,
         files=list(sample.files.keys()) if sample.files else None,
         setup=sample.setup,

inspect_ai/_util/exception.py ADDED Viewed

@@ -0,0 +1,4 @@
+class TerminateSampleError(RuntimeError):
+    def __init__(self, reason: str) -> None:
+        self.reason = reason
+        super().__init__(reason)

inspect_ai/_util/hash.py CHANGED Viewed

@@ -1,3 +1,5 @@
+import hashlib
 import mmh3
@@ -7,3 +9,40 @@ def mm3_hash(message: str) -> str:
     # Convert to unsigned integers and then to hexadecimal
     return f"{h1 & 0xFFFFFFFFFFFFFFFF:016x}{h2 & 0xFFFFFFFFFFFFFFFF:016x}"
+def base57_id_hash(content: str) -> str:
+    """Generate base57 hash for content.
+    Hash the content, truncate to 128 bits, and then further truncate to 93 bits,
+    returning a 22-character Base-57-URL string. Collision probability reaches 50%
+    at approximately 70 trillion items.
+    """
+    digest_size = 16  # 128 bits
+    digest = hashlib.blake2s(content.encode(), digest_size=digest_size).digest()
+    # Truncate to ~93 bits (log₂57^22 ≈ 128.3)
+    as_int = int.from_bytes(digest, "big")
+    base57_str = to_base57(as_int)
+    if len(base57_str) > 22:
+        return base57_str[-22:]  # Take last 22 chars if longer
+    else:
+        # This is unlikely with a 128-bit input
+        return base57_str.rjust(22, ALPHABET57[0])
+# shortuuid uses these 57 characters (excluding similar-looking characters like 0/O, 1/I/l, etc.)
+ALPHABET57 = "23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
+def to_base57(n: int) -> str:
+    if n == 0:
+        return ALPHABET57[0]
+    out = []
+    while n:
+        n, rem = divmod(n, 57)
+        out.append(ALPHABET57[rem])
+    # reverse and return
+    return "".join(reversed(out))

inspect_ai/_util/local_server.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+import platform
 import random
 import socket
 import subprocess
@@ -33,6 +34,21 @@ def reserve_port(
     Returns:
         A tuple (port, lock_socket) where `lock_socket` is kept open to hold the lock.
     """
+    is_macos = platform.system() == "Darwin"
+    if is_macos:
+        logger.info(
+            "MacOS system detected. A free binding port will be identified, but not reserved until the server binds to it."
+        )
+        # On macOS, let the OS pick a free port but not open it
+        # It leads to a small race condition window until the port
+        # is actually opened by the llm server
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+            s.bind((host, 0))  # Bind to any free port
+            port = s.getsockname()[1]
+        return port, s
+    # Non-macOS behavior: try ports in range
     candidates = list(range(start, end))
     random.shuffle(candidates)

inspect_ai/_util/path.py CHANGED Viewed

@@ -6,6 +6,10 @@ from copy import deepcopy
 from pathlib import PurePath
 from typing import Any, Iterator, overload
+from fsspec.implementations.local import LocalFileSystem  # type: ignore
+from inspect_ai._util.file import filesystem
 @contextmanager
 def add_to_path(p: str) -> Iterator[None]:
@@ -98,6 +102,24 @@ def cwd_relative_path(file: str | None, walk_up: bool = False) -> str | None:
         return None
+def pretty_path(file: str) -> str:
+    fs = filesystem(file)
+    if fs.is_local():
+        file = LocalFileSystem._strip_protocol(file)
+        return cwd_relative_path(file)
+    else:
+        return file
+def native_path(file: str) -> str:
+    fs = filesystem(file)
+    if fs.is_local():
+        file = LocalFileSystem._strip_protocol(file)
+        return file
+    else:
+        return file
 # A slightly modified implementation of task_path.relative(d, walk_up=True)
 # since that wasn't introduced until python 3.12
 def relative_walk(from_path: PurePath, to_path: PurePath) -> str:

inspect_ai/_util/trace.py CHANGED Viewed

@@ -287,7 +287,7 @@ def rotate_trace_files() -> None:
         rotate_files = list_trace_files()[10:]
         for file in rotate_files:
             file.file.unlink(missing_ok=True)
-    except FileNotFoundError:
+    except (FileNotFoundError, OSError):
         pass

inspect_ai/_util/working.py CHANGED Viewed

@@ -10,6 +10,10 @@ def init_sample_working_limit(start_time: float, working_limit: float | None) ->
     _sample_waiting_time.set(0)
+def end_sample_working_limit() -> None:
+    _sample_working_limit.set(None)
 def sample_waiting_time() -> float:
     return _sample_waiting_time.get()

inspect_ai/_view/www/dist/assets/index.css CHANGED Viewed

@@ -15489,34 +15489,34 @@ pre[class*="language-"] {
   padding: 0.1rem 0.6rem;
   border-radius: var(--bs-border-radius);
 }
-._expandableBordered_1wpxz_1 {
+._expandableBordered_59eal_1 {
   border: solid var(--bs-light-border-subtle) 1px;
 }
-._expandableTogglable_1wpxz_5 {
+._expandableTogglable_59eal_5 {
   margin-bottom: 1em;
 }
-._expandableContents_1wpxz_9 {
+._expandableContents_59eal_9 {
   font-size: var(--inspect-font-size-base);
 }
-._expandableCollapsed_1wpxz_13 {
+._expandableCollapsed_59eal_13 {
   overflow: hidden;
 }
-._moreToggle_1wpxz_17 {
+._moreToggle_59eal_17 {
   display: flex;
   margin-top: 0;
   position: relative;
-  height: 8px;
+  height: 18px;
 }
-._moreToggle_1wpxz_17._bordered_1wpxz_24 {
+._moreToggle_59eal_17._bordered_59eal_24 {
   border-top: solid var(--bs-light-border-subtle) 1px;
 }
-._moreToggleContainer_1wpxz_28 {
+._moreToggleContainer_59eal_28 {
   position: absolute;
   top: -1px;
   right: 0;
@@ -15527,7 +15527,7 @@ pre[class*="language-"] {
   margin-right: 0;
 }
-._moreToggleButton_1wpxz_39 {
+._moreToggleButton_59eal_39 {
   font-size: var(--inspect-font-size-smaller);
   border: none;
   padding: 0.1rem 0.5rem;

inspect-ai 0.3.94__py3-none-any.whl → 0.3.96__py3-none-any.whl

inspect-ai 0.3.94__py3-none-any.whl → 0.3.96__py3-none-any.whl