inspect-ai 0.3.93__py3-none-any.whl → 0.3.95__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (115)
  1. inspect_ai/_display/textual/widgets/samples.py +3 -3
  2. inspect_ai/_display/textual/widgets/transcript.py +3 -29
  3. inspect_ai/_eval/loader.py +1 -1
  4. inspect_ai/_eval/task/run.py +21 -12
  5. inspect_ai/_util/answer.py +26 -0
  6. inspect_ai/_util/constants.py +0 -1
  7. inspect_ai/_util/exception.py +4 -0
  8. inspect_ai/_util/hash.py +39 -0
  9. inspect_ai/_util/local_server.py +51 -21
  10. inspect_ai/_util/path.py +22 -0
  11. inspect_ai/_util/trace.py +1 -1
  12. inspect_ai/_util/working.py +4 -0
  13. inspect_ai/_view/www/dist/assets/index.css +23 -22
  14. inspect_ai/_view/www/dist/assets/index.js +517 -204
  15. inspect_ai/_view/www/log-schema.json +375 -0
  16. inspect_ai/_view/www/package.json +1 -1
  17. inspect_ai/_view/www/src/@types/log.d.ts +90 -12
  18. inspect_ai/_view/www/src/app/log-view/navbar/SecondaryBar.tsx +2 -2
  19. inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +1 -4
  20. inspect_ai/_view/www/src/app/samples/SamplesTools.tsx +3 -13
  21. inspect_ai/_view/www/src/app/samples/sample-tools/SelectScorer.tsx +45 -48
  22. inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +16 -15
  23. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +47 -75
  24. inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +9 -9
  25. inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
  26. inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
  27. inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
  28. inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
  29. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
  30. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
  31. inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
  32. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
  33. inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
  34. inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
  35. inspect_ai/_view/www/src/app/types.ts +12 -2
  36. inspect_ai/_view/www/src/components/ExpandablePanel.module.css +1 -1
  37. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +5 -5
  38. inspect_ai/_view/www/src/state/hooks.ts +19 -3
  39. inspect_ai/_view/www/src/state/logSlice.ts +23 -5
  40. inspect_ai/_view/www/yarn.lock +9 -9
  41. inspect_ai/agent/_as_solver.py +3 -1
  42. inspect_ai/agent/_as_tool.py +6 -4
  43. inspect_ai/agent/_bridge/patch.py +1 -3
  44. inspect_ai/agent/_handoff.py +5 -1
  45. inspect_ai/agent/_react.py +4 -3
  46. inspect_ai/agent/_run.py +6 -1
  47. inspect_ai/agent/_types.py +9 -0
  48. inspect_ai/analysis/__init__.py +0 -0
  49. inspect_ai/analysis/beta/__init__.py +57 -0
  50. inspect_ai/analysis/beta/_dataframe/__init__.py +0 -0
  51. inspect_ai/analysis/beta/_dataframe/columns.py +145 -0
  52. inspect_ai/analysis/beta/_dataframe/evals/__init__.py +0 -0
  53. inspect_ai/analysis/beta/_dataframe/evals/columns.py +132 -0
  54. inspect_ai/analysis/beta/_dataframe/evals/extract.py +23 -0
  55. inspect_ai/analysis/beta/_dataframe/evals/table.py +140 -0
  56. inspect_ai/analysis/beta/_dataframe/events/__init__.py +0 -0
  57. inspect_ai/analysis/beta/_dataframe/events/columns.py +37 -0
  58. inspect_ai/analysis/beta/_dataframe/events/table.py +14 -0
  59. inspect_ai/analysis/beta/_dataframe/extract.py +54 -0
  60. inspect_ai/analysis/beta/_dataframe/messages/__init__.py +0 -0
  61. inspect_ai/analysis/beta/_dataframe/messages/columns.py +60 -0
  62. inspect_ai/analysis/beta/_dataframe/messages/extract.py +21 -0
  63. inspect_ai/analysis/beta/_dataframe/messages/table.py +87 -0
  64. inspect_ai/analysis/beta/_dataframe/record.py +377 -0
  65. inspect_ai/analysis/beta/_dataframe/samples/__init__.py +0 -0
  66. inspect_ai/analysis/beta/_dataframe/samples/columns.py +73 -0
  67. inspect_ai/analysis/beta/_dataframe/samples/extract.py +82 -0
  68. inspect_ai/analysis/beta/_dataframe/samples/table.py +329 -0
  69. inspect_ai/analysis/beta/_dataframe/util.py +157 -0
  70. inspect_ai/analysis/beta/_dataframe/validate.py +171 -0
  71. inspect_ai/dataset/_dataset.py +6 -3
  72. inspect_ai/log/__init__.py +10 -0
  73. inspect_ai/log/_convert.py +4 -9
  74. inspect_ai/log/_file.py +1 -1
  75. inspect_ai/log/_log.py +21 -1
  76. inspect_ai/log/_samples.py +14 -17
  77. inspect_ai/log/_transcript.py +77 -35
  78. inspect_ai/log/_tree.py +118 -0
  79. inspect_ai/model/_call_tools.py +44 -35
  80. inspect_ai/model/_model.py +51 -44
  81. inspect_ai/model/_openai_responses.py +17 -18
  82. inspect_ai/model/_providers/anthropic.py +30 -5
  83. inspect_ai/model/_providers/hf.py +27 -1
  84. inspect_ai/model/_providers/providers.py +1 -1
  85. inspect_ai/model/_providers/sglang.py +8 -2
  86. inspect_ai/model/_providers/vllm.py +6 -2
  87. inspect_ai/scorer/_choice.py +1 -2
  88. inspect_ai/solver/_chain.py +1 -1
  89. inspect_ai/solver/_fork.py +1 -1
  90. inspect_ai/solver/_multiple_choice.py +9 -23
  91. inspect_ai/solver/_plan.py +2 -2
  92. inspect_ai/solver/_task_state.py +7 -3
  93. inspect_ai/solver/_transcript.py +6 -7
  94. inspect_ai/tool/_mcp/_context.py +3 -5
  95. inspect_ai/tool/_mcp/_mcp.py +6 -5
  96. inspect_ai/tool/_mcp/server.py +1 -1
  97. inspect_ai/tool/_tools/_execute.py +4 -1
  98. inspect_ai/tool/_tools/_think.py +1 -1
  99. inspect_ai/tool/_tools/_web_search/__init__.py +3 -0
  100. inspect_ai/tool/_tools/{_web_search.py → _web_search/_google.py} +56 -103
  101. inspect_ai/tool/_tools/_web_search/_tavily.py +77 -0
  102. inspect_ai/tool/_tools/_web_search/_web_search.py +85 -0
  103. inspect_ai/util/__init__.py +4 -0
  104. inspect_ai/util/_anyio.py +11 -0
  105. inspect_ai/util/_collect.py +50 -0
  106. inspect_ai/util/_sandbox/events.py +3 -2
  107. inspect_ai/util/_span.py +58 -0
  108. inspect_ai/util/_subtask.py +27 -42
  109. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/METADATA +8 -1
  110. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/RECORD +114 -82
  111. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/WHEEL +1 -1
  112. inspect_ai/_display/core/group.py +0 -79
  113. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/entry_points.txt +0 -0
  114. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/licenses/LICENSE +0 -0
  115. {inspect_ai-0.3.93.dist-info → inspect_ai-0.3.95.dist-info}/top_level.txt +0 -0
inspect_ai/_display/textual/widgets/samples.py CHANGED
@@ -591,10 +591,10 @@ class SampleToolbar(Horizontal):
             )
             if isinstance(last_event, ModelEvent):
                 # see if there are retries in play
-                if sample.retry_count > 0:
-                    suffix = "retry" if sample.retry_count == 1 else "retries"
+                if last_event.retries:
+                    suffix = "retry" if last_event.retries == 1 else "retries"
                     pending_caption_text = (
-                        f"Generating ({sample.retry_count:,} {suffix})..."
+                        f"Generating ({last_event.retries:,} {suffix})..."
                     )
                 else:
                     pending_caption_text = "Generating..."
inspect_ai/_display/textual/widgets/transcript.py CHANGED
@@ -30,7 +30,7 @@ from inspect_ai.log._transcript import (
     SampleInitEvent,
     SampleLimitEvent,
     ScoreEvent,
-    StepEvent,
+    SpanBeginEvent,
     SubtaskEvent,
     ToolEvent,
 )
@@ -211,10 +211,6 @@ def render_tool_event(event: ToolEvent) -> list[EventDisplay]:
     # render the call
     content = transcript_tool_call(event)
 
-    # render sub-events
-    if event.events:
-        content.extend(render_sub_events(event.events))
-
     # render the output
     if isinstance(event.result, list):
         result: ToolResult = "\n".join(
@@ -235,23 +231,6 @@ def render_tool_event(event: ToolEvent) -> list[EventDisplay]:
     return [EventDisplay("tool call", Group(*content))]
 
 
-def render_step_event(event: StepEvent) -> EventDisplay:
-    if event.type == "solver":
-        return render_solver_event(event)
-    if event.type == "scorer":
-        return render_scorer_event(event)
-    else:
-        return EventDisplay(step_title(event))
-
-
-def render_solver_event(event: StepEvent) -> EventDisplay:
-    return EventDisplay(step_title(event))
-
-
-def render_scorer_event(event: StepEvent) -> EventDisplay:
-    return EventDisplay(step_title(event))
-
-
 def render_score_event(event: ScoreEvent) -> EventDisplay:
     table = Table(box=None, show_header=False)
     table.add_column("", min_width=10, justify="left")
@@ -272,10 +251,6 @@ def render_subtask_event(event: SubtaskEvent) -> list[EventDisplay]:
     # render header
     content: list[RenderableType] = [transcript_function(event.name, event.input)]
 
-    # render sub-events
-    if event.events:
-        content.extend(render_sub_events(event.events))
-
     if event.result:
         content.append(Text())
         if isinstance(event.result, str | int | float | bool | None):
@@ -345,8 +320,8 @@ def render_message(message: ChatMessage) -> list[RenderableType]:
     return content
 
 
-def step_title(event: StepEvent) -> str:
-    return f"{event.type or 'step'}: {event.name}"
+def span_title(event: SpanBeginEvent) -> str:
+    return f"{event.type or 'span'}: {event.name}"
 
 
 EventRenderer = Callable[[Any], EventDisplay | list[EventDisplay] | None]
@@ -354,7 +329,6 @@ EventRenderer = Callable[[Any], EventDisplay | list[EventDisplay] | None]
 _renderers: list[tuple[Type[Event], EventRenderer]] = [
     (SampleInitEvent, render_sample_init_event),
     (SampleLimitEvent, render_sample_limit_event),
-    (StepEvent, render_step_event),
     (ModelEvent, render_model_event),
     (ToolEvent, render_tool_event),
     (SubtaskEvent, render_subtask_event),
inspect_ai/_eval/loader.py CHANGED
@@ -428,7 +428,7 @@ def solver_from_spec(spec: SolverSpec) -> Solver:
             return as_solver(agent)
         else:
             raise ValueError(
-                f"Unkonwn solver {solver_name} (not registered as a @solver or @agent)"
+                f"Unknown solver {solver_name} (not registered as a @solver or @agent)"
             )
 
     # we do have a solver file
inspect_ai/_eval/task/run.py CHANGED
@@ -24,10 +24,10 @@ from inspect_ai._util._async import tg_collect
 from inspect_ai._util.constants import (
     DEFAULT_EPOCHS,
     DEFAULT_MAX_CONNECTIONS,
-    SAMPLE_SUBTASK,
 )
 from inspect_ai._util.datetime import iso_now
 from inspect_ai._util.error import exception_message
+from inspect_ai._util.exception import TerminateSampleError
 from inspect_ai._util.hooks import send_telemetry
 from inspect_ai._util.json import to_json_str_safe
 from inspect_ai._util.registry import (
@@ -36,6 +36,7 @@ from inspect_ai._util.registry import (
     registry_unqualified_name,
 )
 from inspect_ai._util.working import (
+    end_sample_working_limit,
     init_sample_working_limit,
     sample_waiting_time,
 )
@@ -65,8 +66,8 @@ from inspect_ai.log._transcript import (
     SampleInitEvent,
     SampleLimitEvent,
     ScoreEvent,
-    StepEvent,
     Transcript,
+    init_transcript,
     transcript,
 )
 from inspect_ai.model import (
@@ -91,7 +92,8 @@ from inspect_ai.solver._task_state import sample_state, set_sample_state, state_
 from inspect_ai.util._limit import LimitExceededError
 from inspect_ai.util._sandbox.context import sandbox_connections
 from inspect_ai.util._sandbox.environment import SandboxEnvironmentSpec
-from inspect_ai.util._subtask import init_subtask
+from inspect_ai.util._span import span
+from inspect_ai.util._store import init_subtask_store
 
 from ..context import init_task_context
 from ..task import Task
@@ -558,7 +560,9 @@ async def task_run_sample(
     # initialise subtask and scoring context
     init_sample_model_usage()
     set_sample_state(state)
-    sample_transcript: Transcript = init_subtask(SAMPLE_SUBTASK, state.store)
+    sample_transcript = Transcript()
+    init_transcript(sample_transcript)
+    init_subtask_store(state.store)
     if logger:
         sample_transcript._subscribe(
             lambda event: logger.log_sample_event(sample_id, state.epoch, event)
@@ -617,7 +621,8 @@ async def task_run_sample(
     results: dict[str, SampleScore] = {}
     try:
         # begin init
-        transcript()._event(StepEvent(action="begin", name="init"))
+        init_span = span("init", type="init")
+        await init_span.__aenter__()
 
         # sample init event (remove file bodies as they have content or absolute paths)
         event_sample = sample.model_copy(
@@ -636,10 +641,11 @@ async def task_run_sample(
         ) = contextlib.nullcontext()
         try:
             # update active sample wth sandboxes now that we are initialised
-            active.sandboxes = await sandbox_connections()
-
-            # end init
-            transcript()._event(StepEvent(action="end", name="init"))
+            # (ensure that we still exit init context in presence of sandbox error)
+            try:
+                active.sandboxes = await sandbox_connections()
+            finally:
+                await init_span.__aexit__(None, None, None)
 
             # initialise timeout context manager
             timeout_cm = (
@@ -671,6 +677,9 @@ async def task_run_sample(
                 # set progress for plan then run it
                 state = await plan(state, generate)
 
+                # disable sample working limit after execution
+                end_sample_working_limit()
+
             except TimeoutError:
                 if time_limit is not None:
                     transcript()._event(
@@ -712,7 +721,7 @@ async def task_run_sample(
                 # handle the cancel exception
                 raise
 
-            except LimitExceededError:
+            except (LimitExceededError, TerminateSampleError):
                 # capture most recent state for scoring
                 state = sample_state() or state
 
@@ -742,7 +751,7 @@ async def task_run_sample(
                     scorer_name = unique_scorer_name(
                         scorer, list(results.keys())
                     )
-                    with transcript().step(name=scorer_name, type="scorer"):
+                    async with span(name=scorer_name, type="scorer"):
                         score_result = (
                             await scorer(state, Target(sample.target))
                             if scorer
@@ -922,7 +931,7 @@ async def log_sample(
         input=sample.input,
         choices=sample.choices,
         target=sample.target,
-        metadata=sample.metadata or {},
+        metadata=state.metadata or {},
         sandbox=sample.sandbox,
         files=list(sample.files.keys()) if sample.files else None,
         setup=sample.setup,
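
Note: the `span()` calls above replace the old `StepEvent(action="begin"/"end")` pairs. A minimal sketch of the new API as an async context manager (assuming the `span` export added to `inspect_ai.util` in this release; the phase name is illustrative):

```python
from inspect_ai.util import span


async def run_phase(state, generate):
    # events emitted inside the block are grouped under a named span
    # in the transcript, replacing explicit begin/end StepEvents
    async with span("my-phase", type="solver"):
        state = await generate(state)
    return state
```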
inspect_ai/_util/answer.py ADDED
@@ -0,0 +1,26 @@
+def answer_character(index: int) -> str:
+    r"""
+    Helper to go from array index to char, for example:
+
+    0 -> 'A', 1 -> 'B', etc
+    """
+    if index < 26:
+        return chr(ord("A") + index)
+    else:
+        return str(index - 25)
+
+
+def answer_index(char: str) -> int:
+    r"""
+    Helper to go from char to array index, for example:
+
+    'A' -> 0, 'B' -> 1, etc
+    """
+    if char.isalpha() or char == "," or char == " ":
+        return ord(char.upper()) - ord("A")
+    elif char.isnumeric():
+        return 25 + int(char)
+    else:
+        raise ValueError(
+            f"Unepxected multiple choice answer: {char} (must be a letter or number)"
+        )
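
A quick round trip through the new helpers, following the logic above: indexes 0-25 map to 'A'-'Z', and index 26 onward falls back to the strings '1', '2', and so on (importing from the private `_util` module, for illustration only):

```python
from inspect_ai._util.answer import answer_character, answer_index

assert answer_character(0) == "A"   # first choice
assert answer_character(25) == "Z"  # last alphabetic choice
assert answer_character(26) == "1"  # indexes past 'Z' fall back to numbers
assert answer_index("b") == 1       # case-insensitive via char.upper()
assert answer_index("1") == 26      # numeric answers map past the alphabet
```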
inspect_ai/_util/constants.py CHANGED
@@ -34,7 +34,6 @@ EVAL_LOG_FORMAT = "eval"
 DEFAULT_DISPLAY = "full"
 LOG_SCHEMA_VERSION = 2
 SCORED_SUFFIX = "-scored"
-SAMPLE_SUBTASK = "sample"
 CONSOLE_DISPLAY_WIDTH = 120
 BASE_64_DATA_REMOVED = "<base64-data-removed>"
 SANDBOX_SETUP_TIMEOUT = 300
inspect_ai/_util/exception.py ADDED
@@ -0,0 +1,4 @@
+class TerminateSampleError(RuntimeError):
+    def __init__(self, reason: str) -> None:
+        self.reason = reason
+        super().__init__(reason)
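
As the run.py hunk above shows, `TerminateSampleError` is now caught alongside `LimitExceededError`, so raising it ends the current sample while still scoring its most recent state. A hypothetical sketch (the guard function is illustrative, not part of the package):

```python
from inspect_ai._util.exception import TerminateSampleError


def check_budget(spent_usd: float, budget_usd: float) -> None:
    # hypothetical guard: terminate this sample (not the whole eval)
    # once a cost budget is exhausted; task_run_sample() catches the
    # error and proceeds to scoring with the latest sample state
    if spent_usd > budget_usd:
        raise TerminateSampleError(
            f"budget exhausted: ${spent_usd:.2f} > ${budget_usd:.2f}"
        )
```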
inspect_ai/_util/hash.py CHANGED
@@ -1,3 +1,5 @@
+import hashlib
+
 import mmh3
 
 
@@ -7,3 +9,40 @@ def mm3_hash(message: str) -> str:
 
     # Convert to unsigned integers and then to hexadecimal
     return f"{h1 & 0xFFFFFFFFFFFFFFFF:016x}{h2 & 0xFFFFFFFFFFFFFFFF:016x}"
+
+
+def base57_id_hash(content: str) -> str:
+    """Generate base67 hash for content.
+
+    Hash the content, truncate to 128 bits, and then further truncate to 93 bits,
+    returning a 22-character Base-57-URL string. Collision probability reaches 50%
+    at approximately 70 trillion items.
+    """
+    digest_size = 16  # 128 bits
+    digest = hashlib.blake2s(content.encode(), digest_size=digest_size).digest()
+
+    # Truncate to ~93 bits (log₂57^22 ≈ 128.3)
+    as_int = int.from_bytes(digest, "big")
+    base57_str = to_base57(as_int)
+    if len(base57_str) > 22:
+        return base57_str[-22:]  # Take last 22 chars if longer
+    else:
+        # This is unlikely with a 128-bit input
+        return base57_str.rjust(22, ALPHABET57[0])
+
+
+# shortuuid uses these 57 characters (excluding similar-looking characters like 0/O, 1/I/l, etc.)
+ALPHABET57 = "23456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
+
+
+def to_base57(n: int) -> str:
+    if n == 0:
+        return ALPHABET57[0]
+
+    out = []
+    while n:
+        n, rem = divmod(n, 57)
+        out.append(ALPHABET57[rem])
+
+    # reverse and return
+    return "".join(reversed(out))
inspect_ai/_util/local_server.py CHANGED
@@ -62,16 +62,24 @@ def release_port(lock_socket: socket.socket) -> None:
         logger.error(f"Error closing socket: {e}")
 
 
-def execute_shell_command(command: list[str]) -> subprocess.Popen[str]:
+def execute_shell_command(
+    command: list[str], env: Optional[dict[str, str]] = None
+) -> subprocess.Popen[str]:
     """
     Execute a command and return its process handle.
 
     Args:
         command: List of command arguments
+        env: Optional environment variables to pass to the subprocess
 
     Returns:
         A subprocess.Popen object representing the running process
     """
+    # Create a process environment by copying current environment and updating with new values
+    process_env = os.environ.copy()
+    if env:
+        process_env.update(env)
+
     # Create a process that redirects output to pipes so we can capture it
     process = subprocess.Popen(
         command,
@@ -79,6 +87,7 @@ def execute_shell_command(command: list[str]) -> subprocess.Popen[str]:
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         bufsize=1,  # Line buffered
+        env=process_env,  # Pass the environment variables
     )
 
     # Set up background thread to read and log stdout
@@ -134,7 +143,10 @@ def kill_process_tree(pid: int) -> None:
 
 
 def launch_server_cmd(
-    command: list[str], host: str = "0.0.0.0", port: Optional[int] = None
+    command: list[str],
+    host: str = "0.0.0.0",
+    port: Optional[int] = None,
+    env: Optional[dict[str, str]] = None,
 ) -> Tuple[subprocess.Popen[str], int, list[str]]:
     """
     Launch a server process with the given base command and return the process, port, and full command.
@@ -143,6 +155,7 @@ def launch_server_cmd(
         command: Base command to execute
         host: Host to bind to
         port: Port to bind to. If None, a free port is reserved.
+        env: Optional environment variables to pass to the subprocess
 
     Returns:
         Tuple of (process, port, full_command)
@@ -155,7 +168,7 @@ def launch_server_cmd(
     full_command = command + ["--port", str(port)]
     logger.info(f"Launching server on port {port}")
 
-    process = execute_shell_command(full_command)
+    process = execute_shell_command(full_command, env=env)
 
     if lock_socket is not None:
         process_socket_map[process] = lock_socket
@@ -181,6 +194,7 @@ def wait_for_server(
     base_url: str,
     process: subprocess.Popen[str],
     full_command: Optional[list[str]] = None,
+    env: Optional[dict[str, str]] = None,
    timeout: Optional[int] = None,
    api_key: Optional[str] = None,
 ) -> None:
@@ -191,6 +205,7 @@ def wait_for_server(
         base_url: The base URL of the server
         process: The subprocess running the server
         full_command: The full command used to launch the server
+        env: The environment variables to use for the request
         timeout: Maximum time to wait in seconds. None means wait forever.
         api_key: The API key to use for the request
     """
@@ -198,7 +213,10 @@ def wait_for_server(
     start_time = time.time()
     debug_advice = "Try rerunning with '--log-level debug' to see the full traceback."
     if full_command:
-        debug_advice += f" Alternatively, you can run the following launch command manually to see the full traceback:\n\n{' '.join(full_command)}\n\n"
+        debug_advice += " Alternatively, you can run the following launch command manually to see the full traceback:\n\n"
+        if env:
+            debug_advice += " ".join([f"{k}={v}" for k, v in env.items()]) + " "
+        debug_advice += " ".join(full_command) + "\n\n"
 
     while True:
         # Check for timeout first
@@ -245,6 +263,7 @@ def start_local_server(
     server_type: str = "server",
     timeout: Optional[int] = DEFAULT_TIMEOUT,
     server_args: Optional[dict[str, Any]] = None,
+    env: Optional[dict[str, str]] = None,
 ) -> Tuple[str, subprocess.Popen[str], int]:
     """
     Start a server with the given command and handle potential errors.
@@ -257,6 +276,7 @@ def start_local_server(
         server_type: Type of server being started (for error messages)
         timeout: Maximum time to wait for server to become ready
         server_args: Additional server arguments to pass to the command
+        env: Optional environment variables to pass to the subprocess
     Returns:
         Tuple of (base_url, process, port)
 
@@ -266,15 +286,22 @@ def start_local_server(
     full_command = base_cmd
     server_process = None
 
+    # Initialize environment variables if not provided
+    process_env = {} if env is None else env.copy()
+
     if server_args:
         for key, value in server_args.items():
             # Convert Python style args (underscore) to CLI style (dash)
             cli_key = key.replace("_", "-")
-            full_command.extend([f"--{cli_key}", str(value)])
+            if value == "":
+                # If the value is empty, just add the flag
+                full_command.extend([f"--{cli_key}"])
+            else:
+                full_command.extend([f"--{cli_key}", str(value)])
 
     try:
         server_process, found_port, full_command = launch_server_cmd(
-            full_command, host=host, port=port
+            full_command, host=host, port=port, env=process_env
        )
        base_url = f"http://localhost:{found_port}/v1"
        wait_for_server(
@@ -283,6 +310,7 @@ def start_local_server(
            api_key=api_key,
            timeout=timeout,
            full_command=full_command,
+           env=process_env,
        )
        return base_url, server_process, found_port
     except Exception as e:
@@ -330,17 +358,18 @@ def merge_env_server_args(
 
 def configure_devices(
     server_args: dict[str, Any], parallel_size_param: str = "tensor_parallel_size"
-) -> dict[str, Any]:
-    """Configure device settings and return updated server args.
+) -> tuple[dict[str, Any], dict[str, str]]:
+    """Configure device settings and return updated server args and environment variables.
 
     Args:
         server_args: Dictionary of server arguments
        parallel_size_param: Name of parameter to set with device count if not specified
 
     Returns:
-        Updated server arguments dict
+        Tuple of (updated server arguments dict, environment variables dict)
     """
     result = server_args.copy()
+    env_vars = {}
 
     devices = None
     if "device" in result and "devices" in result:
@@ -350,19 +379,20 @@ def configure_devices(
     elif "device" in result:
         devices = result.pop("device")
 
-    # Convert device list to comma-separated string if needed
-    if isinstance(devices, list):
-        device_str = ",".join(map(str, devices))
-    else:
-        device_str = str(devices)
+    if devices is not None:
+        # Convert device list to comma-separated string if needed
+        if isinstance(devices, list):
+            device_str = ",".join(map(str, devices))
+        else:
+            device_str = str(devices)
 
-    # Set CUDA_VISIBLE_DEVICES environment variable
-    os.environ["CUDA_VISIBLE_DEVICES"] = device_str
+        # Add to env_vars instead of setting os.environ directly
+        env_vars["CUDA_VISIBLE_DEVICES"] = device_str
 
-    device_count = len(device_str.split(","))
+        device_count = len(device_str.split(","))
 
-    # Set parallel size parameter if not explicitly provided
-    if parallel_size_param not in result:
-        result[parallel_size_param] = device_count
+        # Set parallel size parameter if not explicitly provided
+        if parallel_size_param not in result:
+            result[parallel_size_param] = device_count
 
-    return result
+    return result, env_vars
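
Taken together, these changes thread environment variables from `configure_devices` through to the launched subprocess instead of mutating `os.environ`. A sketch of the intended call pattern (the command, model path, and argument values are illustrative, and the exact `start_local_server` parameter order is inferred from the hunks above):

```python
from inspect_ai._util.local_server import configure_devices, start_local_server

server_args = {"device": [0, 1], "dtype": "bfloat16"}

# devices now come back as env vars rather than being written to os.environ
server_args, env_vars = configure_devices(server_args)
assert env_vars == {"CUDA_VISIBLE_DEVICES": "0,1"}
assert server_args["tensor_parallel_size"] == 2  # inferred from device count

base_url, process, port = start_local_server(
    ["python", "-m", "sglang.launch_server", "--model-path", "my/model"],
    host="0.0.0.0",
    port=None,                # reserve a free port
    server_type="sglang",
    server_args=server_args,  # appended as --dtype bfloat16, etc.
    env=env_vars,             # forwarded to subprocess.Popen
)
```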
inspect_ai/_util/path.py CHANGED
@@ -6,6 +6,10 @@ from copy import deepcopy
 from pathlib import PurePath
 from typing import Any, Iterator, overload
 
+from fsspec.implementations.local import LocalFileSystem  # type: ignore
+
+from inspect_ai._util.file import filesystem
+
 
 @contextmanager
 def add_to_path(p: str) -> Iterator[None]:
@@ -98,6 +102,24 @@ def cwd_relative_path(file: str | None, walk_up: bool = False) -> str | None:
     return None
 
 
+def pretty_path(file: str) -> str:
+    fs = filesystem(file)
+    if fs.is_local():
+        file = LocalFileSystem._strip_protocol(file)
+        return cwd_relative_path(file)
+    else:
+        return file
+
+
+def native_path(file: str) -> str:
+    fs = filesystem(file)
+    if fs.is_local():
+        file = LocalFileSystem._strip_protocol(file)
+        return file
+    else:
+        return file
+
+
 # A slightly modified implementation of task_path.relative(d, walk_up=True)
 # since that wasn't introduced until python 3.12
 def relative_walk(from_path: PurePath, to_path: PurePath) -> str:
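
A brief sketch of the two new helpers: both strip the `file://` protocol for local paths, but `pretty_path` additionally relativizes against the working directory, while remote URLs pass through unchanged (example paths are illustrative):

```python
from inspect_ai._util.path import native_path, pretty_path

# local file URL: protocol stripped; pretty_path is also made cwd-relative
native_path("file:///home/user/project/logs/run.eval")
# -> "/home/user/project/logs/run.eval"
pretty_path("file:///home/user/project/logs/run.eval")
# -> "logs/run.eval" (assuming cwd is /home/user/project)

# remote URL: returned unchanged by both helpers
pretty_path("s3://bucket/logs/run.eval")
# -> "s3://bucket/logs/run.eval"
```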
inspect_ai/_util/trace.py CHANGED
@@ -287,7 +287,7 @@ def rotate_trace_files() -> None:
         rotate_files = list_trace_files()[10:]
         for file in rotate_files:
             file.file.unlink(missing_ok=True)
-    except FileNotFoundError:
+    except (FileNotFoundError, OSError):
         pass
 
 
inspect_ai/_util/working.py CHANGED
@@ -10,6 +10,10 @@ def init_sample_working_limit(start_time: float, working_limit: float | None) ->
     _sample_waiting_time.set(0)
 
 
+def end_sample_working_limit() -> None:
+    _sample_working_limit.set(None)
+
+
 def sample_waiting_time() -> float:
     return _sample_waiting_time.get()
 
inspect_ai/_view/www/dist/assets/index.css CHANGED
@@ -15489,34 +15489,34 @@ pre[class*="language-"] {
   padding: 0.1rem 0.6rem;
   border-radius: var(--bs-border-radius);
 }
-._expandableBordered_1wpxz_1 {
+._expandableBordered_59eal_1 {
   border: solid var(--bs-light-border-subtle) 1px;
 }
 
-._expandableTogglable_1wpxz_5 {
+._expandableTogglable_59eal_5 {
   margin-bottom: 1em;
 }
 
-._expandableContents_1wpxz_9 {
+._expandableContents_59eal_9 {
   font-size: var(--inspect-font-size-base);
 }
 
-._expandableCollapsed_1wpxz_13 {
+._expandableCollapsed_59eal_13 {
   overflow: hidden;
 }
 
-._moreToggle_1wpxz_17 {
+._moreToggle_59eal_17 {
   display: flex;
   margin-top: 0;
   position: relative;
-  height: 8px;
+  height: 18px;
 }
 
-._moreToggle_1wpxz_17._bordered_1wpxz_24 {
+._moreToggle_59eal_17._bordered_59eal_24 {
   border-top: solid var(--bs-light-border-subtle) 1px;
 }
 
-._moreToggleContainer_1wpxz_28 {
+._moreToggleContainer_59eal_28 {
   position: absolute;
   top: -1px;
   right: 0;
@@ -15527,7 +15527,7 @@ pre[class*="language-"] {
   margin-right: 0;
 }
 
-._moreToggleButton_1wpxz_39 {
+._moreToggleButton_59eal_39 {
   font-size: var(--inspect-font-size-smaller);
   border: none;
   padding: 0.1rem 0.5rem;
@@ -17342,37 +17342,38 @@ pre[class*="language-"] {
 ._metadata_1a3fk_21 {
   margin: 0.5em 0;
 }
-._contents_iwnfd_1 {
+._contents_1irga_1 {
   margin-top: 0.5em;
 }
 
-._contents_iwnfd_1 > :last-child {
+._contents_1irga_1 > :last-child {
   margin-bottom: 0;
 }
 
-._twoColumn_iwnfd_9 {
+._twoColumn_1irga_9 {
   display: grid;
   grid-template-columns: auto 1fr;
   column-gap: 1.5em;
 }
 
-._exec_iwnfd_15 {
-  margin-top: 0.5em;
+._exec_1irga_15 {
+  margin-top: 0;
 }
 
-._result_iwnfd_19 {
+._result_1irga_19 {
   margin-top: 0.5em;
 }
 
-._fileLabel_iwnfd_23 {
+._fileLabel_1irga_23 {
   margin-top: 0;
   margin-bottom: 0;
 }
 
-._wrapPre_iwnfd_28 {
+._wrapPre_1irga_28 {
   white-space: pre-wrap;
   word-wrap: break-word;
   overflow-wrap: break-word;
+  margin-bottom: 0;
 }
 ._explanation_1ww42_1 {
   display: grid;
@@ -20001,20 +20002,20 @@ span.ap-marker-container:hover span.ap-marker {
   padding-top: 0rem;
   margin-top: -8px;
 }
-._darkenedBg_1sie6_1 {
+._darkenedBg_u9na2_1 {
   background-color: var(--bs-light-bg-subtle);
 }
 
-._normalBg_1sie6_5 {
+._normalBg_u9na2_5 {
   background-color: var(--bs-body-bg);
 }
 
-._node_1sie6_9 {
+._node_u9na2_9 {
   padding-top: 0.7rem;
-  padding-bottom: 0em;
+  padding-bottom: 1px;
 }
 
-._attached_1sie6_14 {
+._attached_u9na2_14 {
   padding-top: 0rem;
   margin-top: -8px;
 }