inspect-ai 0.3.92__py3-none-any.whl → 0.3.93__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. inspect_ai/_cli/eval.py +27 -0
  2. inspect_ai/_eval/eval.py +19 -2
  3. inspect_ai/_eval/evalset.py +4 -1
  4. inspect_ai/_eval/run.py +41 -0
  5. inspect_ai/_eval/task/generate.py +38 -44
  6. inspect_ai/_eval/task/log.py +26 -28
  7. inspect_ai/_eval/task/run.py +13 -20
  8. inspect_ai/_util/local_server.py +368 -0
  9. inspect_ai/_util/working.py +10 -4
  10. inspect_ai/_view/www/dist/assets/index.css +159 -146
  11. inspect_ai/_view/www/dist/assets/index.js +1020 -1061
  12. inspect_ai/_view/www/log-schema.json +4 -3
  13. inspect_ai/_view/www/package.json +1 -1
  14. inspect_ai/_view/www/src/@types/log.d.ts +3 -2
  15. inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +2 -2
  16. inspect_ai/_view/www/src/app/content/MetaDataView.module.css +1 -1
  17. inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +1 -1
  18. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +1 -1
  19. inspect_ai/_view/www/src/app/log-view/LogView.tsx +11 -0
  20. inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +2 -9
  21. inspect_ai/_view/www/src/app/log-view/tabs/ModelsTab.tsx +51 -0
  22. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.module.css +6 -0
  23. inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +143 -0
  24. inspect_ai/_view/www/src/app/plan/ModelCard.tsx +1 -2
  25. inspect_ai/_view/www/src/app/plan/PlanCard.tsx +29 -7
  26. inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -1
  27. inspect_ai/_view/www/src/app/plan/PlanDetailView.tsx +1 -198
  28. inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -1
  29. inspect_ai/_view/www/src/app/usage/ModelUsagePanel.tsx +3 -2
  30. inspect_ai/_view/www/src/app/usage/TokenTable.module.css +4 -1
  31. inspect_ai/_view/www/src/app/usage/TokenTable.tsx +2 -2
  32. inspect_ai/_view/www/src/app/usage/UsageCard.module.css +8 -3
  33. inspect_ai/_view/www/src/app/usage/UsageCard.tsx +1 -35
  34. inspect_ai/_view/www/src/components/Card.css +0 -1
  35. inspect_ai/_view/www/src/constants.ts +2 -0
  36. inspect_ai/_view/www/src/utils/numeric.ts +17 -0
  37. inspect_ai/agent/_agent.py +3 -3
  38. inspect_ai/agent/_as_solver.py +20 -12
  39. inspect_ai/agent/_as_tool.py +15 -3
  40. inspect_ai/agent/_handoff.py +8 -1
  41. inspect_ai/agent/_run.py +11 -3
  42. inspect_ai/log/__init__.py +4 -0
  43. inspect_ai/log/_file.py +56 -0
  44. inspect_ai/log/_log.py +99 -0
  45. inspect_ai/log/_recorders/__init__.py +2 -0
  46. inspect_ai/log/_recorders/buffer/database.py +12 -11
  47. inspect_ai/log/_recorders/buffer/filestore.py +2 -2
  48. inspect_ai/log/_recorders/buffer/types.py +2 -2
  49. inspect_ai/log/_recorders/eval.py +20 -65
  50. inspect_ai/log/_recorders/file.py +28 -6
  51. inspect_ai/log/_recorders/recorder.py +7 -0
  52. inspect_ai/log/_recorders/types.py +1 -23
  53. inspect_ai/log/_samples.py +0 -8
  54. inspect_ai/log/_transcript.py +7 -1
  55. inspect_ai/log/_util.py +52 -0
  56. inspect_ai/model/__init__.py +5 -1
  57. inspect_ai/model/_call_tools.py +32 -12
  58. inspect_ai/model/_generate_config.py +14 -8
  59. inspect_ai/model/_model.py +21 -48
  60. inspect_ai/model/_model_output.py +25 -0
  61. inspect_ai/model/_openai.py +2 -0
  62. inspect_ai/model/_providers/anthropic.py +13 -23
  63. inspect_ai/model/_providers/openai_o1.py +8 -2
  64. inspect_ai/model/_providers/providers.py +18 -4
  65. inspect_ai/model/_providers/sglang.py +241 -0
  66. inspect_ai/model/_providers/vllm.py +207 -400
  67. inspect_ai/solver/__init__.py +7 -2
  68. inspect_ai/solver/_basic_agent.py +3 -10
  69. inspect_ai/solver/_task_state.py +26 -88
  70. inspect_ai/tool/_json_rpc_helpers.py +45 -17
  71. inspect_ai/tool/_mcp/_mcp.py +2 -0
  72. inspect_ai/tool/_mcp/_sandbox.py +8 -2
  73. inspect_ai/tool/_mcp/server.py +3 -1
  74. inspect_ai/tool/_tool_call.py +4 -1
  75. inspect_ai/tool/_tool_support_helpers.py +51 -12
  76. inspect_ai/tool/_tools/_bash_session.py +190 -68
  77. inspect_ai/tool/_tools/_computer/_computer.py +25 -1
  78. inspect_ai/tool/_tools/_text_editor.py +4 -3
  79. inspect_ai/tool/_tools/_web_browser/_web_browser.py +10 -3
  80. inspect_ai/util/__init__.py +12 -0
  81. inspect_ai/util/_limit.py +393 -0
  82. inspect_ai/util/_limited_conversation.py +57 -0
  83. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/METADATA +1 -1
  84. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/RECORD +89 -108
  85. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/WHEEL +1 -1
  86. inspect_ai/solver/_limit.py +0 -39
  87. inspect_ai/tool/_tools/_computer/_resources/Dockerfile +0 -102
  88. inspect_ai/tool/_tools/_computer/_resources/README.md +0 -30
  89. inspect_ai/tool/_tools/_computer/_resources/entrypoint/entrypoint.sh +0 -18
  90. inspect_ai/tool/_tools/_computer/_resources/entrypoint/novnc_startup.sh +0 -20
  91. inspect_ai/tool/_tools/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -48
  92. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xfce_startup.sh +0 -13
  93. inspect_ai/tool/_tools/_computer/_resources/entrypoint/xvfb_startup.sh +0 -48
  94. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
  95. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -9
  96. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -61
  97. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -10
  98. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +0 -91
  99. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -10
  100. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -10
  101. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -10
  102. inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +0 -8
  103. inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +0 -12
  104. inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +0 -78
  105. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +0 -22
  106. inspect_ai/tool/_tools/_computer/_resources/tool/_logger.py +0 -22
  107. inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +0 -42
  108. inspect_ai/tool/_tools/_computer/_resources/tool/_tool_result.py +0 -33
  109. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +0 -341
  110. inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +0 -141
  111. inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +0 -65
  112. inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
  113. inspect_ai/tool/_tools/_computer/test_args.py +0 -151
  114. /inspect_ai/{tool/_tools/_computer/_resources/tool/__init__.py → _view/www/src/app/log-view/tabs/ModelsTab.module.css} +0 -0
  115. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/entry_points.txt +0 -0
  116. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/licenses/LICENSE +0 -0
  117. {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.93.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,368 @@
1
import json
import logging
import os
import random
import socket
import subprocess
import time
from typing import Any, Dict, Optional, Tuple

import httpx

# Module-level logger used for server lifecycle messages throughout this file
logger = logging.getLogger(__name__)

# Maps each launched server process to the socket holding its reserved port,
# so terminate_process() can release the port after the server is killed.
process_socket_map: Dict[subprocess.Popen, socket.socket] = {}


DEFAULT_TIMEOUT = 60 * 10  # fairly conservative default timeout of 10 minutes
20
+
21
+
22
def reserve_port(
    host: str, start: int = 30000, end: int = 40000
) -> Tuple[int, socket.socket]:
    """
    Reserve an available port by binding a socket to it.

    Ports in [start, end) are tried in random order; the first successful
    bind wins. The bound socket is returned alongside the port and must be
    kept open by the caller to hold the reservation.

    Args:
        host: Host to bind to
        start: Minimum port number to try
        end: Maximum port number to try

    Returns:
        A tuple (port, lock_socket) where `lock_socket` is kept open to hold the lock.

    Raises:
        RuntimeError: If no port in the range could be bound.
    """
    ports = list(range(start, end))
    random.shuffle(ports)

    for candidate in ports:
        lock_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        lock_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        try:
            lock_socket.bind((host, candidate))
        except socket.error:
            # Port already taken: discard this socket and try the next one.
            lock_socket.close()
        else:
            return candidate, lock_socket

    raise RuntimeError("No free port available.")
50
+
51
+
52
def release_port(lock_socket: socket.socket) -> None:
    """
    Release a reserved port by closing its lock socket.

    Any error raised while closing is logged rather than propagated.

    Args:
        lock_socket: The socket to close
    """
    try:
        lock_socket.close()
    except Exception as e:
        # Best effort: a failed close should never take down the caller.
        logger.error(f"Error closing socket: {e}")
63
+
64
+
65
def execute_shell_command(command: list[str]) -> subprocess.Popen[str]:
    """
    Execute a command and return its process handle.

    The child's stdout is logged at DEBUG level and its stderr at INFO
    level; both pipes are drained by daemon threads so they never fill
    up and block the child.

    Args:
        command: List of command arguments

    Returns:
        A subprocess.Popen object representing the running process
    """
    import threading

    # Redirect output to pipes so the reader threads below can capture it.
    process = subprocess.Popen(
        command,
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        bufsize=1,  # Line buffered
    )

    def _pump(stream, emit) -> None:
        # Forward each non-empty line from a pipe to the given log method,
        # then close the pipe once the child stops producing output.
        if stream is None:
            return
        for line in iter(stream.readline, ""):
            if line:
                emit(line.strip())
        stream.close()

    threading.Thread(
        target=_pump, args=(process.stdout, logger.debug), daemon=True
    ).start()
    threading.Thread(
        target=_pump, args=(process.stderr, logger.info), daemon=True
    ).start()

    logger.info(f"Started server with command: {' '.join(command)}")
    return process
110
+
111
+
112
def kill_process_tree(pid: int) -> None:
    """
    Kill a process and all its children.

    Sends SIGTERM to the children (via pkill -P) and the parent first,
    waits one second, then escalates to SIGKILL if the parent still
    exists. All failures are logged rather than raised.

    Args:
        pid: Process ID to kill
    """
    try:
        # Send SIGTERM
        subprocess.run(["pkill", "-TERM", "-P", str(pid)], check=False)
        subprocess.run(["kill", "-TERM", str(pid)], check=False)
        time.sleep(1)

        # If process still exists, send SIGKILL
        try:
            os.kill(pid, 0)  # Check if process exists
            subprocess.run(["pkill", "-KILL", "-P", str(pid)], check=False)
            subprocess.run(["kill", "-KILL", str(pid)], check=False)
        except OSError:
            pass  # Process already terminated
            # NOTE: a missing pkill/kill binary also raises OSError
            # (FileNotFoundError) and is deliberately swallowed here.
    except Exception as e:
        logger.error(f"Error killing process tree: {e}")
134
+
135
+
136
def launch_server_cmd(
    command: list[str], host: str = "0.0.0.0", port: Optional[int] = None
) -> Tuple[subprocess.Popen[str], int, list[str]]:
    """
    Launch a server process with the given base command and return the process, port, and full command.

    When no port is given, a free one is reserved and the reservation
    socket is recorded in `process_socket_map` so that
    `terminate_process()` can release it later.

    Args:
        command: Base command to execute
        host: Host to bind to
        port: Port to bind to. If None, a free port is reserved.

    Returns:
        Tuple of (process, port, full_command)
    """
    lock_socket: Optional[socket.socket] = None
    if port is None:
        # No explicit port requested: grab a free one and hold the lock.
        port, lock_socket = reserve_port(host)

    full_command = command + ["--port", str(port)]
    logger.info(f"Launching server on port {port}")

    process = execute_shell_command(full_command)

    # Remember the reservation so terminating the process frees the port.
    if lock_socket is not None:
        process_socket_map[process] = lock_socket

    return process, port, full_command
164
+
165
+
166
def terminate_process(process: subprocess.Popen[str]) -> None:
    """
    Terminate the process and automatically release the reserved port.

    Args:
        process: The process to terminate
    """
    kill_process_tree(process.pid)

    # If we reserved a port for this process, close the lock socket now.
    reserved = process_socket_map.pop(process, None)
    if reserved is not None:
        release_port(reserved)
178
+
179
+
180
def wait_for_server(
    base_url: str,
    process: subprocess.Popen[str],
    full_command: Optional[list[str]] = None,
    timeout: Optional[int] = None,
    api_key: Optional[str] = None,
) -> None:
    """
    Wait for the server to be ready by polling the /v1/models endpoint.

    Polls once per second, treating connection errors and non-200
    responses as "not ready yet". Fails fast if the server process dies.

    Args:
        base_url: The base URL of the server
        process: The subprocess running the server
        full_command: The full command used to launch the server
        timeout: Maximum time to wait in seconds. None means wait forever.
        api_key: The API key to use for the request

    Raises:
        TimeoutError: If the server is not ready within `timeout` seconds.
        RuntimeError: If the server process exits before becoming ready.
    """
    logger.info(f"Waiting for server at {base_url} to become ready...")
    start_time = time.time()
    debug_advice = "Try rerunning with '--log-level debug' to see the full traceback."
    if full_command:
        debug_advice += f" Alternatively, you can run the following launch command manually to see the full traceback:\n\n{' '.join(full_command)}\n\n"

    while True:
        # Check for timeout first. Use an explicit None check so that
        # timeout=0 means "fail immediately" instead of "wait forever"
        # (the original truthiness test silently disabled a zero timeout).
        if timeout is not None and time.time() - start_time > timeout:
            error_msg = f"Server did not become ready within timeout period ({timeout} seconds). Try increasing the timeout with '-M timeout=...'. {debug_advice}"
            logger.error(error_msg)
            raise TimeoutError(error_msg)

        # Check if the process is still alive (poll once and reuse the code)
        exit_code = process.poll()
        if exit_code is not None:
            error_msg = f"Server process exited unexpectedly with code {exit_code}. {debug_advice}"
            logger.error(error_msg)
            raise RuntimeError(error_msg)

        try:
            response = httpx.get(
                f"{base_url}/v1/models",
                headers={"Authorization": f"Bearer {api_key or 'None'}"},
                timeout=5.0,  # Short timeout for individual requests
            )
            if response.status_code == 200:
                logger.info("Server is ready.")
                break

            # Log non-200 status but don't treat as hard error yet
            logger.debug(
                f"Server check returned status {response.status_code}, retrying..."
            )
        except httpx.RequestError as e:
            # Log connection errors but don't treat as hard error yet
            logger.debug(f"Server check failed: {e}, retrying...")

        # Wait before the next poll attempt
        time.sleep(1)
238
+
239
+
240
def start_local_server(
    base_cmd: list[str],
    host: str,
    port: Optional[int] = None,
    api_key: Optional[str] = None,
    server_type: str = "server",
    timeout: Optional[int] = DEFAULT_TIMEOUT,
    server_args: Optional[dict[str, Any]] = None,
) -> Tuple[str, subprocess.Popen[str], int]:
    """
    Start a server with the given command and handle potential errors.

    Args:
        base_cmd: List of base command arguments
        host: Host to bind to
        port: Port to bind to. If None, a free port is reserved.
        api_key: API key to use for server authentication
        server_type: Type of server being started (for error messages)
        timeout: Maximum time to wait for server to become ready
        server_args: Additional server arguments to pass to the command
    Returns:
        Tuple of (base_url, process, port)

    Raises:
        RuntimeError: If server fails to start
    """
    # Copy so that appending CLI flags does not mutate the caller's list
    # (the original extended base_cmd in place, so repeated calls would
    # accumulate duplicate arguments).
    full_command = list(base_cmd)
    server_process = None

    if server_args:
        for key, value in server_args.items():
            # Convert Python style args (underscore) to CLI style (dash)
            cli_key = key.replace("_", "-")
            full_command.extend([f"--{cli_key}", str(value)])

    try:
        server_process, found_port, full_command = launch_server_cmd(
            full_command, host=host, port=port
        )
        base_url = f"http://localhost:{found_port}/v1"
        wait_for_server(
            f"http://localhost:{found_port}",
            server_process,
            api_key=api_key,
            timeout=timeout,
            full_command=full_command,
        )
        return base_url, server_process, found_port
    except Exception as e:
        # Cleanup any partially started server
        if server_process:
            terminate_process(server_process)

        # Re-raise with more context
        raise RuntimeError(f"Failed to start {server_type} server: {str(e)}") from e
295
+
296
+
297
def merge_env_server_args(
    env_var_name: str,
    provided_args: Dict[str, Any],
    logger: logging.Logger,
) -> Dict[str, Any]:
    """
    Load server arguments from an environment variable and merge them with provided arguments.

    The environment variable is expected to hold a JSON object; a parse
    failure is logged and treated as if the variable were unset.

    Args:
        env_var_name: Name of the environment variable containing JSON server args
        provided_args: Dictionary of server arguments provided by the user
        logger: Logger instance to log messages

    Returns:
        Dictionary of merged server arguments, with provided args taking precedence
    """
    env_server_args: Dict[str, Any] = {}
    server_args_json = os.environ.get(env_var_name)

    if server_args_json:
        try:
            env_server_args = json.loads(server_args_json)
        except json.JSONDecodeError:
            logger.warning(
                f"Failed to parse {env_var_name} as JSON: {server_args_json}"
            )
        else:
            logger.info(
                f"Loaded server args from environment {env_var_name}: {env_server_args}"
            )

    # Merge environment args with provided args (provided args take precedence)
    return {**env_server_args, **provided_args}
329
+
330
+
331
def configure_devices(
    server_args: dict[str, Any], parallel_size_param: str = "tensor_parallel_size"
) -> dict[str, Any]:
    """Configure device settings and return updated server args.

    Pops "device"/"devices" from the args, exports CUDA_VISIBLE_DEVICES
    accordingly, and defaults `parallel_size_param` to the device count.

    Args:
        server_args: Dictionary of server arguments
        parallel_size_param: Name of parameter to set with device count if not specified

    Returns:
        Updated server arguments dict (returned unchanged when no device
        key is present)

    Raises:
        ValueError: If both "device" and "devices" are specified.
    """
    result = server_args.copy()

    devices = None
    if "device" in result and "devices" in result:
        raise ValueError("Cannot specify both device and devices in server args")
    elif "devices" in result:
        devices = result.pop("devices")
    elif "device" in result:
        devices = result.pop("device")

    # No device selection requested: leave CUDA_VISIBLE_DEVICES and the
    # parallel size untouched. (The original code fell through here and
    # exported CUDA_VISIBLE_DEVICES="None", which hides every GPU.)
    if devices is None:
        return result

    # Convert device list to comma-separated string if needed
    if isinstance(devices, list):
        device_str = ",".join(map(str, devices))
    else:
        device_str = str(devices)

    # Restrict visible GPUs to the requested devices
    os.environ["CUDA_VISIBLE_DEVICES"] = device_str

    # Set parallel size parameter to the device count if not explicitly provided
    if parallel_size_param not in result:
        result[parallel_size_param] = len(device_str.split(","))

    return result
@@ -1,6 +1,8 @@
1
1
  import time
2
2
  from contextvars import ContextVar
3
3
 
4
+ from inspect_ai.util._limit import LimitExceededError
5
+
4
6
 
5
7
  def init_sample_working_limit(start_time: float, working_limit: float | None) -> None:
6
8
  _sample_working_limit.set(working_limit)
@@ -22,6 +24,8 @@ def report_sample_waiting_time(waiting_time: float) -> None:
22
24
 
23
25
 
24
26
  def check_sample_working_limit() -> None:
27
+ from inspect_ai.log._transcript import SampleLimitEvent, transcript
28
+
25
29
  # no check if we don't have a limit
26
30
  working_limit = _sample_working_limit.get()
27
31
  if working_limit is None:
@@ -31,13 +35,15 @@ def check_sample_working_limit() -> None:
31
35
  running_time = time.monotonic() - _sample_start_time.get()
32
36
  working_time = running_time - sample_waiting_time()
33
37
  if working_time > working_limit:
34
- from inspect_ai.solver._limit import SampleLimitExceededError
35
-
36
- raise SampleLimitExceededError(
38
+ message = f"Exceeded working time limit ({working_limit:,} seconds)"
39
+ transcript()._event(
40
+ SampleLimitEvent(type="working", limit=int(working_limit), message=message)
41
+ )
42
+ raise LimitExceededError(
37
43
  type="working",
38
44
  value=int(working_time),
39
45
  limit=int(working_limit),
40
- message=f"Exceeded working time limit ({working_limit:,} seconds)",
46
+ message=message,
41
47
  )
42
48
 
43
49