PyPI - inspect-ai - Versions diffs - 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl - Mend

inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131) hide show

inspect_ai/__init__.py +1 -0
inspect_ai/_cli/common.py +1 -1
inspect_ai/_cli/trace.py +33 -20
inspect_ai/_display/core/active.py +1 -1
inspect_ai/_display/core/display.py +1 -1
inspect_ai/_display/core/footer.py +1 -1
inspect_ai/_display/core/panel.py +1 -1
inspect_ai/_display/core/progress.py +0 -6
inspect_ai/_display/core/rich.py +1 -1
inspect_ai/_display/rich/display.py +2 -2
inspect_ai/_display/textual/app.py +15 -17
inspect_ai/_display/textual/widgets/clock.py +3 -3
inspect_ai/_display/textual/widgets/samples.py +6 -13
inspect_ai/_eval/context.py +9 -1
inspect_ai/_eval/run.py +16 -11
inspect_ai/_eval/score.py +4 -10
inspect_ai/_eval/task/results.py +5 -4
inspect_ai/_eval/task/run.py +6 -12
inspect_ai/_eval/task/task.py +10 -0
inspect_ai/_util/ansi.py +31 -0
inspect_ai/_util/datetime.py +1 -1
inspect_ai/_util/deprecation.py +1 -1
inspect_ai/_util/format.py +7 -0
inspect_ai/_util/json.py +11 -1
inspect_ai/_util/logger.py +14 -13
inspect_ai/_util/throttle.py +10 -1
inspect_ai/_util/trace.py +79 -47
inspect_ai/_util/transcript.py +37 -4
inspect_ai/_util/vscode.py +51 -0
inspect_ai/_view/notify.py +2 -1
inspect_ai/_view/www/.prettierrc.js +12 -0
inspect_ai/_view/www/App.css +22 -1
inspect_ai/_view/www/dist/assets/index.css +2374 -2
inspect_ai/_view/www/dist/assets/index.js +29752 -24492
inspect_ai/_view/www/log-schema.json +262 -215
inspect_ai/_view/www/package.json +1 -0
inspect_ai/_view/www/src/App.mjs +19 -9
inspect_ai/_view/www/src/Types.mjs +0 -1
inspect_ai/_view/www/src/api/Types.mjs +15 -4
inspect_ai/_view/www/src/api/api-http.mjs +2 -0
inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
inspect_ai/_view/www/src/components/MessageContent.mjs +1 -1
inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
inspect_ai/_view/www/src/components/Tools.mjs +28 -5
inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +238 -178
inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
inspect_ai/_view/www/src/types/log.d.ts +28 -20
inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
inspect_ai/_view/www/yarn.lock +44 -0
inspect_ai/approval/_apply.py +4 -0
inspect_ai/approval/_human/panel.py +5 -8
inspect_ai/dataset/_dataset.py +51 -10
inspect_ai/dataset/_util.py +31 -3
inspect_ai/log/__init__.py +2 -0
inspect_ai/log/_log.py +30 -2
inspect_ai/log/_recorders/eval.py +2 -0
inspect_ai/model/_call_tools.py +31 -7
inspect_ai/model/_chat_message.py +3 -0
inspect_ai/model/_model.py +42 -1
inspect_ai/model/_providers/anthropic.py +4 -0
inspect_ai/model/_providers/google.py +24 -6
inspect_ai/model/_providers/openai.py +17 -3
inspect_ai/model/_providers/openai_o1.py +10 -12
inspect_ai/model/_render.py +9 -2
inspect_ai/scorer/_metric.py +12 -1
inspect_ai/solver/__init__.py +2 -0
inspect_ai/solver/_human_agent/agent.py +83 -0
inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
inspect_ai/solver/_human_agent/commands/clock.py +70 -0
inspect_ai/solver/_human_agent/commands/command.py +59 -0
inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
inspect_ai/solver/_human_agent/commands/note.py +42 -0
inspect_ai/solver/_human_agent/commands/score.py +80 -0
inspect_ai/solver/_human_agent/commands/status.py +62 -0
inspect_ai/solver/_human_agent/commands/submit.py +151 -0
inspect_ai/solver/_human_agent/install.py +222 -0
inspect_ai/solver/_human_agent/panel.py +252 -0
inspect_ai/solver/_human_agent/service.py +45 -0
inspect_ai/solver/_human_agent/state.py +55 -0
inspect_ai/solver/_human_agent/view.py +24 -0
inspect_ai/solver/_task_state.py +28 -2
inspect_ai/tool/_tool.py +10 -2
inspect_ai/tool/_tool_info.py +2 -1
inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
inspect_ai/tool/_tools/_web_browser/_web_browser.py +16 -13
inspect_ai/util/__init__.py +12 -4
inspect_ai/{_util/display.py → util/_display.py} +6 -0
inspect_ai/util/_panel.py +31 -9
inspect_ai/util/_sandbox/__init__.py +0 -3
inspect_ai/util/_sandbox/context.py +5 -1
inspect_ai/util/_sandbox/docker/compose.py +17 -13
inspect_ai/util/_sandbox/docker/docker.py +9 -6
inspect_ai/util/_sandbox/docker/internal.py +1 -1
inspect_ai/util/_sandbox/docker/util.py +3 -2
inspect_ai/util/_sandbox/environment.py +6 -5
inspect_ai/util/_sandbox/local.py +1 -1
inspect_ai/util/_sandbox/self_check.py +18 -18
inspect_ai/util/_sandbox/service.py +22 -7
inspect_ai/util/_store.py +7 -8
inspect_ai/util/_store_model.py +110 -0
inspect_ai/util/_subprocess.py +3 -3
inspect_ai/util/_throttle.py +32 -0
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/METADATA +3 -3
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/RECORD +131 -108
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/WHEEL +1 -1
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/top_level.txt +0 -0

inspect_ai/util/_sandbox/service.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import asyncio
 import json
+from logging import getLogger
 from pathlib import PurePosixPath
 from textwrap import dedent
 from typing import (
@@ -14,9 +15,12 @@ from inspect_ai.util._subprocess import ExecResult
 from .environment import SandboxEnvironment
+logger = getLogger(__name__)
 REQUESTS_DIR = "requests"
 RESPONSES_DIR = "responses"
-SERVICES_DIR = "/tmp/inspect-sandbox-services"
+SERVICES_DIR = "/var/tmp/sandbox-services"
 ID = "id"
 METHOD = "method"
@@ -70,7 +74,7 @@ class SandboxService:
     ```python
     import sys
-    sys.path.append("/tmp/inspect-sandbox-services/foo")
+    sys.path.append("/var/tmp/sandbox-services/foo")
     import foo
     ```
@@ -79,7 +83,7 @@ class SandboxService:
     ```python
     import importlib.util
     spec = importlib.util.spec_from_file_location(
-        "foo", "/tmp/inspect-sandbox-services/foo/foo.py"
+        "foo", "/var/tmp/sandbox-services/foo/foo.py"
     )
     foo = importlib.util.module_from_spec(spec)
     spec.loader.exec_module(foo)
@@ -150,8 +154,14 @@ class SandboxService:
                 f"Error reading request for service {self._name}: '{read_request}' ({result.stderr})"
             )
-        # parse request
-        request_data = json.loads(result.stdout)
+        # parse request (a decode error could occur if it's incomplete, so bypass this)
+        try:
+            request_data = json.loads(result.stdout)
+        except json.JSONDecodeError:
+            logger.warning(
+                f"JSON decoding error reading service request: {result.stdout}"
+            )
+            return None
         if not isinstance(request_data, dict):
             raise TypeError(f"Service request is not a dict (type={request_data})")
@@ -275,7 +285,7 @@ class SandboxService:
             return request_id
         def _read_{self._name}_response(request_id: str) -> tuple[bool, Any]:
-            from json import load
+            from json import JSONDecodeError, load
             from pathlib import Path
             responses_dir = Path("{SERVICES_DIR}", "{self._name}", "{RESPONSES_DIR}")
@@ -283,7 +293,12 @@ class SandboxService:
             if response_path.exists():
                 # read and remove the file
                 with open(response_path, "r") as f:
-                    response = load(f)
+                    # it's possible the file is still being written so
+                    # just catch and wait for another retry if this occurs
+                    try:
+                        response = load(f)
+                    except JSONDecodeError:
+                        return False, None
                 response_path.unlink()
                 # raise error if we have one

inspect_ai/util/_store.py CHANGED Viewed

@@ -34,18 +34,14 @@ class Store:
     inheriting from Pydantic `BaseModel`)
     """
-    def __init__(self) -> None:
-        self._data: dict[str, Any] = {}
+    def __init__(self, data: dict[str, Any] | None = None) -> None:
+        self._data = deepcopy(data) if data else {}
     @overload
-    def get(self, key: str, default: None = None) -> Any:
-        return self._data.get(key, default)
+    def get(self, key: str, default: None = None) -> Any: ...
     @overload
-    def get(self, key: str, default: VT) -> VT:
-        if key not in self._data.keys():
-            self._data[key] = default
-        return cast(VT, self._data.get(key, default))
+    def get(self, key: str, default: VT) -> VT: ...
     def get(self, key: str, default: VT | None = None) -> VT | Any:
         """Get a value from the store.
@@ -60,6 +56,9 @@ class Store:
         Returns:
            Value if it exists, otherwise default.
         """
+        if default is not None:
+            if key not in self._data.keys():
+                self._data[key] = default
         return cast(VT, self._data.get(key, default))
     def set(self, key: str, value: Any) -> None:

inspect_ai/util/_store_model.py ADDED Viewed

@@ -0,0 +1,110 @@
+from typing import Any, Type, TypeVar
+from pydantic import BaseModel, ConfigDict, Field
+from ._store import Store, store
+class StoreModel(BaseModel):
+    """Store backed Pydantic BaseModel.
+    The model is initialised from a Store, so that Store should
+    either already satisfy the validation constraints of the model
+    OR you should provide Field(default=) annotations for all of
+    your model fields (the latter approach is recommended).
+    """
+    store: Store = Field(exclude=True, default_factory=store)
+    def model_post_init(self, __context: Any) -> None:
+        for name in self.model_fields.keys():
+            if name == "store":
+                continue
+            # if it's in the store, then have our dict reflect that
+            ns_name = self._ns_name(name)
+            if ns_name in self.store:
+                self.__dict__[name] = self.store.get(ns_name)
+            # if it's not in the store, then reflect dict into store
+            elif name in self.__dict__.keys():
+                self.store.set(ns_name, self.__dict__[name])
+    def __getattribute__(self, name: str) -> Any:
+        # sidestep dunders and pydantic fields
+        if name.startswith("__") or name.startswith("model_"):
+            return object.__getattribute__(self, name)
+        # handle model_fields (except 'store') by reading the store
+        elif name in object.__getattribute__(self, "model_fields") and name != "store":
+            store_key = self._ns_name(name)
+            if store_key in self.store:
+                return self.store.get(store_key)
+            else:
+                return object.__getattribute__(self, name)
+        # default to super
+        else:
+            return super().__getattribute__(name)
+    def __setattr__(self, name: str, value: Any) -> None:
+        if name in self.model_fields:
+            # validate with the new value (can throw ValidationError)
+            temp_data = self.store._data.copy()
+            temp_data[self._ns_name(name)] = value
+            self._validate_store(temp_data)
+            # update the store and sync the underlying __dict__
+            self.store.set(self._ns_name(name), value)
+            self.__dict__[name] = value
+        else:
+            super().__setattr__(name, value)
+    def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
+        self._sync_model()  # in case store was updated behind our back
+        return super().model_dump(*args, **kwargs)
+    def model_dump_json(self, *args: Any, **kwargs: Any) -> str:
+        self._sync_model()  # in case store was updated behind our back
+        return super().model_dump_json(*args, **kwargs)
+    def _sync_model(self) -> None:
+        self._validate_store()
+        for field_name in self.model_fields.keys():
+            if field_name == "store":
+                continue
+            store_value = self.store.get(self._ns_name(field_name))
+            self.__dict__[field_name] = store_value
+    def _validate_store(self, data: dict[str, Any] | None = None) -> None:
+        # validate store or custom dict
+        data = data if data is not None else self.store._data
+        # pick out keys to validate
+        validate: dict[str, Any] = {}
+        for k, v in data.items():
+            if k.startswith(f"{self.__class__.__name__}:"):
+                unprefixed = self._un_ns_name(k)
+                validate[unprefixed] = v
+        # perform validation
+        self.__class__.model_validate(validate)
+    def _ns_name(self, name: str) -> str:
+        return f"{self.__class__.__name__}:{name}"
+    def _un_ns_name(self, name: str) -> str:
+        return name.replace(f"{self.__class__.__name__}:", "", 1)
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+SMT = TypeVar("SMT", bound=StoreModel)
+def store_as(model_cls: Type[SMT]) -> SMT:
+    """Get a Pydantic model interface to the store.
+    Args:
+      model_cls: Pydantic model type (must derive from StoreModel)
+    Returns:
+      StoreModel: Instance of model_cls bound to current Store.
+    """
+    return model_cls(store=store())

inspect_ai/util/_subprocess.py CHANGED Viewed

@@ -101,9 +101,9 @@ async def subprocess(
     input = input.encode() if isinstance(input, str) else input
     # function to run command (we may or may not run it w/ concurrency)
-    async def run_command() -> (
-        AsyncGenerator[Union[Process, ExecResult[str], ExecResult[bytes]], None]
-    ):
+    async def run_command() -> AsyncGenerator[
+        Union[Process, ExecResult[str], ExecResult[bytes]], None
+    ]:
         if isinstance(args, str):
             proc = await asyncio.create_subprocess_shell(
                 args,

inspect_ai/util/_throttle.py ADDED Viewed

@@ -0,0 +1,32 @@
+import time
+from functools import wraps
+from typing import Any, Callable
+def throttle(seconds: float) -> Callable[..., Any]:
+    """Throttle a function to ensure it is called no more than every n seconds.
+    Args:
+       seconds (float): Throttle time.
+    Returns:
+       Callable: Throttled function.
+    """
+    def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
+        last_called: float = 0
+        last_result: Any = None
+        @wraps(func)
+        def wrapped(*args: Any, **kwargs: Any) -> Any:
+            nonlocal last_called
+            nonlocal last_result
+            current_time = time.time()
+            if current_time - last_called >= seconds:
+                last_result = func(*args, **kwargs)
+                last_called = current_time
+            return last_result
+        return wrapped
+    return decorator

{inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.2
 Name: inspect_ai
-Version: 0.3.55
+Version: 0.3.57
 Summary: Framework for large language model evaluations
 Author: UK AI Safety Institute
 License: MIT License
@@ -67,7 +67,7 @@ Requires-Dist: pytest-asyncio; extra == "dev"
 Requires-Dist: pytest-cov; extra == "dev"
 Requires-Dist: pytest-dotenv; extra == "dev"
 Requires-Dist: pytest-xdist; extra == "dev"
-Requires-Dist: ruff==0.8.4; extra == "dev"
+Requires-Dist: ruff==0.9.0; extra == "dev"
 Requires-Dist: textual-dev>=0.86.2; extra == "dev"
 Requires-Dist: types-PyYAML; extra == "dev"
 Requires-Dist: types-beautifulsoup4; extra == "dev"

inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl

inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl