PyPI - inspect-ai - Versions diffs - 0.3.56__py3-none-any.whl → 0.3.58__py3-none-any.whl - Mend

inspect-ai 0.3.56__py3-none-any.whl → 0.3.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (107) hide show

inspect_ai/__init__.py +2 -1
inspect_ai/_cli/common.py +4 -2
inspect_ai/_cli/eval.py +2 -0
inspect_ai/_cli/trace.py +21 -2
inspect_ai/_display/core/active.py +0 -2
inspect_ai/_display/core/panel.py +1 -1
inspect_ai/_display/rich/display.py +4 -4
inspect_ai/_display/textual/app.py +4 -1
inspect_ai/_display/textual/widgets/samples.py +41 -5
inspect_ai/_eval/eval.py +32 -20
inspect_ai/_eval/evalset.py +7 -5
inspect_ai/_eval/run.py +16 -11
inspect_ai/_eval/task/__init__.py +2 -2
inspect_ai/_eval/task/images.py +40 -25
inspect_ai/_eval/task/run.py +141 -119
inspect_ai/_eval/task/task.py +140 -25
inspect_ai/_util/constants.py +1 -0
inspect_ai/_util/content.py +23 -1
inspect_ai/_util/datetime.py +1 -1
inspect_ai/_util/deprecation.py +1 -1
inspect_ai/_util/images.py +20 -17
inspect_ai/_util/json.py +11 -1
inspect_ai/_util/kvstore.py +73 -0
inspect_ai/_util/logger.py +2 -1
inspect_ai/_util/notgiven.py +18 -0
inspect_ai/_util/thread.py +5 -0
inspect_ai/_util/trace.py +39 -3
inspect_ai/_util/transcript.py +36 -7
inspect_ai/_view/www/.prettierrc.js +12 -0
inspect_ai/_view/www/dist/assets/index.js +322 -226
inspect_ai/_view/www/log-schema.json +221 -138
inspect_ai/_view/www/src/App.mjs +18 -9
inspect_ai/_view/www/src/Types.mjs +0 -1
inspect_ai/_view/www/src/api/Types.mjs +15 -4
inspect_ai/_view/www/src/api/api-http.mjs +2 -0
inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
inspect_ai/_view/www/src/components/MessageBand.mjs +2 -2
inspect_ai/_view/www/src/components/MessageContent.mjs +44 -2
inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
inspect_ai/_view/www/src/components/Tools.mjs +18 -3
inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +242 -178
inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
inspect_ai/_view/www/src/types/log.d.ts +53 -35
inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
inspect_ai/approval/_human/util.py +2 -2
inspect_ai/dataset/_sources/csv.py +2 -1
inspect_ai/dataset/_sources/json.py +2 -1
inspect_ai/dataset/_sources/util.py +15 -7
inspect_ai/log/_condense.py +11 -1
inspect_ai/log/_log.py +27 -5
inspect_ai/log/_recorders/eval.py +21 -8
inspect_ai/log/_samples.py +10 -5
inspect_ai/log/_transcript.py +28 -1
inspect_ai/model/__init__.py +10 -2
inspect_ai/model/_call_tools.py +82 -17
inspect_ai/model/_chat_message.py +2 -4
inspect_ai/model/{_trace.py → _conversation.py} +9 -8
inspect_ai/model/_model.py +2 -2
inspect_ai/model/_providers/anthropic.py +9 -7
inspect_ai/model/_providers/azureai.py +6 -4
inspect_ai/model/_providers/bedrock.py +6 -4
inspect_ai/model/_providers/google.py +103 -14
inspect_ai/model/_providers/groq.py +7 -5
inspect_ai/model/_providers/hf.py +11 -6
inspect_ai/model/_providers/mistral.py +6 -9
inspect_ai/model/_providers/openai.py +34 -8
inspect_ai/model/_providers/openai_o1.py +10 -12
inspect_ai/model/_providers/vertex.py +17 -4
inspect_ai/scorer/__init__.py +13 -2
inspect_ai/scorer/_metrics/__init__.py +2 -2
inspect_ai/scorer/_metrics/std.py +3 -3
inspect_ai/tool/__init__.py +9 -1
inspect_ai/tool/_tool.py +9 -2
inspect_ai/tool/_tool_info.py +2 -1
inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
inspect_ai/tool/_tools/_web_browser/_web_browser.py +3 -3
inspect_ai/util/__init__.py +4 -3
inspect_ai/util/{_trace.py → _conversation.py} +3 -17
inspect_ai/util/_display.py +14 -4
inspect_ai/util/_sandbox/context.py +12 -13
inspect_ai/util/_sandbox/docker/compose.py +24 -13
inspect_ai/util/_sandbox/docker/docker.py +20 -13
inspect_ai/util/_sandbox/docker/util.py +2 -1
inspect_ai/util/_sandbox/environment.py +13 -1
inspect_ai/util/_sandbox/local.py +1 -0
inspect_ai/util/_sandbox/self_check.py +18 -18
inspect_ai/util/_store.py +2 -2
inspect_ai/util/_subprocess.py +3 -3
{inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/METADATA +3 -3
{inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/RECORD +107 -103
{inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/WHEEL +1 -1
{inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.56.dist-info → inspect_ai-0.3.58.dist-info}/top_level.txt +0 -0

inspect_ai/_util/content.py CHANGED Viewed

@@ -25,5 +25,27 @@ class ContentImage(BaseModel):
     """
-Content = Union[ContentText, ContentImage]
+class ContentAudio(BaseModel):
+    type: Literal["audio"] = Field(default="audio")
+    """Type."""
+    audio: str
+    """Audio file path or base64 encoded data URL."""
+    format: Literal["wav", "mp3"]
+    """Format of audio data ('mp3' or 'wav')"""
+class ContentVideo(BaseModel):
+    type: Literal["video"] = Field(default="video")
+    """Type."""
+    video: str
+    """Video file path or base64 encoded data URL."""
+    format: Literal["mp4", "mpeg", "mov"]
+    """Format of video data ('mp4', 'mpeg', or 'mov')"""
+Content = Union[ContentText, ContentImage, ContentAudio, ContentVideo]
 """Content sent to or received from a model."""

inspect_ai/_util/datetime.py CHANGED Viewed

@@ -4,7 +4,7 @@ from typing import Literal
 def iso_now(
     timespec: Literal[
-        "auto", "hours", "minutes", "seconds", "milliseconds" "microseconds"
+        "auto", "hours", "minutes", "seconds", "milliseconds", "microseconds"
     ] = "seconds",
 ) -> str:
     return datetime.now().astimezone().isoformat(timespec=timespec)

inspect_ai/_util/deprecation.py CHANGED Viewed

@@ -174,7 +174,7 @@ def default_deprecation_msg(
         _qual = getattr(obj, "__qualname__", "") or ""
         if _qual.endswith(".__init__") or _qual.endswith(".__new__"):
-            _obj = f' class ({_qual.rsplit(".", 1)[0]})'
+            _obj = f" class ({_qual.rsplit('.', 1)[0]})"
         elif _qual and _obj:
             _obj += f" ({_qual})"

inspect_ai/_util/images.py CHANGED Viewed

@@ -3,7 +3,7 @@ import mimetypes
 import httpx
-from .file import file
+from .file import file as open_file
 from .url import (
     data_uri_mime_type,
     data_uri_to_base64,
@@ -12,34 +12,37 @@ from .url import (
 )
-async def image_as_data(image: str) -> tuple[bytes, str]:
-    if is_data_uri(image):
+async def file_as_data(file: str) -> tuple[bytes, str]:
+    if is_data_uri(file):
         # resolve mime type and base64 content
-        mime_type = data_uri_mime_type(image) or "image/png"
-        image_base64 = data_uri_to_base64(image)
-        image_bytes = base64.b64decode(image_base64)
+        mime_type = data_uri_mime_type(file) or "image/png"
+        file_base64 = data_uri_to_base64(file)
+        file_bytes = base64.b64decode(file_base64)
     else:
         # guess mime type; need strict=False for webp images
-        type, _ = mimetypes.guess_type(image, strict=False)
+        type, _ = mimetypes.guess_type(file, strict=False)
         if type:
             mime_type = type
         else:
             mime_type = "image/png"
         # handle url or file
-        if is_http_url(image):
+        if is_http_url(file):
             client = httpx.AsyncClient()
-            image_bytes = (await client.get(image)).content
+            file_bytes = (await client.get(file)).content
         else:
-            with file(image, "rb") as f:
-                image_bytes = f.read()
+            with open_file(file, "rb") as f:
+                file_bytes = f.read()
     # return bytes and type
-    return image_bytes, mime_type
+    return file_bytes, mime_type
-async def image_as_data_uri(image: str) -> str:
-    bytes, mime_type = await image_as_data(image)
-    base64_image = base64.b64encode(bytes).decode("utf-8")
-    image = f"data:{mime_type};base64,{base64_image}"
-    return image
+async def file_as_data_uri(file: str) -> str:
+    if is_data_uri(file):
+        return file
+    else:
+        bytes, mime_type = await file_as_data(file)
+        base64_file = base64.b64encode(bytes).decode("utf-8")
+        file = f"data:{mime_type};base64,{base64_file}"
+        return file

inspect_ai/_util/json.py CHANGED Viewed

@@ -103,10 +103,20 @@ def json_changes(
                 paths = json_change.path.split("/")[1:]
                 replaced = before
                 for path in paths:
-                    index: Any = int(path) if path.isnumeric() else path
+                    decoded_path = decode_json_pointer_segment(path)
+                    index: Any = (
+                        int(decoded_path) if decoded_path.isnumeric() else decoded_path
+                    )
                     replaced = replaced[index]
                 json_change.replaced = replaced
             changes.append(json_change)
         return changes
     else:
         return None
+def decode_json_pointer_segment(segment: str) -> str:
+    """Decode a single JSON Pointer segment."""
+    # JSON Pointers encode ~ and / because they are special characters
+    # this decodes these values (https://www.rfc-editor.org/rfc/rfc6901)
+    return segment.replace("~1", "/").replace("~0", "~")

inspect_ai/_util/kvstore.py ADDED Viewed

@@ -0,0 +1,73 @@
+import sqlite3
+from contextlib import AbstractContextManager
+from typing import Any, Optional, cast
+from .appdirs import inspect_data_dir
+class KVStore(AbstractContextManager["KVStore"]):
+    def __init__(self, filename: str, max_entries: int | None = None):
+        self.filename = filename
+        self.max_entries = max_entries
+    def __enter__(self) -> "KVStore":
+        self.conn = sqlite3.connect(self.filename)
+        self.conn.execute("""
+            CREATE TABLE IF NOT EXISTS kv_store (
+                key TEXT PRIMARY KEY,
+                value TEXT,
+                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+            )
+        """)
+        self.conn.commit()
+        return self
+    def __exit__(self, *excinfo: Any) -> None:
+        self.conn.close()
+    def put(self, key: str, value: str) -> None:
+        # Insert or update the value
+        self.conn.execute(
+            """
+            INSERT OR REPLACE INTO kv_store (key, value, created_at)
+            VALUES (?, ?, CURRENT_TIMESTAMP)
+        """,
+            (key, value),
+        )
+        # If we have a max_entries limit, remove oldest entries
+        if self.max_entries:
+            count = self.count()
+            if count > self.max_entries:
+                self.conn.execute(
+                    """
+                    DELETE FROM kv_store
+                    WHERE key IN (
+                        SELECT key FROM kv_store
+                        ORDER BY created_at ASC
+                        LIMIT ?
+                    )
+                    """,
+                    (max(0, count - self.max_entries),),
+                )
+        self.conn.commit()
+    def get(self, key: str) -> Optional[str]:
+        cursor = self.conn.execute("SELECT value FROM kv_store WHERE key = ?", (key,))
+        result = cursor.fetchone()
+        return result[0] if result else None
+    def delete(self, key: str) -> bool:
+        cursor = self.conn.execute("DELETE FROM kv_store WHERE key = ?", (key,))
+        self.conn.commit()
+        return cursor.rowcount > 0
+    def count(self) -> int:
+        cursor = self.conn.execute("SELECT COUNT(*) FROM kv_store")
+        return cast(int, cursor.fetchone()[0])
+def inspect_kvstore(name: str, max_entries: int | None = None) -> KVStore:
+    filename = inspect_data_dir("kvstore") / f"{name}.db"
+    return KVStore(filename.as_posix(), max_entries=max_entries)

inspect_ai/_util/logger.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import atexit
 import os
+import re
 from logging import (
     DEBUG,
     INFO,
@@ -182,7 +183,7 @@ def notify_logger_record(record: LogRecord, write: bool) -> None:
     if write:
         transcript()._event(LoggerEvent(message=LoggingMessage.from_log_record(record)))
     global _rate_limit_count
-    if (record.levelno <= INFO and "429" in record.getMessage()) or (
+    if (record.levelno <= INFO and re.search(r"\b429\b", record.getMessage())) or (
         record.levelno == DEBUG
         # See https://boto3.amazonaws.com/v1/documentation/api/latest/guide/retries.html#validating-retry-attempts
         # for boto retry logic / log messages (this is tracking standard or adaptive retries)

inspect_ai/_util/notgiven.py ADDED Viewed

@@ -0,0 +1,18 @@
+# Sentinel class used until PEP 0661 is accepted
+from typing import Literal
+from typing_extensions import override
+class NotGiven:
+    """A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior)."""
+    def __bool__(self) -> Literal[False]:
+        return False
+    @override
+    def __repr__(self) -> str:
+        return "NOT_GIVEN"
+NOT_GIVEN = NotGiven()

inspect_ai/_util/thread.py ADDED Viewed

@@ -0,0 +1,5 @@
+import threading
+def is_main_thread() -> bool:
+    return threading.current_thread() is threading.main_thread()

inspect_ai/_util/trace.py CHANGED Viewed

@@ -33,6 +33,22 @@ def inspect_trace_file() -> Path:
 def trace_action(
     logger: Logger, action: str, message: str, *args: Any, **kwargs: Any
 ) -> Generator[None, None, None]:
+    """Trace a long running or potentially unreliable action.
+    Trace actions for which you want to collect data on the resolution
+    (e.g. succeeded, cancelled, failed, timed out, etc.) and duration of.
+    Traces are written to the `TRACE` log level (which is just below
+    `HTTP` and `INFO`). List and read trace logs with `inspect trace list`
+    and related commands (see `inspect trace --help` for details).
+    Args:
+       logger (Logger): Logger to use for tracing (e.g. from `getLogger(__name__)`)
+       action (str): Name of action to trace (e.g. 'Model', 'Subprocess', etc.)
+       message (str): Message describing action (can be a format string w/ args or kwargs)
+       *args (Any): Positional arguments for `message` format string.
+       **kwargs (Any): Named args for `message` format string.
+    """
     trace_id = uuid()
     start_monotonic = time.monotonic()
     start_wall = time.time()
@@ -117,6 +133,19 @@ def trace_action(
 def trace_message(
     logger: Logger, category: str, message: str, *args: Any, **kwargs: Any
 ) -> None:
+    """Log a message using the TRACE log level.
+    The `TRACE` log level is just below `HTTP` and `INFO`. List and
+    read trace logs with `inspect trace list` and related commands
+    (see `inspect trace --help` for details).
+    Args:
+       logger (Logger): Logger to use for tracing (e.g. from `getLogger(__name__)`)
+       category (str): Category of trace message.
+       message (str): Trace message (can be a format string w/ args or kwargs)
+       *args (Any): Positional arguments for `message` format string.
+       **kwargs (Any): Named args for `message` format string.
+    """
     logger.log(TRACE, f"[{category}] {message}", *args, **kwargs)
@@ -250,9 +279,16 @@ def read_trace_file(file: Path) -> list[TraceRecord]:
 def rotate_trace_files() -> None:
-    rotate_files = list_trace_files()[10:]
-    for file in rotate_files:
-        file.file.unlink(missing_ok=True)
+    # if multiple inspect processes start up at once they
+    # will all be attempting to rotate at the same time,
+    # which can lead to FileNotFoundError -- ignore these
+    # errors if they occur
+    try:
+        rotate_files = list_trace_files()[10:]
+        for file in rotate_files:
+            file.file.unlink(missing_ok=True)
+    except FileNotFoundError:
+        pass
 def compress_trace_log(log_handler: FileHandler) -> Callable[[], None]:

inspect_ai/_util/transcript.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import html
+import re
 from typing import Any
 from rich.align import AlignMethod
@@ -19,13 +20,43 @@ def transcript_code_theme() -> str:
 def transcript_markdown(content: str, *, escape: bool = False) -> Markdown:
     code_theme = transcript_code_theme()
     return Markdown(
-        html.escape(content) if escape else content,
+        html_escape_markdown(content) if escape else content,
         code_theme=code_theme,
         inline_code_lexer="python",
         inline_code_theme=code_theme,
     )
+def html_escape_markdown(content: str) -> str:
+    """Escape markdown lines that aren't in a code block."""
+    codeblock_pattern = re.compile("`{3,}")
+    current_codeblock = ""
+    escaped: list[str] = []
+    lines = content.splitlines()
+    for line in lines:
+        # look for matching end of codeblock
+        if current_codeblock:
+            if current_codeblock in line:
+                current_codeblock = ""
+                escaped.append(line)
+                continue
+        # look for beginning of codeblock
+        match = codeblock_pattern.search(line)
+        if match:
+            current_codeblock = match[0]
+            escaped.append(line)
+            continue
+        # escape if we are not in a codeblock
+        if current_codeblock:
+            escaped.append(line)
+        else:
+            escaped.append(html.escape(line, quote=False))
+    return "\n".join(escaped)
 def set_transcript_markdown_options(markdown: Markdown) -> None:
     code_theme = transcript_code_theme()
     markdown.code_theme = code_theme
@@ -89,12 +120,10 @@ def transcript_function(function: str, arguments: dict[str, Any]) -> RenderableT
     return transcript_markdown("```python\n" + call + "\n```\n")
-DOUBLE_LINE = Box(
-    " ══ \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n"
-)
+DOUBLE_LINE = Box(" ══ \n    \n    \n    \n    \n    \n    \n    \n")
-LINE = Box(" ── \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n")
+LINE = Box(" ── \n    \n    \n    \n    \n    \n    \n    \n")
-DOTTED = Box(" ·· \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n")
+DOTTED = Box(" ·· \n    \n    \n    \n    \n    \n    \n    \n")
-NOBORDER = Box("    \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n" "    \n")
+NOBORDER = Box("    \n    \n    \n    \n    \n    \n    \n    \n")

inspect_ai/_view/www/.prettierrc.js ADDED Viewed

@@ -0,0 +1,12 @@
+// Do not remove this file even if the config is empty!
+// VSCode's "Format Document" will respect this config and use the default
+// settings, which is what we want. Without prettierrc, VSCode falls back to
+// users settings, which could be different.
+/**
+ * @see https://prettier.io/docs/en/configuration.html
+ * @type {import("prettier").Config}
+ */
+const config = {};
+export default config;

inspect-ai 0.3.56__py3-none-any.whl → 0.3.58__py3-none-any.whl

inspect-ai 0.3.56__py3-none-any.whl → 0.3.58__py3-none-any.whl