inspect-ai 0.3.92__py3-none-any.whl → 0.3.94__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +27 -0
- inspect_ai/_display/textual/widgets/samples.py +3 -3
- inspect_ai/_display/textual/widgets/transcript.py +3 -29
- inspect_ai/_eval/eval.py +19 -2
- inspect_ai/_eval/evalset.py +4 -1
- inspect_ai/_eval/run.py +41 -0
- inspect_ai/_eval/task/generate.py +38 -44
- inspect_ai/_eval/task/log.py +26 -28
- inspect_ai/_eval/task/run.py +23 -27
- inspect_ai/_util/answer.py +26 -0
- inspect_ai/_util/constants.py +0 -1
- inspect_ai/_util/local_server.py +398 -0
- inspect_ai/_util/working.py +10 -4
- inspect_ai/_view/www/dist/assets/index.css +173 -159
- inspect_ai/_view/www/dist/assets/index.js +1417 -1142
- inspect_ai/_view/www/log-schema.json +379 -3
- inspect_ai/_view/www/package.json +1 -1
- inspect_ai/_view/www/src/@types/log.d.ts +93 -14
- inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +2 -2
- inspect_ai/_view/www/src/app/content/MetaDataView.module.css +1 -1
- inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +1 -1
- inspect_ai/_view/www/src/app/content/RenderedContent.tsx +1 -1
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +11 -0
- inspect_ai/_view/www/src/app/log-view/tabs/InfoTab.tsx +2 -9
- inspect_ai/_view/www/src/app/log-view/tabs/ModelsTab.tsx +51 -0
- inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.module.css +6 -0
- inspect_ai/_view/www/src/app/log-view/tabs/TaskTab.tsx +143 -0
- inspect_ai/_view/www/src/app/plan/ModelCard.tsx +1 -2
- inspect_ai/_view/www/src/app/plan/PlanCard.tsx +29 -7
- inspect_ai/_view/www/src/app/plan/PlanDetailView.module.css +1 -1
- inspect_ai/_view/www/src/app/plan/PlanDetailView.tsx +1 -198
- inspect_ai/_view/www/src/app/samples/descriptor/score/NumericScoreDescriptor.tsx +2 -1
- inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.module.css +2 -1
- inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +174 -0
- inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +8 -8
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +0 -3
- inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +87 -25
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +229 -17
- inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +11 -0
- inspect_ai/_view/www/src/app/samples/transcript/types.ts +5 -1
- inspect_ai/_view/www/src/app/usage/ModelUsagePanel.tsx +3 -2
- inspect_ai/_view/www/src/app/usage/TokenTable.module.css +4 -1
- inspect_ai/_view/www/src/app/usage/TokenTable.tsx +2 -2
- inspect_ai/_view/www/src/app/usage/UsageCard.module.css +8 -3
- inspect_ai/_view/www/src/app/usage/UsageCard.tsx +1 -35
- inspect_ai/_view/www/src/components/Card.css +0 -1
- inspect_ai/_view/www/src/constants.ts +2 -0
- inspect_ai/_view/www/src/utils/numeric.ts +17 -0
- inspect_ai/agent/_agent.py +3 -3
- inspect_ai/agent/_as_solver.py +22 -12
- inspect_ai/agent/_as_tool.py +20 -6
- inspect_ai/agent/_handoff.py +12 -1
- inspect_ai/agent/_react.py +4 -3
- inspect_ai/agent/_run.py +16 -3
- inspect_ai/agent/_types.py +9 -0
- inspect_ai/dataset/_dataset.py +6 -3
- inspect_ai/log/__init__.py +14 -0
- inspect_ai/log/_convert.py +4 -9
- inspect_ai/log/_file.py +56 -0
- inspect_ai/log/_log.py +99 -0
- inspect_ai/log/_recorders/__init__.py +2 -0
- inspect_ai/log/_recorders/buffer/database.py +12 -11
- inspect_ai/log/_recorders/buffer/filestore.py +2 -2
- inspect_ai/log/_recorders/buffer/types.py +2 -2
- inspect_ai/log/_recorders/eval.py +20 -65
- inspect_ai/log/_recorders/file.py +28 -6
- inspect_ai/log/_recorders/recorder.py +7 -0
- inspect_ai/log/_recorders/types.py +1 -23
- inspect_ai/log/_samples.py +14 -25
- inspect_ai/log/_transcript.py +84 -36
- inspect_ai/log/_tree.py +118 -0
- inspect_ai/log/_util.py +52 -0
- inspect_ai/model/__init__.py +5 -1
- inspect_ai/model/_call_tools.py +72 -44
- inspect_ai/model/_generate_config.py +14 -8
- inspect_ai/model/_model.py +66 -88
- inspect_ai/model/_model_output.py +25 -0
- inspect_ai/model/_openai.py +2 -0
- inspect_ai/model/_providers/anthropic.py +13 -23
- inspect_ai/model/_providers/hf.py +27 -1
- inspect_ai/model/_providers/openai_o1.py +8 -2
- inspect_ai/model/_providers/providers.py +18 -4
- inspect_ai/model/_providers/sglang.py +247 -0
- inspect_ai/model/_providers/vllm.py +211 -400
- inspect_ai/scorer/_choice.py +1 -2
- inspect_ai/solver/__init__.py +7 -2
- inspect_ai/solver/_basic_agent.py +3 -10
- inspect_ai/solver/_chain.py +1 -1
- inspect_ai/solver/_fork.py +1 -1
- inspect_ai/solver/_multiple_choice.py +5 -22
- inspect_ai/solver/_plan.py +2 -2
- inspect_ai/solver/_task_state.py +26 -88
- inspect_ai/solver/_transcript.py +6 -7
- inspect_ai/tool/_json_rpc_helpers.py +45 -17
- inspect_ai/tool/_mcp/_mcp.py +8 -5
- inspect_ai/tool/_mcp/_sandbox.py +8 -2
- inspect_ai/tool/_mcp/server.py +3 -1
- inspect_ai/tool/_tool_call.py +4 -1
- inspect_ai/tool/_tool_support_helpers.py +51 -12
- inspect_ai/tool/_tools/_bash_session.py +190 -68
- inspect_ai/tool/_tools/_computer/_computer.py +25 -1
- inspect_ai/tool/_tools/_execute.py +4 -1
- inspect_ai/tool/_tools/_text_editor.py +4 -3
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +10 -3
- inspect_ai/util/__init__.py +16 -0
- inspect_ai/util/_anyio.py +11 -0
- inspect_ai/util/_collect.py +50 -0
- inspect_ai/util/_limit.py +393 -0
- inspect_ai/util/_limited_conversation.py +57 -0
- inspect_ai/util/_span.py +58 -0
- inspect_ai/util/_subtask.py +27 -42
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/METADATA +1 -1
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/RECORD +120 -134
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/WHEEL +1 -1
- inspect_ai/_display/core/group.py +0 -79
- inspect_ai/solver/_limit.py +0 -39
- inspect_ai/tool/_tools/_computer/_resources/Dockerfile +0 -102
- inspect_ai/tool/_tools/_computer/_resources/README.md +0 -30
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/entrypoint.sh +0 -18
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/novnc_startup.sh +0 -20
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -48
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/xfce_startup.sh +0 -13
- inspect_ai/tool/_tools/_computer/_resources/entrypoint/xvfb_startup.sh +0 -48
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -9
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -61
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +0 -91
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -10
- inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +0 -8
- inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +0 -12
- inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +0 -78
- inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +0 -22
- inspect_ai/tool/_tools/_computer/_resources/tool/_logger.py +0 -22
- inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +0 -42
- inspect_ai/tool/_tools/_computer/_resources/tool/_tool_result.py +0 -33
- inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +0 -341
- inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +0 -141
- inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +0 -65
- inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/tool/_tools/_computer/test_args.py +0 -151
- /inspect_ai/{tool/_tools/_computer/_resources/tool/__init__.py → _view/www/src/app/log-view/tabs/ModelsTab.module.css} +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.92.dist-info → inspect_ai-0.3.94.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,50 @@
|
|
1
|
+
import sys
|
2
|
+
from typing import Awaitable, TypeVar, cast
|
3
|
+
|
4
|
+
import anyio
|
5
|
+
|
6
|
+
from ._span import span
|
7
|
+
|
8
|
+
if sys.version_info < (3, 11):
|
9
|
+
from exceptiongroup import ExceptionGroup
|
10
|
+
|
11
|
+
|
12
|
+
T = TypeVar("T")
|
13
|
+
|
14
|
+
|
15
|
+
async def collect(*tasks: Awaitable[T]) -> list[T]:
    """Run and collect the results of one or more async coroutines.

    Similar to [`asyncio.gather()`](https://docs.python.org/3/library/asyncio-task.html#asyncio.gather),
    but also works when [Trio](https://trio.readthedocs.io/en/stable/) is the async backend.

    Each task is automatically wrapped in a `span()` so that its events are
    grouped together in the transcript. Prefer `collect()` over
    `asyncio.gather()` for both Trio compatibility and more legible
    transcript output.

    Args:
        *tasks: Tasks to run

    Returns:
        List of task results.
    """
    outcomes: list[None | T] = [None] * len(tasks)

    async def _run(slot: int, awaitable: Awaitable[T]) -> None:
        # Each task gets its own span so its events group together.
        async with span(f"task-{slot + 1}", type="task"):
            outcomes[slot] = await awaitable

    try:
        async with anyio.create_task_group() as group:
            for slot, awaitable in enumerate(tasks):
                group.start_soon(_run, slot, awaitable)
    except ExceptionGroup as eg:
        # A lone failure is re-raised bare so callers see the familiar
        # exception type rather than a group wrapper.
        if len(eg.exceptions) == 1:
            raise eg.exceptions[0] from None
        raise

    return cast(list[T], outcomes)
|
@@ -0,0 +1,393 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import abc
|
4
|
+
import logging
|
5
|
+
from contextlib import ExitStack, contextmanager
|
6
|
+
from contextvars import ContextVar
|
7
|
+
from types import TracebackType
|
8
|
+
from typing import TYPE_CHECKING, Iterator, Literal
|
9
|
+
|
10
|
+
from inspect_ai._util.logger import warn_once
|
11
|
+
|
12
|
+
if TYPE_CHECKING:
|
13
|
+
# These imports are used as type hints only - prevent circular imports.
|
14
|
+
from inspect_ai.model._model_output import ModelUsage
|
15
|
+
from inspect_ai.solver._task_state import TaskState
|
16
|
+
|
17
|
+
|
18
|
+
logger = logging.getLogger(__name__)
|
19
|
+
|
20
|
+
# Stores the current execution context's leaf _TokenLimitNode.
|
21
|
+
# The resulting data structure is a tree of _TokenLimitNode nodes which each
|
22
|
+
# have a pointer to their parent node. Each additional context manager inserts a new
|
23
|
+
# child node into the tree. The fact that there can be multiple execution contexts is
|
24
|
+
# what makes this a tree rather than a stack.
|
25
|
+
token_limit_leaf_node: ContextVar[_TokenLimitNode | None] = ContextVar(
|
26
|
+
"token_limit_leaf_node", default=None
|
27
|
+
)
|
28
|
+
message_limit_leaf_node: ContextVar[_MessageLimitNode | None] = ContextVar(
|
29
|
+
"message_limit_leaf_node", default=None
|
30
|
+
)
|
31
|
+
|
32
|
+
|
33
|
+
class LimitExceededError(Exception):
    """Exception raised when a limit is exceeded.

    In some scenarios this error may be raised when `value >= limit` to
    prevent another operation which is guaranteed to exceed the limit from being
    wastefully performed.

    Args:
        type: Type of limit exceeded.
        value: Value compared to.
        limit: Limit applied.
        message (str | None): Optional. Human readable message.
    """

    def __init__(
        self,
        type: Literal["message", "time", "working", "token", "operator", "custom"],
        *,
        value: int,
        limit: int,
        message: str | None = None,
    ) -> None:
        self.type = type
        self.value = value
        self.limit = limit
        # Bug fix: a caller-supplied message was previously discarded
        # (self.message was unconditionally set to the generic default, and the
        # raw argument - possibly None - was passed to Exception, making
        # str(error) render as "None"). Honor the supplied message, falling
        # back to the default, and hand the final text to Exception.
        self.message = message or f"Exceeded {type} limit: {limit:,}"
        super().__init__(self.message)

    def with_state(self, state: TaskState) -> LimitExceededError:
        """Deprecated. Returns self unchanged (state is no longer required)."""
        warn_once(
            logger,
            "LimitExceededError.with_state() is deprecated (no longer required).",
        )
        return self
|
67
|
+
|
68
|
+
|
69
|
+
class Limit(abc.ABC):
    """Base class for all limits.

    A limit is a (re-entrant-friendly) context manager: opening it activates
    the limit and closing it deactivates it.
    """

    @abc.abstractmethod
    def __enter__(self) -> Limit: ...

    @abc.abstractmethod
    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None: ...
|
84
|
+
|
85
|
+
|
86
|
+
@contextmanager
def apply_limits(limits: list[Limit]) -> Iterator[None]:
    """
    Apply a list of limits within a context manager.

    Args:
        limits: List of limits to apply while the context manager is open. Should a
          limit be exceeded, a LimitExceededError is raised.
    """
    with ExitStack() as active:
        # Enter every limit up front; ExitStack unwinds them in reverse
        # order when the block exits (normally or with an error).
        for single in limits:
            active.enter_context(single)
        yield
|
99
|
+
|
100
|
+
|
101
|
+
def token_limit(limit: int | None) -> _TokenLimit:
    """Limits the total number of tokens which can be used.

    Counting begins when the context manager opens and stops when it closes.
    The same context manager may be opened repeatedly, even across different
    execution contexts, and token limits may be stacked.

    Enforcement is "cooperative": code which consumes tokens must itself call
    check_token_limit() after recording usage. A LimitExceededError is raised
    when a limit is exceeded.

    Args:
        limit: The maximum number of tokens that can be used while the context
            manager is open. Tokens used before it was opened are not counted.
            A value of None means unlimited tokens.
    """
    return _TokenLimit(limit)
|
121
|
+
|
122
|
+
|
123
|
+
def record_model_usage(usage: ModelUsage) -> None:
    """Record model usage against any active token limits.

    Does not check if the limit has been exceeded.
    """
    leaf = token_limit_leaf_node.get()
    if leaf is not None:
        leaf.record(usage)
|
132
|
+
|
133
|
+
|
134
|
+
def check_token_limit() -> None:
    """Check if the current token usage exceeds _any_ of the token limits.

    Only the current execution context (e.g. async task) and its parent
    contexts are inspected, but every active limit along that chain is
    checked - not just the most recent one.
    """
    leaf = token_limit_leaf_node.get()
    if leaf is not None:
        leaf.check()
|
145
|
+
|
146
|
+
|
147
|
+
def message_limit(limit: int | None) -> _MessageLimit:
    """Limits the number of messages in a conversation.

    The limit is compared against the conversation's *total* message count,
    not just messages added after the limit was opened. The context manager
    may be opened repeatedly, even across different execution contexts, and
    message limits may be stacked.

    Enforcement is "cooperative": code which updates the message count must
    itself call check_message_limit(). A LimitExceededError is raised when a
    limit is exceeded.

    Args:
        limit: The maximum conversation length (number of messages) allowed
            while the context manager is open. A value of None means
            unlimited messages.
    """
    return _MessageLimit(limit)
|
166
|
+
|
167
|
+
|
168
|
+
def check_message_limit(count: int, raise_for_equal: bool) -> None:
    """Check if the current message count exceeds the active message limit.

    Only the most recent message limit is checked. Ancestors are not checked.

    Args:
        count: The number of messages in the conversation.
        raise_for_equal: If True, raise an error if the message count is equal to the
            limit, otherwise, only raise an error if the message count is greater than
            the limit.
    """
    leaf = message_limit_leaf_node.get()
    if leaf is not None:
        leaf.check(count, raise_for_equal)
|
183
|
+
|
184
|
+
|
185
|
+
class _LimitValueWrapper:
|
186
|
+
"""Container/wrapper type for the limit value.
|
187
|
+
|
188
|
+
This facilitates updating the limit value, which may have been passed to many
|
189
|
+
_TokenLimitNode instances.
|
190
|
+
"""
|
191
|
+
|
192
|
+
def __init__(self, value: int | None) -> None:
|
193
|
+
self.value = value
|
194
|
+
|
195
|
+
|
196
|
+
class _TokenLimit(Limit):
    """Context-manager limit on total token usage (created via token_limit())."""

    def __init__(self, limit: int | None) -> None:
        self._validate_token_limit(limit)
        self._limit_value_wrapper = _LimitValueWrapper(limit)

    def __enter__(self) -> Limit:
        # Push a fresh node onto this execution context's chain. The node is
        # deliberately not stored on self: this context manager may be opened
        # repeatedly, even concurrently in different execution contexts.
        child = _TokenLimitNode(self._limit_value_wrapper, token_limit_leaf_node.get())
        token_limit_leaf_node.set(child)
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        leaf = token_limit_leaf_node.get()
        assert leaf is not None, (
            "Token limit node should not be None when exiting context manager."
        )
        # Pop this node: its parent becomes the leaf again.
        token_limit_leaf_node.set(leaf.parent)

    @property
    def limit(self) -> int | None:
        """Get the configured token limit value."""
        return self._limit_value_wrapper.value

    @limit.setter
    def limit(self, value: int | None) -> None:
        """Update the token limit value.

        Affects every active token limit node derived from this context
        manager. Does not itself re-check usage against the new (possibly
        lower) limit.
        """
        self._validate_token_limit(value)
        self._limit_value_wrapper.value = value

    def _validate_token_limit(self, value: int | None) -> None:
        if value is not None and value < 0:
            raise ValueError("Token limit value must be a non-negative integer.")
|
243
|
+
|
244
|
+
|
245
|
+
class _TokenLimitNode:
    """One link in a chain of active token limits.

    Each node points at its parent (None at the root); because multiple
    execution contexts may branch from a shared ancestor, the nodes overall
    form a tree. A node accumulates token usage for itself and checks that
    usage against a (mutable, shared) limit value.
    """

    def __init__(
        self,
        limit: _LimitValueWrapper,
        parent: _TokenLimitNode | None,
    ) -> None:
        """
        Initialize a token limit node.

        Args:
            limit: Shared wrapper holding the maximum number of tokens that
                can be used while the context manager is open.
            parent: The parent node in the tree (None for the root).
        """
        from inspect_ai.model._model_output import ModelUsage

        self._limit = limit
        self.parent = parent
        self._usage = ModelUsage()

    def record(self, usage: ModelUsage) -> None:
        """Record model usage for this node and every ancestor node."""
        node: _TokenLimitNode | None = self
        while node is not None:
            node._usage += usage
            node = node.parent

    def check(self) -> None:
        """Check this limit and every ancestor limit, innermost first."""
        node: _TokenLimitNode | None = self
        while node is not None:
            node._check_self()
            node = node.parent

    def _check_self(self) -> None:
        from inspect_ai.log._transcript import SampleLimitEvent, transcript

        limit = self._limit.value
        if limit is None:
            return
        total = self._usage.total_tokens
        if total > limit:
            message = (
                f"Token limit exceeded. value: {total:,}; limit: {limit:,}"
            )
            # Log the limit event to the transcript before raising.
            transcript()._event(
                SampleLimitEvent(type="token", limit=limit, message=message)
            )
            raise LimitExceededError(
                "token", value=total, limit=limit, message=message
            )
|
299
|
+
|
300
|
+
|
301
|
+
class _MessageLimit(Limit):
    """Context-manager limit on conversation length (created via message_limit())."""

    def __init__(self, limit: int | None) -> None:
        self._validate_message_limit(limit)
        self._limit_value_wrapper = _LimitValueWrapper(limit)

    def __enter__(self) -> Limit:
        # Push a fresh node onto this execution context's chain. The node is
        # deliberately not stored on self: this context manager may be opened
        # repeatedly, even concurrently in different execution contexts.
        child = _MessageLimitNode(
            self._limit_value_wrapper, message_limit_leaf_node.get()
        )
        message_limit_leaf_node.set(child)
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        leaf = message_limit_leaf_node.get()
        assert leaf is not None, (
            "Message limit node should not be None when exiting context manager."
        )
        # Pop this node: its parent becomes the leaf again.
        message_limit_leaf_node.set(leaf.parent)

    @property
    def limit(self) -> int | None:
        """Get the configured message limit value."""
        return self._limit_value_wrapper.value

    @limit.setter
    def limit(self, value: int | None) -> None:
        """Update the message limit value.

        Affects every active message limit node derived from this context
        manager. Does not itself re-check the count against the new (possibly
        lower) limit.
        """
        self._validate_message_limit(value)
        self._limit_value_wrapper.value = value

    def _validate_message_limit(self, value: int | None) -> None:
        if value is not None and value < 0:
            raise ValueError("Message limit value must be a non-negative integer.")
|
348
|
+
|
349
|
+
|
350
|
+
class _MessageLimitNode:
    """One link in a chain of active message limits.

    Each node points at its parent (None at the root); because multiple
    execution contexts may branch from a shared ancestor, the nodes overall
    form a tree. A node compares a conversation's message count against a
    (mutable, shared) limit value.
    """

    def __init__(
        self,
        limit: _LimitValueWrapper,
        parent: _MessageLimitNode | None,
    ) -> None:
        """
        Initialize a message limit node.

        Args:
            limit: Shared wrapper holding the maximum conversation length
                allowed while this node is the leaf of the current execution
                context.
            parent: The parent node in the tree (None for the root).
        """
        self._limit = limit
        self.parent = parent

    def check(self, count: int, raise_for_equal: bool) -> None:
        """Check if this message limit has been exceeded.

        Does not check parents.
        """
        from inspect_ai.log._transcript import SampleLimitEvent, transcript

        limit = self._limit.value
        if limit is None:
            return
        at_limit = raise_for_equal and count == limit
        if count > limit or at_limit:
            reached_or_exceeded = "reached" if count == limit else "exceeded"
            message = (
                f"Message limit {reached_or_exceeded}. count: {count:,}; "
                f"limit: {limit:,}"
            )
            # Log the limit event to the transcript before raising.
            transcript()._event(
                SampleLimitEvent(type="message", limit=limit, message=message)
            )
            raise LimitExceededError(
                "message", value=count, limit=limit, message=message
            )
|
@@ -0,0 +1,57 @@
|
|
1
|
+
from itertools import tee
|
2
|
+
from typing import Iterable, SupportsIndex, overload
|
3
|
+
|
4
|
+
from inspect_ai.model._chat_message import ChatMessage, ChatMessageBase
|
5
|
+
from inspect_ai.util._limit import check_message_limit
|
6
|
+
|
7
|
+
|
8
|
+
class ChatMessageList(list[ChatMessage]):
    """A limited list of ChatMessage items.

    Raises an exception if an operation would exceed the active message limit.
    """

    def __init__(self, iterable: Iterable[ChatMessage]):
        initial, count = self._iterable_length(iterable)
        self._check_size(count)
        super().__init__(initial)

    def _check_size(self, additional_items: int) -> None:
        # Project the post-mutation length and let the active limit veto it.
        check_message_limit(len(self) + additional_items, raise_for_equal=False)

    def append(self, item: ChatMessage) -> None:
        self._check_size(1)
        super().append(item)

    def extend(self, items: Iterable[ChatMessage]) -> None:
        items, count = self._iterable_length(items)
        self._check_size(count)
        super().extend(items)

    def insert(self, index: SupportsIndex, item: ChatMessage) -> None:
        self._check_size(1)
        super().insert(index, item)

    @overload
    def __setitem__(self, index: SupportsIndex, item: ChatMessage) -> None: ...

    @overload
    def __setitem__(self, index: slice, item: Iterable[ChatMessage]) -> None: ...

    def __setitem__(
        self, index: SupportsIndex | slice, item: ChatMessage | Iterable[ChatMessage]
    ) -> None:
        # Only a slice assignment of an iterable can grow the list; a plain
        # index assignment replaces one element and leaves the length alone.
        if isinstance(index, slice) and not isinstance(item, ChatMessageBase):
            item, count = self._iterable_length(item)
            growth = count - len(self[index])
            if growth > 0:
                self._check_size(growth)

        super().__setitem__(index, item)  # type: ignore[assignment,index]

    def _iterable_length(
        self, items: Iterable[ChatMessage]
    ) -> tuple[Iterable[ChatMessage], int]:
        # tee() so a one-shot iterator can still be consumed after counting.
        replay, probe = tee(items)
        return replay, sum(1 for _ in probe)
|
inspect_ai/util/_span.py
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
import contextlib
|
2
|
+
from contextvars import ContextVar
|
3
|
+
from typing import AsyncIterator
|
4
|
+
from uuid import uuid4
|
5
|
+
|
6
|
+
|
7
|
+
@contextlib.asynccontextmanager
async def span(name: str, *, type: str | None = None) -> AsyncIterator[None]:
    """Context manager for establishing a transcript span.

    Emits a SpanBeginEvent on entry and a SpanEndEvent on exit (the end event
    is emitted even if the body raises), so events recorded in between are
    bracketed by the span in the transcript.

    Args:
        name (str): Span name.
        type (str | None): Optional span type.
    """
    # NOTE(review): deferred import - presumably avoids an import cycle with
    # the log package; confirm.
    from inspect_ai.log._transcript import (
        SpanBeginEvent,
        SpanEndEvent,
        track_store_changes,
        transcript,
    )

    # span id
    id = uuid4().hex

    # capture parent id (None when this is a top-level span)
    parent_id = _current_span_id.get()

    # set new current span (token lets us restore the parent at the end)
    token = _current_span_id.set(id)

    # run the span
    try:
        # span begin event
        transcript()._event(
            SpanBeginEvent(
                id=id,
                parent_id=parent_id,
                type=type,
                name=name,
            )
        )

        # run span w/ store change events
        with track_store_changes():
            yield

    finally:
        # send end event (in finally so spans always close, even on error)
        transcript()._event(SpanEndEvent(id=id))

        _current_span_id.reset(token)
|
52
|
+
|
53
|
+
|
54
|
+
def current_span_id() -> str | None:
|
55
|
+
return _current_span_id.get()
|
56
|
+
|
57
|
+
|
58
|
+
_current_span_id: ContextVar[str | None] = ContextVar("_current_span_id", default=None)
|
inspect_ai/util/_subtask.py
CHANGED
@@ -16,6 +16,7 @@ from inspect_ai._util._async import is_callable_coroutine, tg_collect
|
|
16
16
|
from inspect_ai._util.content import Content
|
17
17
|
from inspect_ai._util.trace import trace_action
|
18
18
|
from inspect_ai._util.working import sample_waiting_time
|
19
|
+
from inspect_ai.util._span import span
|
19
20
|
from inspect_ai.util._store import Store, dict_jsonable, init_subtask_store
|
20
21
|
|
21
22
|
SubtaskResult = str | int | float | bool | list[Content]
|
@@ -85,9 +86,7 @@ def subtask(
|
|
85
86
|
|
86
87
|
def create_subtask_wrapper(func: Subtask, name: str | None = None) -> Subtask:
|
87
88
|
from inspect_ai.log._transcript import (
|
88
|
-
Event,
|
89
89
|
SubtaskEvent,
|
90
|
-
track_store_changes,
|
91
90
|
transcript,
|
92
91
|
)
|
93
92
|
|
@@ -118,43 +117,41 @@ def subtask(
|
|
118
117
|
log_input = dict_jsonable(log_input | kwargs)
|
119
118
|
|
120
119
|
# create coroutine so we can provision a subtask contextvars
|
121
|
-
async def run() ->
|
120
|
+
async def run() -> RT:
|
122
121
|
# initialise subtask (provisions store and transcript)
|
123
|
-
|
122
|
+
init_subtask_store(store if store else Store())
|
124
123
|
|
125
124
|
# run the subtask
|
126
125
|
with trace_action(logger, "Subtask", subtask_name):
|
127
|
-
with
|
126
|
+
async with span(name=subtask_name, type="subtask"):
|
127
|
+
# create subtask event
|
128
|
+
waiting_time_start = sample_waiting_time()
|
129
|
+
event = SubtaskEvent(
|
130
|
+
name=subtask_name, input=log_input, type=type, pending=True
|
131
|
+
)
|
132
|
+
transcript()._event(event)
|
133
|
+
|
134
|
+
# run the subtask
|
128
135
|
result = await func(*args, **kwargs)
|
129
136
|
|
130
|
-
|
131
|
-
|
137
|
+
# time accounting
|
138
|
+
completed = datetime.now()
|
139
|
+
waiting_time_end = sample_waiting_time()
|
140
|
+
event.completed = completed
|
141
|
+
event.working_time = (
|
142
|
+
completed - event.timestamp
|
143
|
+
).total_seconds() - (waiting_time_end - waiting_time_start)
|
132
144
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
)
|
138
|
-
transcript()._event(event)
|
139
|
-
|
140
|
-
# create and run the task as a coroutine
|
141
|
-
result, events = (await tg_collect([run]))[0]
|
142
|
-
|
143
|
-
# time accounting
|
144
|
-
completed = datetime.now()
|
145
|
-
waiting_time_end = sample_waiting_time()
|
146
|
-
event.completed = completed
|
147
|
-
event.working_time = (completed - event.timestamp).total_seconds() - (
|
148
|
-
waiting_time_end - waiting_time_start
|
149
|
-
)
|
145
|
+
# update event
|
146
|
+
event.result = result
|
147
|
+
event.pending = None
|
148
|
+
transcript()._event_updated(event)
|
150
149
|
|
151
|
-
|
152
|
-
|
153
|
-
event.events = events
|
154
|
-
event.pending = None
|
155
|
-
transcript()._event_updated(event)
|
150
|
+
# return result
|
151
|
+
return result # type: ignore[no-any-return]
|
156
152
|
|
157
|
-
#
|
153
|
+
# create and run the task as a coroutine
|
154
|
+
result = (await tg_collect([run]))[0]
|
158
155
|
return result
|
159
156
|
|
160
157
|
return run_subtask
|
@@ -167,15 +164,3 @@ def subtask(
|
|
167
164
|
return wrapper
|
168
165
|
else:
|
169
166
|
return create_subtask_wrapper(name)
|
170
|
-
|
171
|
-
|
172
|
-
def init_subtask(name: str, store: Store) -> Any:
|
173
|
-
from inspect_ai.log._transcript import (
|
174
|
-
Transcript,
|
175
|
-
init_transcript,
|
176
|
-
)
|
177
|
-
|
178
|
-
init_subtask_store(store)
|
179
|
-
transcript = Transcript(name=name)
|
180
|
-
init_transcript(transcript)
|
181
|
-
return transcript
|