PyPI - inspect-ai - Versions diffs - 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl - Mend

inspect-ai 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131) hide show

inspect_ai/__init__.py +2 -0
inspect_ai/_cli/log.py +1 -1
inspect_ai/_display/core/config.py +11 -5
inspect_ai/_display/core/panel.py +66 -2
inspect_ai/_display/core/textual.py +5 -2
inspect_ai/_display/plain/display.py +1 -0
inspect_ai/_display/rich/display.py +2 -2
inspect_ai/_display/textual/widgets/transcript.py +41 -1
inspect_ai/_eval/run.py +12 -4
inspect_ai/_eval/score.py +2 -4
inspect_ai/_eval/task/log.py +1 -1
inspect_ai/_eval/task/run.py +59 -81
inspect_ai/_eval/task/task.py +1 -1
inspect_ai/_util/_async.py +1 -1
inspect_ai/_util/content.py +11 -6
inspect_ai/_util/interrupt.py +2 -2
inspect_ai/_util/text.py +7 -0
inspect_ai/_util/working.py +8 -37
inspect_ai/_view/__init__.py +0 -0
inspect_ai/_view/schema.py +3 -1
inspect_ai/_view/view.py +14 -0
inspect_ai/_view/www/CLAUDE.md +15 -0
inspect_ai/_view/www/dist/assets/index.css +273 -169
inspect_ai/_view/www/dist/assets/index.js +20079 -17019
inspect_ai/_view/www/log-schema.json +122 -8
inspect_ai/_view/www/package.json +5 -1
inspect_ai/_view/www/src/@types/log.d.ts +20 -2
inspect_ai/_view/www/src/app/App.tsx +1 -15
inspect_ai/_view/www/src/app/appearance/icons.ts +4 -1
inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +24 -6
inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +0 -5
inspect_ai/_view/www/src/app/content/RenderedContent.tsx +221 -205
inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +2 -1
inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +5 -0
inspect_ai/_view/www/src/app/routing/url.ts +84 -4
inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.module.css +0 -5
inspect_ai/_view/www/src/app/samples/SampleDialog.module.css +1 -1
inspect_ai/_view/www/src/app/samples/SampleDisplay.module.css +7 -0
inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +26 -19
inspect_ai/_view/www/src/app/samples/SampleSummaryView.module.css +1 -2
inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +8 -6
inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.tsx +0 -4
inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.tsx +3 -2
inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +2 -0
inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +2 -0
inspect_ai/_view/www/src/app/samples/chat/messages.ts +1 -0
inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +1 -0
inspect_ai/_view/www/src/app/samples/list/SampleRow.tsx +1 -1
inspect_ai/_view/www/src/app/samples/scores/SampleScoresGrid.module.css +2 -2
inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +2 -3
inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.tsx +1 -1
inspect_ai/_view/www/src/app/samples/transcript/InputEventView.tsx +1 -2
inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.module.css +1 -1
inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
inspect_ai/_view/www/src/app/samples/transcript/SampleInitEventView.tsx +1 -1
inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +3 -2
inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.tsx +4 -5
inspect_ai/_view/www/src/app/samples/transcript/ScoreEventView.tsx +1 -1
inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +1 -2
inspect_ai/_view/www/src/app/samples/transcript/StepEventView.tsx +1 -3
inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +1 -2
inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +3 -4
inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.module.css +42 -0
inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.tsx +77 -0
inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualList.tsx +27 -71
inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +13 -3
inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.tsx +27 -2
inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.module.css +1 -0
inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +21 -22
inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.module.css +45 -0
inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +223 -0
inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.module.css +10 -0
inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +258 -0
inspect_ai/_view/www/src/app/samples/transcript/outline/tree-visitors.ts +187 -0
inspect_ai/_view/www/src/app/samples/transcript/state/StateEventRenderers.tsx +8 -1
inspect_ai/_view/www/src/app/samples/transcript/state/StateEventView.tsx +3 -4
inspect_ai/_view/www/src/app/samples/transcript/transform/hooks.ts +78 -0
inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +340 -135
inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +3 -0
inspect_ai/_view/www/src/app/samples/transcript/types.ts +2 -0
inspect_ai/_view/www/src/app/types.ts +5 -1
inspect_ai/_view/www/src/client/api/api-browser.ts +2 -2
inspect_ai/_view/www/src/components/LiveVirtualList.tsx +6 -1
inspect_ai/_view/www/src/components/MarkdownDiv.tsx +1 -1
inspect_ai/_view/www/src/components/PopOver.tsx +422 -0
inspect_ai/_view/www/src/components/PulsingDots.module.css +9 -9
inspect_ai/_view/www/src/components/PulsingDots.tsx +4 -1
inspect_ai/_view/www/src/components/StickyScroll.tsx +183 -0
inspect_ai/_view/www/src/components/TabSet.tsx +4 -0
inspect_ai/_view/www/src/state/hooks.ts +52 -2
inspect_ai/_view/www/src/state/logSlice.ts +4 -3
inspect_ai/_view/www/src/state/samplePolling.ts +8 -0
inspect_ai/_view/www/src/state/sampleSlice.ts +53 -9
inspect_ai/_view/www/src/state/scrolling.ts +152 -0
inspect_ai/_view/www/src/utils/attachments.ts +7 -0
inspect_ai/_view/www/src/utils/python.ts +18 -0
inspect_ai/_view/www/yarn.lock +269 -6
inspect_ai/agent/_react.py +12 -7
inspect_ai/agent/_run.py +46 -11
inspect_ai/analysis/beta/_dataframe/samples/table.py +19 -18
inspect_ai/log/_bundle.py +5 -3
inspect_ai/log/_log.py +3 -3
inspect_ai/log/_recorders/file.py +2 -9
inspect_ai/log/_transcript.py +1 -1
inspect_ai/model/_call_tools.py +6 -2
inspect_ai/model/_openai.py +1 -1
inspect_ai/model/_openai_responses.py +78 -39
inspect_ai/model/_openai_web_search.py +31 -0
inspect_ai/model/_providers/anthropic.py +3 -6
inspect_ai/model/_providers/azureai.py +72 -3
inspect_ai/model/_providers/openai.py +2 -1
inspect_ai/model/_providers/providers.py +1 -1
inspect_ai/scorer/_metric.py +1 -2
inspect_ai/solver/_task_state.py +2 -2
inspect_ai/tool/_tool.py +6 -2
inspect_ai/tool/_tool_def.py +27 -4
inspect_ai/tool/_tool_info.py +2 -0
inspect_ai/tool/_tools/_web_search/_google.py +15 -4
inspect_ai/tool/_tools/_web_search/_tavily.py +35 -12
inspect_ai/tool/_tools/_web_search/_web_search.py +214 -45
inspect_ai/util/__init__.py +6 -0
inspect_ai/util/_json.py +3 -0
inspect_ai/util/_limit.py +374 -141
inspect_ai/util/_sandbox/docker/compose.py +20 -11
inspect_ai/util/_span.py +1 -1
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/METADATA +3 -3
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/RECORD +131 -117
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/WHEEL +1 -1
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/licenses/LICENSE +0 -0
{inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/top_level.txt +0 -0

inspect_ai/util/_limit.py CHANGED Viewed

@@ -5,7 +5,10 @@ import logging
 from contextlib import ExitStack, contextmanager
 from contextvars import ContextVar
 from types import TracebackType
-from typing import TYPE_CHECKING, Iterator, Literal
+from typing import TYPE_CHECKING, Generic, Iterator, Literal, TypeVar
+import anyio
+from typing_extensions import Self
 from inspect_ai._util.logger import warn_once
@@ -16,18 +19,7 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
-# Stores the current execution context's leaf _TokenLimitNode.
-# The resulting data structure is a tree of _TokenLimitNode nodes which each
-# have a pointer to their parent node. Each additional context manager inserts a new
-# child node into the tree. The fact that there can be multiple execution contexts is
-# what makes this a tree rather than a stack.
-token_limit_leaf_node: ContextVar[_TokenLimitNode | None] = ContextVar(
-    "token_limit_leaf_node", default=None
-)
-message_limit_leaf_node: ContextVar[_MessageLimitNode | None] = ContextVar(
-    "message_limit_leaf_node", default=None
-)
+TNode = TypeVar("TNode", bound="_Node")
 class LimitExceededError(Exception):
@@ -42,20 +34,25 @@ class LimitExceededError(Exception):
        value: Value compared to.
        limit: Limit applied.
        message (str | None): Optional. Human readable message.
+       source (Limit | None): Optional. The `Limit` instance which was responsible for raising this error.
     """
     def __init__(
         self,
         type: Literal["message", "time", "working", "token", "operator", "custom"],
         *,
-        value: int,
-        limit: int,
+        value: float,
+        limit: float,
         message: str | None = None,
+        source: Limit | None = None,
     ) -> None:
         self.type = type
         self.value = value
+        self.value_str = self._format_float_or_int(value)
         self.limit = limit
+        self.limit_str = self._format_float_or_int(limit)
         self.message = f"Exceeded {type} limit: {limit:,}"
+        self.source = source
         super().__init__(message)
     def with_state(self, state: TaskState) -> LimitExceededError:
@@ -65,9 +62,18 @@ class LimitExceededError(Exception):
         )
         return self
+    def _format_float_or_int(self, value: float | int) -> str:
+        if isinstance(value, int):
+            return f"{value:,}"
+        else:
+            return f"{value:,.2f}"
 class Limit(abc.ABC):
-    """Base class for all limits."""
+    """Base class for all limit context managers."""
+    def __init__(self) -> None:
+        self._entered = False
     @abc.abstractmethod
     def __enter__(self) -> Limit:
@@ -82,35 +88,81 @@ class Limit(abc.ABC):
     ) -> None:
         pass
+    @property
+    @abc.abstractmethod
+    def usage(self) -> float:
+        """The current usage of the resource being limited."""
+        pass
+    def _check_reuse(self) -> None:
+        if self._entered:
+            raise RuntimeError(
+                "Each Limit may only be used once in a single 'with' block. Please "
+                "create a new instance of the Limit."
+            )
+        self._entered = True
 @contextmanager
-def apply_limits(limits: list[Limit]) -> Iterator[None]:
+def apply_limits(
+    limits: list[Limit], catch_errors: bool = False
+) -> Iterator[LimitScope]:
     """
     Apply a list of limits within a context manager.
+    Optionally catches any `LimitExceededError` raised by the applied limits, while
+    allowing other limit errors from any other scope (e.g. the Sample level) to
+    propagate.
+    Yields a `LimitScope` object which can be used once the context manager is closed
+    to determine which, if any, limits were exceeded.
     Args:
       limits: List of limits to apply while the context manager is open. Should a
-        limit be exceeded, a LimitExceededError is raised.
+        limit be exceeded, a `LimitExceededError` is raised.
+      catch_errors: If True, catch any `LimitExceededError` raised by the applied
+        limits. Callers can determine whether any limits were exceeded by checking the
+        limit_error property of the `LimitScope` object yielded by this function. If
+        False, all `LimitExceededError` exceptions will be allowed to propagate.
+    """
+    limit_scope = LimitScope()
+    # Try scope is outside the `with ExitStack()` so that we can catch any errors raised
+    # when exiting it (which will be where time_limit() would raise LimitExceededError).
+    try:
+        with ExitStack() as stack:
+            for limit in limits:
+                stack.enter_context(limit)
+            yield limit_scope
+    except LimitExceededError as e:
+        # If it was not one of the limits we applied.
+        if e.source is None or e.source not in limits:
+            raise
+        limit_scope.limit_error = e
+        if not catch_errors:
+            raise
+class LimitScope:
+    """Object returned from `apply_limits()`.
+    Used to check which, if any, limits were exceeded.
     """
-    with ExitStack() as stack:
-        for limit in limits:
-            stack.enter_context(limit)
-        yield
+    def __init__(self) -> None:
+        self.limit_error: LimitExceededError | None = None
 def token_limit(limit: int | None) -> _TokenLimit:
     """Limits the total number of tokens which can be used.
     The counter starts when the context manager is opened and ends when it is closed.
-    The context manager can be opened multiple times, even in different execution
-    contexts.
     These limits can be stacked.
-    This relies on "cooperative" checking - consumers must call check_token_limit()
+    This relies on "cooperative" checking - consumers must call `check_token_limit()`
     themselves whenever tokens are consumed.
-    When a limit is exceeded, a LimitExceededError is raised.
+    When a limit is exceeded, a `LimitExceededError` is raised.
     Args:
       limit: The maximum number of tokens that can be used while the context manager is
@@ -125,7 +177,7 @@ def record_model_usage(usage: ModelUsage) -> None:
     Does not check if the limit has been exceeded.
     """
-    node = token_limit_leaf_node.get()
+    node = token_limit_tree.get()
     if node is None:
         return
     node.record(usage)
@@ -138,7 +190,7 @@ def check_token_limit() -> None:
     Note that all active token limits are checked, not just the most recent one.
     """
-    node = token_limit_leaf_node.get()
+    node = token_limit_tree.get()
     if node is None:
         return
     node.check()
@@ -148,15 +200,14 @@ def message_limit(limit: int | None) -> _MessageLimit:
     """Limits the number of messages in a conversation.
     The total number of messages in the conversation are compared to the limit (not just
-    "new" messages). The context manager can be opened multiple times, even in different
-    execution contexts.
+    "new" messages).
     These limits can be stacked.
     This relies on "cooperative" checking - consumers must call check_message_limit()
     themselves whenever the message count is updated.
-    When a limit is exceeded, a LimitExceededError is raised.
+    When a limit is exceeded, a `LimitExceededError` is raised.
     Args:
       limit: The maximum conversation length (number of messages) allowed while the
@@ -176,35 +227,135 @@ def check_message_limit(count: int, raise_for_equal: bool) -> None:
         limit, otherwise, only raise an error if the message count is greater than the
         limit.
     """
-    node = message_limit_leaf_node.get()
+    node = message_limit_tree.get()
     if node is None:
         return
     node.check(count, raise_for_equal)
-class _LimitValueWrapper:
-    """Container/wrapper type for the limit value.
+def time_limit(limit: float | None) -> _TimeLimit:
+    """Limits the wall clock time which can elapse.
+    The timer starts when the context manager is opened and stops when it is closed.
+    These limits can be stacked.
+    When a limit is exceeded, the code block is cancelled and a `LimitExceededError` is
+    raised.
+    Uses anyio's cancellation scopes meaning that the operations within the context
+    manager block are cancelled if the limit is exceeded. The `LimitExceededError` is
+    therefore raised at the level that the `time_limit()` context manager was opened,
+    not at the level of the operation which caused the limit to be exceeded (e.g. a call
+    to `generate()`). Ensure you handle `LimitExceededError` at the level of opening the context manager.
+    Args:
+      limit: The maximum number of seconds that can pass while the context manager is
+        open. A value of None means unlimited time.
+    """
+    return _TimeLimit(limit)
+def working_limit(limit: float | None) -> _WorkingLimit:
+    """Limits the working time which can elapse.
+    Working time is the wall clock time minus any waiting time e.g. waiting before
+    retrying in response to rate limits or waiting on a semaphore.
-    This facilitates updating the limit value, which may have been passed to many
-    _TokenLimitNode instances.
+    The timer starts when the context manager is opened and stops when it is closed.
+    These limits can be stacked.
+    When a limit is exceeded, a `LimitExceededError` is raised.
+    Args:
+      limit: The maximum number of seconds of working that can pass while the context
+        manager is open. A value of None means unlimited time.
     """
+    return _WorkingLimit(limit)
+def record_waiting_time(waiting_time: float) -> None:
+    node = working_limit_tree.get()
+    if node is None:
+        return
+    node.record_waiting_time(waiting_time)
+def check_working_limit() -> None:
+    node = working_limit_tree.get()
+    if node is None:
+        return
+    node.check()
-    def __init__(self, value: int | None) -> None:
-        self.value = value
+class _Tree(Generic[TNode]):
+    """A tree data structure of limit nodes.
-class _TokenLimit(Limit):
+    Each node has a pointer to its parent, or None if it is a root node.
+    Each additional context manager inserts a new child node into the tree. The fact
+    that there can be multiple execution contexts is what makes this a tree rather than
+    a stack and why a context variable is used to store the leaf node.
+    """
+    def __init__(self, id: str) -> None:
+        self._leaf_node: ContextVar[TNode | None] = ContextVar(id, default=None)
+    def get(self) -> TNode | None:
+        return self._leaf_node.get()
+    def push(self, new_node: TNode) -> None:
+        current_leaf = self._leaf_node.get()
+        new_node.parent = current_leaf
+        self._leaf_node.set(new_node)
+    def pop(self) -> TNode:
+        current_leaf = self._leaf_node.get()
+        if current_leaf is None:
+            raise RuntimeError("Limit tree is empty. Cannot pop from an empty tree.")
+        self._leaf_node.set(current_leaf.parent)
+        return current_leaf
+token_limit_tree: _Tree[_TokenLimit] = _Tree("token_limit_tree")
+# Store the message limit leaf node so that we know which limit to check in
+# check_message_limit().
+message_limit_tree: _Tree[_MessageLimit] = _Tree("message_limit_tree")
+working_limit_tree: _Tree[_WorkingLimit] = _Tree("working_limit_tree")
+class _Node:
+    """Mixin for objects used as nodes in a limit tree.
+    This allows us to have an "internal" parent property which is not exported as part
+    of the public API.
+    """
+    parent: Self | None
+    def _pop_and_check_identity(self, tree: _Tree[TNode]) -> None:
+        popped = tree.pop()
+        if popped is not self:
+            raise RuntimeError(
+                "The limit context manager being closed is not the leaf node in the "
+                "tree. Make sure to open and close the context managers in a "
+                "stack-like manner using a `with` statement."
+            )
+class _TokenLimit(Limit, _Node):
     def __init__(self, limit: int | None) -> None:
+        from inspect_ai.model._model_output import ModelUsage
+        super().__init__()
         self._validate_token_limit(limit)
-        self._limit_value_wrapper = _LimitValueWrapper(limit)
+        self._limit = limit
+        self._usage = ModelUsage()
     def __enter__(self) -> Limit:
-        current_node = token_limit_leaf_node.get()
-        new_node = _TokenLimitNode(self._limit_value_wrapper, current_node)
-        # Note that we don't store new_node as an instance variable, because the context
-        # manager may be used across multiple execution contexts, or opened multiple
-        # times.
-        token_limit_leaf_node.set(new_node)
+        super()._check_reuse()
+        token_limit_tree.push(self)
         return self
     def __exit__(
@@ -213,103 +364,75 @@ class _TokenLimit(Limit):
         exc_val: BaseException | None,
         exc_tb: TracebackType | None,
     ) -> None:
-        current_node = token_limit_leaf_node.get()
-        assert current_node is not None, (
-            "Token limit node should not be None when exiting context manager."
-        )
-        token_limit_leaf_node.set(current_node.parent)
+        self._pop_and_check_identity(token_limit_tree)
+    @property
+    def usage(self) -> float:
+        return self._usage.total_tokens
     @property
     def limit(self) -> int | None:
         """Get the configured token limit value."""
-        return self._limit_value_wrapper.value
+        return self._limit
     @limit.setter
     def limit(self, value: int | None) -> None:
         """Update the token limit value.
-        This will affect the limit for all active token limit nodes derived from this
-        context manager.
         This does not trigger a check of the token limit (which could now have been
         exceeded).
         """
         self._validate_token_limit(value)
-        self._limit_value_wrapper.value = value
-    def _validate_token_limit(self, value: int | None) -> None:
-        if value is not None and value < 0:
-            raise ValueError("Token limit value must be a non-negative integer.")
-class _TokenLimitNode:
-    def __init__(
-        self,
-        limit: _LimitValueWrapper,
-        parent: _TokenLimitNode | None,
-    ) -> None:
-        """
-        Initialize a token limit node.
-        Forms part of a tree structure. Each node has a pointer to its parent, or None
-        if it is the root node.
-        Tracks the token usage for this node and its parent nodes and checks if the
-        usage has exceeded a (variable) limit.
-        Args:
-          limit: The maximum number of tokens that can be used while the context
-            manager is open.
-          parent: The parent node in the tree.
-        """
-        from inspect_ai.model._model_output import ModelUsage
-        self._limit = limit
-        self.parent = parent
-        self._usage = ModelUsage()
+        self._limit = value
     def record(self, usage: ModelUsage) -> None:
-        """Record model usage for this node and its parent nodes."""
+        """Record model usage for this node and its ancestor nodes."""
         if self.parent is not None:
             self.parent.record(usage)
         self._usage += usage
     def check(self) -> None:
-        """Check if this token limit or any parent limits have been exceeded."""
-        self._check_self()
+        """Check if this token limit or any ancestor limits have been exceeded.
+        The checks occur from root to leaf. This is so that if multiple limits are
+        simultaneously exceeded, the outermost (closest to root) one raises the error,
+        preventing certain sub-agent architectures from ending up in an infinite loop.
+        """
         if self.parent is not None:
             self.parent.check()
+        self._check_self()
+    def _validate_token_limit(self, value: int | None) -> None:
+        if value is not None and value < 0:
+            raise ValueError(
+                f"Token limit value must be a non-negative integer or None: {value}"
+            )
     def _check_self(self) -> None:
         from inspect_ai.log._transcript import SampleLimitEvent, transcript
-        if self._limit.value is None:
+        if self.limit is None:
             return
         total = self._usage.total_tokens
-        if total > self._limit.value:
-            message = (
-                f"Token limit exceeded. value: {total:,}; limit: {self._limit.value:,}"
-            )
+        if total > self.limit:
+            message = f"Token limit exceeded. value: {total:,}; limit: {self.limit:,}"
             transcript()._event(
-                SampleLimitEvent(type="token", limit=self._limit.value, message=message)
+                SampleLimitEvent(type="token", limit=self.limit, message=message)
             )
             raise LimitExceededError(
-                "token", value=total, limit=self._limit.value, message=message
+                "token", value=total, limit=self.limit, message=message, source=self
             )
-class _MessageLimit(Limit):
+class _MessageLimit(Limit, _Node):
     def __init__(self, limit: int | None) -> None:
+        super().__init__()
         self._validate_message_limit(limit)
-        self._limit_value_wrapper = _LimitValueWrapper(limit)
+        self._limit = limit
     def __enter__(self) -> Limit:
-        current_node = message_limit_leaf_node.get()
-        new_node = _MessageLimitNode(self._limit_value_wrapper, current_node)
-        # Note that we don't store new_node as an instance variable, because the context
-        # manager may be used across multiple execution contexts, or opened multiple
-        # times.
-        message_limit_leaf_node.set(new_node)
+        super()._check_reuse()
+        message_limit_tree.push(self)
         return self
     def __exit__(
@@ -318,16 +441,19 @@ class _MessageLimit(Limit):
         exc_val: BaseException | None,
         exc_tb: TracebackType | None,
     ) -> None:
-        current_node = message_limit_leaf_node.get()
-        assert current_node is not None, (
-            "Message limit node should not be None when exiting context manager."
+        self._pop_and_check_identity(message_limit_tree)
+    @property
+    def usage(self) -> float:
+        raise NotImplementedError(
+            "Retrieving the message count from a limit is not supported. Please query "
+            "the messages property on the task or agent state instead."
         )
-        message_limit_leaf_node.set(current_node.parent)
     @property
     def limit(self) -> int | None:
         """Get the configured message limit value."""
-        return self._limit_value_wrapper.value
+        return self._limit
     @limit.setter
     def limit(self, value: int | None) -> None:
@@ -340,54 +466,161 @@ class _MessageLimit(Limit):
         exceeded).
         """
         self._validate_message_limit(value)
-        self._limit_value_wrapper.value = value
+        self._limit = value
+    def check(self, count: int, raise_for_equal: bool) -> None:
+        """Check if this message limit has been exceeded.
+        Does not check ancestors.
+        """
+        from inspect_ai.log._transcript import SampleLimitEvent, transcript
+        if self.limit is None:
+            return
+        if count > self.limit or (raise_for_equal and count == self.limit):
+            reached_or_exceeded = "reached" if count == self.limit else "exceeded"
+            message = (
+                f"Message limit {reached_or_exceeded}. count: {count:,}; "
+                f"limit: {self.limit:,}"
+            )
+            transcript()._event(
+                SampleLimitEvent(type="message", limit=self.limit, message=message)
+            )
+            raise LimitExceededError(
+                "message", value=count, limit=self.limit, message=message, source=self
+            )
     def _validate_message_limit(self, value: int | None) -> None:
         if value is not None and value < 0:
-            raise ValueError("Message limit value must be a non-negative integer.")
+            raise ValueError(
+                f"Message limit value must be a non-negative integer or None: {value}"
+            )
-class _MessageLimitNode:
-    def __init__(
+class _TimeLimit(Limit):
+    def __init__(self, limit: float | None) -> None:
+        super().__init__()
+        _validate_time_limit("Time", limit)
+        self._limit = limit
+        self._start_time: float | None = None
+        self._end_time: float | None = None
+    def __enter__(self) -> Limit:
+        super()._check_reuse()
+        # Unlike the other limits, this one is not stored in a tree. Anyio handles all
+        # of the state.
+        self._cancel_scope = anyio.move_on_after(self._limit)
+        self._cancel_scope.__enter__()
+        self._start_time = anyio.current_time()
+        return self
+    def __exit__(
         self,
-        limit: _LimitValueWrapper,
-        parent: _MessageLimitNode | None,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
     ) -> None:
-        """
-        Initialize a message limit node.
-        Forms part of a tree structure. Each node has a pointer to its parent, or None
-        if it is the root node.
+        from inspect_ai.log._transcript import SampleLimitEvent, transcript
-        Checks if the message count for this node has exceeded a (variable) limit.
+        self._cancel_scope.__exit__(exc_type, exc_val, exc_tb)
+        self._end_time = anyio.current_time()
+        if self._cancel_scope.cancel_called and self._limit is not None:
+            message = f"Time limit exceeded. limit: {self._limit} seconds"
+            assert self._start_time is not None
+            # Note we've measured the elapsed time independently of anyio's cancel scope
+            # so this is an approximation.
+            time_elapsed = self._end_time - self._start_time
+            transcript()._event(
+                SampleLimitEvent(type="time", message=message, limit=self._limit)
+            )
+            raise LimitExceededError(
+                "time",
+                value=time_elapsed,
+                limit=self._limit,
+                message=message,
+                source=self,
+            ) from exc_val
-        Args:
-          limit: The maximum conversation length (number of messages) allowed while this
-            node is the lead node of the current execution context.
-          parent: The parent node in the tree.
-        """
+    @property
+    def usage(self) -> float:
+        if self._start_time is None:
+            return 0.0
+        if self._end_time is None:
+            return anyio.current_time() - self._start_time
+        return self._end_time - self._start_time
+class _WorkingLimit(Limit, _Node):
+    def __init__(self, limit: float | None) -> None:
+        super().__init__()
+        _validate_time_limit("Working time", limit)
         self._limit = limit
-        self.parent = parent
+        self.parent: _WorkingLimit | None = None
+        self._start_time: float | None = None
+        self._end_time: float | None = None
-    def check(self, count: int, raise_for_equal: bool) -> None:
-        """Check if this message limit has been exceeded.
+    def __enter__(self) -> Limit:
+        super()._check_reuse()
+        self._start_time = anyio.current_time()
+        self._waiting_time = 0.0
+        working_limit_tree.push(self)
+        return self
-        Does not check parents.
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc_val: BaseException | None,
+        exc_tb: TracebackType | None,
+    ) -> None:
+        self._end_time = anyio.current_time()
+        self._pop_and_check_identity(working_limit_tree)
+    @property
+    def usage(self) -> float:
+        if self._start_time is None:
+            return 0.0
+        if self._end_time is None:
+            return anyio.current_time() - self._start_time - self._waiting_time
+        return self._end_time - self._start_time - self._waiting_time
+    def record_waiting_time(self, waiting_time: float) -> None:
+        """Record waiting time for this node and its ancestor nodes."""
+        if self.parent is not None:
+            self.parent.record_waiting_time(waiting_time)
+        self._waiting_time += waiting_time
+    def check(self) -> None:
+        """Check if this working time limit or any ancestor limits have been exceeded.
+        The checks occur from root to leaf. This is so that if multiple limits are
+        simultaneously exceeded, the outermost (closest to root) one raises the error,
+        preventing certain sub-agent architectures from ending up in an infinite loop.
         """
+        if self.parent is not None:
+            self.parent.check()
+        self._check_self()
+    def _check_self(self) -> None:
         from inspect_ai.log._transcript import SampleLimitEvent, transcript
-        if self._limit.value is None:
+        if self._limit is None:
             return
-        limit = self._limit.value
-        if count > limit or (raise_for_equal and count == limit):
-            reached_or_exceeded = "reached" if count == limit else "exceeded"
-            message = (
-                f"Message limit {reached_or_exceeded}. count: {count:,}; "
-                f"limit: {limit:,}"
-            )
+        if self.usage > self._limit:
+            message = f"Working time limit exceeded. limit: {self._limit} seconds"
             transcript()._event(
-                SampleLimitEvent(type="message", limit=limit, message=message)
+                SampleLimitEvent(type="working", message=message, limit=self._limit)
             )
             raise LimitExceededError(
-                "message", value=count, limit=limit, message=message
+                "working",
+                value=self.usage,
+                limit=self._limit,
+                message=message,
+                source=self,
             )
+def _validate_time_limit(name: str, value: float | None) -> None:
+    if value is not None and value < 0:
+        raise ValueError(
+            f"{name} limit value must be a non-negative float or None: {value}"
+        )

inspect-ai 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl

inspect-ai 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl