inspect-ai 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -0
- inspect_ai/_cli/log.py +1 -1
- inspect_ai/_display/core/config.py +11 -5
- inspect_ai/_display/core/panel.py +66 -2
- inspect_ai/_display/core/textual.py +5 -2
- inspect_ai/_display/plain/display.py +1 -0
- inspect_ai/_display/rich/display.py +2 -2
- inspect_ai/_display/textual/widgets/transcript.py +41 -1
- inspect_ai/_eval/run.py +12 -4
- inspect_ai/_eval/score.py +2 -4
- inspect_ai/_eval/task/log.py +1 -1
- inspect_ai/_eval/task/run.py +59 -81
- inspect_ai/_eval/task/task.py +1 -1
- inspect_ai/_util/_async.py +1 -1
- inspect_ai/_util/content.py +11 -6
- inspect_ai/_util/interrupt.py +2 -2
- inspect_ai/_util/text.py +7 -0
- inspect_ai/_util/working.py +8 -37
- inspect_ai/_view/__init__.py +0 -0
- inspect_ai/_view/schema.py +3 -1
- inspect_ai/_view/view.py +14 -0
- inspect_ai/_view/www/CLAUDE.md +15 -0
- inspect_ai/_view/www/dist/assets/index.css +273 -169
- inspect_ai/_view/www/dist/assets/index.js +20079 -17019
- inspect_ai/_view/www/log-schema.json +122 -8
- inspect_ai/_view/www/package.json +5 -1
- inspect_ai/_view/www/src/@types/log.d.ts +20 -2
- inspect_ai/_view/www/src/app/App.tsx +1 -15
- inspect_ai/_view/www/src/app/appearance/icons.ts +4 -1
- inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +24 -6
- inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +0 -5
- inspect_ai/_view/www/src/app/content/RenderedContent.tsx +221 -205
- inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +2 -1
- inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +5 -0
- inspect_ai/_view/www/src/app/routing/url.ts +84 -4
- inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.module.css +0 -5
- inspect_ai/_view/www/src/app/samples/SampleDialog.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.module.css +7 -0
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +26 -19
- inspect_ai/_view/www/src/app/samples/SampleSummaryView.module.css +1 -2
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +8 -6
- inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.tsx +0 -4
- inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.tsx +3 -2
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +2 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +2 -0
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +1 -0
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +1 -0
- inspect_ai/_view/www/src/app/samples/list/SampleRow.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/scores/SampleScoresGrid.module.css +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +2 -3
- inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/InputEventView.tsx +1 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.module.css +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SampleInitEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +3 -2
- inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.tsx +4 -5
- inspect_ai/_view/www/src/app/samples/transcript/ScoreEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +1 -2
- inspect_ai/_view/www/src/app/samples/transcript/StepEventView.tsx +1 -3
- inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +1 -2
- inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +3 -4
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.module.css +42 -0
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.tsx +77 -0
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualList.tsx +27 -71
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +13 -3
- inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.tsx +27 -2
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.module.css +1 -0
- inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +21 -22
- inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.module.css +45 -0
- inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +223 -0
- inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.module.css +10 -0
- inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +258 -0
- inspect_ai/_view/www/src/app/samples/transcript/outline/tree-visitors.ts +187 -0
- inspect_ai/_view/www/src/app/samples/transcript/state/StateEventRenderers.tsx +8 -1
- inspect_ai/_view/www/src/app/samples/transcript/state/StateEventView.tsx +3 -4
- inspect_ai/_view/www/src/app/samples/transcript/transform/hooks.ts +78 -0
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +340 -135
- inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +3 -0
- inspect_ai/_view/www/src/app/samples/transcript/types.ts +2 -0
- inspect_ai/_view/www/src/app/types.ts +5 -1
- inspect_ai/_view/www/src/client/api/api-browser.ts +2 -2
- inspect_ai/_view/www/src/components/LiveVirtualList.tsx +6 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +1 -1
- inspect_ai/_view/www/src/components/PopOver.tsx +422 -0
- inspect_ai/_view/www/src/components/PulsingDots.module.css +9 -9
- inspect_ai/_view/www/src/components/PulsingDots.tsx +4 -1
- inspect_ai/_view/www/src/components/StickyScroll.tsx +183 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +4 -0
- inspect_ai/_view/www/src/state/hooks.ts +52 -2
- inspect_ai/_view/www/src/state/logSlice.ts +4 -3
- inspect_ai/_view/www/src/state/samplePolling.ts +8 -0
- inspect_ai/_view/www/src/state/sampleSlice.ts +53 -9
- inspect_ai/_view/www/src/state/scrolling.ts +152 -0
- inspect_ai/_view/www/src/utils/attachments.ts +7 -0
- inspect_ai/_view/www/src/utils/python.ts +18 -0
- inspect_ai/_view/www/yarn.lock +269 -6
- inspect_ai/agent/_react.py +12 -7
- inspect_ai/agent/_run.py +46 -11
- inspect_ai/analysis/beta/_dataframe/samples/table.py +19 -18
- inspect_ai/log/_bundle.py +5 -3
- inspect_ai/log/_log.py +3 -3
- inspect_ai/log/_recorders/file.py +2 -9
- inspect_ai/log/_transcript.py +1 -1
- inspect_ai/model/_call_tools.py +6 -2
- inspect_ai/model/_openai.py +1 -1
- inspect_ai/model/_openai_responses.py +78 -39
- inspect_ai/model/_openai_web_search.py +31 -0
- inspect_ai/model/_providers/anthropic.py +3 -6
- inspect_ai/model/_providers/azureai.py +72 -3
- inspect_ai/model/_providers/openai.py +2 -1
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/scorer/_metric.py +1 -2
- inspect_ai/solver/_task_state.py +2 -2
- inspect_ai/tool/_tool.py +6 -2
- inspect_ai/tool/_tool_def.py +27 -4
- inspect_ai/tool/_tool_info.py +2 -0
- inspect_ai/tool/_tools/_web_search/_google.py +15 -4
- inspect_ai/tool/_tools/_web_search/_tavily.py +35 -12
- inspect_ai/tool/_tools/_web_search/_web_search.py +214 -45
- inspect_ai/util/__init__.py +6 -0
- inspect_ai/util/_json.py +3 -0
- inspect_ai/util/_limit.py +374 -141
- inspect_ai/util/_sandbox/docker/compose.py +20 -11
- inspect_ai/util/_span.py +1 -1
- {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/METADATA +3 -3
- {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/RECORD +131 -117
- {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/WHEEL +1 -1
- {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/top_level.txt +0 -0
inspect_ai/util/_limit.py
CHANGED
@@ -5,7 +5,10 @@ import logging
|
|
5
5
|
from contextlib import ExitStack, contextmanager
|
6
6
|
from contextvars import ContextVar
|
7
7
|
from types import TracebackType
|
8
|
-
from typing import TYPE_CHECKING, Iterator, Literal
|
8
|
+
from typing import TYPE_CHECKING, Generic, Iterator, Literal, TypeVar
|
9
|
+
|
10
|
+
import anyio
|
11
|
+
from typing_extensions import Self
|
9
12
|
|
10
13
|
from inspect_ai._util.logger import warn_once
|
11
14
|
|
@@ -16,18 +19,7 @@ if TYPE_CHECKING:
|
|
16
19
|
|
17
20
|
|
18
21
|
logger = logging.getLogger(__name__)
|
19
|
-
|
20
|
-
# Stores the current execution context's leaf _TokenLimitNode.
|
21
|
-
# The resulting data structure is a tree of _TokenLimitNode nodes which each
|
22
|
-
# have a pointer to their parent node. Each additional context manager inserts a new
|
23
|
-
# child node into the tree. The fact that there can be multiple execution contexts is
|
24
|
-
# what makes this a tree rather than a stack.
|
25
|
-
token_limit_leaf_node: ContextVar[_TokenLimitNode | None] = ContextVar(
|
26
|
-
"token_limit_leaf_node", default=None
|
27
|
-
)
|
28
|
-
message_limit_leaf_node: ContextVar[_MessageLimitNode | None] = ContextVar(
|
29
|
-
"message_limit_leaf_node", default=None
|
30
|
-
)
|
22
|
+
TNode = TypeVar("TNode", bound="_Node")
|
31
23
|
|
32
24
|
|
33
25
|
class LimitExceededError(Exception):
|
@@ -42,20 +34,25 @@ class LimitExceededError(Exception):
|
|
42
34
|
value: Value compared to.
|
43
35
|
limit: Limit applied.
|
44
36
|
message (str | None): Optional. Human readable message.
|
37
|
+
source (Limit | None): Optional. The `Limit` instance which was responsible for raising this error.
|
45
38
|
"""
|
46
39
|
|
47
40
|
def __init__(
|
48
41
|
self,
|
49
42
|
type: Literal["message", "time", "working", "token", "operator", "custom"],
|
50
43
|
*,
|
51
|
-
value:
|
52
|
-
limit:
|
44
|
+
value: float,
|
45
|
+
limit: float,
|
53
46
|
message: str | None = None,
|
47
|
+
source: Limit | None = None,
|
54
48
|
) -> None:
|
55
49
|
self.type = type
|
56
50
|
self.value = value
|
51
|
+
self.value_str = self._format_float_or_int(value)
|
57
52
|
self.limit = limit
|
53
|
+
self.limit_str = self._format_float_or_int(limit)
|
58
54
|
self.message = f"Exceeded {type} limit: {limit:,}"
|
55
|
+
self.source = source
|
59
56
|
super().__init__(message)
|
60
57
|
|
61
58
|
def with_state(self, state: TaskState) -> LimitExceededError:
|
@@ -65,9 +62,18 @@ class LimitExceededError(Exception):
|
|
65
62
|
)
|
66
63
|
return self
|
67
64
|
|
65
|
+
def _format_float_or_int(self, value: float | int) -> str:
|
66
|
+
if isinstance(value, int):
|
67
|
+
return f"{value:,}"
|
68
|
+
else:
|
69
|
+
return f"{value:,.2f}"
|
70
|
+
|
68
71
|
|
69
72
|
class Limit(abc.ABC):
|
70
|
-
"""Base class for all
|
73
|
+
"""Base class for all limit context managers."""
|
74
|
+
|
75
|
+
def __init__(self) -> None:
|
76
|
+
self._entered = False
|
71
77
|
|
72
78
|
@abc.abstractmethod
|
73
79
|
def __enter__(self) -> Limit:
|
@@ -82,35 +88,81 @@ class Limit(abc.ABC):
|
|
82
88
|
) -> None:
|
83
89
|
pass
|
84
90
|
|
91
|
+
@property
|
92
|
+
@abc.abstractmethod
|
93
|
+
def usage(self) -> float:
|
94
|
+
"""The current usage of the resource being limited."""
|
95
|
+
pass
|
96
|
+
|
97
|
+
def _check_reuse(self) -> None:
|
98
|
+
if self._entered:
|
99
|
+
raise RuntimeError(
|
100
|
+
"Each Limit may only be used once in a single 'with' block. Please "
|
101
|
+
"create a new instance of the Limit."
|
102
|
+
)
|
103
|
+
self._entered = True
|
104
|
+
|
85
105
|
|
86
106
|
@contextmanager
|
87
|
-
def apply_limits(
|
107
|
+
def apply_limits(
|
108
|
+
limits: list[Limit], catch_errors: bool = False
|
109
|
+
) -> Iterator[LimitScope]:
|
88
110
|
"""
|
89
111
|
Apply a list of limits within a context manager.
|
90
112
|
|
113
|
+
Optionally catches any `LimitExceededError` raised by the applied limits, while
|
114
|
+
allowing other limit errors from any other scope (e.g. the Sample level) to
|
115
|
+
propagate.
|
116
|
+
|
117
|
+
Yields a `LimitScope` object which can be used once the context manager is closed
|
118
|
+
to determine which, if any, limits were exceeded.
|
119
|
+
|
91
120
|
Args:
|
92
121
|
limits: List of limits to apply while the context manager is open. Should a
|
93
|
-
limit be exceeded, a LimitExceededError is raised.
|
122
|
+
limit be exceeded, a `LimitExceededError` is raised.
|
123
|
+
catch_errors: If True, catch any `LimitExceededError` raised by the applied
|
124
|
+
limits. Callers can determine whether any limits were exceeded by checking the
|
125
|
+
limit_error property of the `LimitScope` object yielded by this function. If
|
126
|
+
False, all `LimitExceededError` exceptions will be allowed to propagate.
|
127
|
+
"""
|
128
|
+
limit_scope = LimitScope()
|
129
|
+
# Try scope is outside the `with ExitStack()` so that we can catch any errors raised
|
130
|
+
# when exiting it (which will be where time_limit() would raise LimitExceededError).
|
131
|
+
try:
|
132
|
+
with ExitStack() as stack:
|
133
|
+
for limit in limits:
|
134
|
+
stack.enter_context(limit)
|
135
|
+
yield limit_scope
|
136
|
+
except LimitExceededError as e:
|
137
|
+
# If it was not one of the limits we applied.
|
138
|
+
if e.source is None or e.source not in limits:
|
139
|
+
raise
|
140
|
+
limit_scope.limit_error = e
|
141
|
+
if not catch_errors:
|
142
|
+
raise
|
143
|
+
|
144
|
+
|
145
|
+
class LimitScope:
|
146
|
+
"""Object returned from `apply_limits()`.
|
147
|
+
|
148
|
+
Used to check which, if any, limits were exceeded.
|
94
149
|
"""
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
yield
|
150
|
+
|
151
|
+
def __init__(self) -> None:
|
152
|
+
self.limit_error: LimitExceededError | None = None
|
99
153
|
|
100
154
|
|
101
155
|
def token_limit(limit: int | None) -> _TokenLimit:
|
102
156
|
"""Limits the total number of tokens which can be used.
|
103
157
|
|
104
158
|
The counter starts when the context manager is opened and ends when it is closed.
|
105
|
-
The context manager can be opened multiple times, even in different execution
|
106
|
-
contexts.
|
107
159
|
|
108
160
|
These limits can be stacked.
|
109
161
|
|
110
|
-
This relies on "cooperative" checking - consumers must call check_token_limit()
|
162
|
+
This relies on "cooperative" checking - consumers must call `check_token_limit()`
|
111
163
|
themselves whenever tokens are consumed.
|
112
164
|
|
113
|
-
When a limit is exceeded, a LimitExceededError is raised.
|
165
|
+
When a limit is exceeded, a `LimitExceededError` is raised.
|
114
166
|
|
115
167
|
Args:
|
116
168
|
limit: The maximum number of tokens that can be used while the context manager is
|
@@ -125,7 +177,7 @@ def record_model_usage(usage: ModelUsage) -> None:
|
|
125
177
|
|
126
178
|
Does not check if the limit has been exceeded.
|
127
179
|
"""
|
128
|
-
node = token_limit_leaf_node.get()
|
180
|
+
node = token_limit_tree.get()
|
129
181
|
if node is None:
|
130
182
|
return
|
131
183
|
node.record(usage)
|
@@ -138,7 +190,7 @@ def check_token_limit() -> None:
|
|
138
190
|
|
139
191
|
Note that all active token limits are checked, not just the most recent one.
|
140
192
|
"""
|
141
|
-
node = token_limit_leaf_node.get()
|
193
|
+
node = token_limit_tree.get()
|
142
194
|
if node is None:
|
143
195
|
return
|
144
196
|
node.check()
|
@@ -148,15 +200,14 @@ def message_limit(limit: int | None) -> _MessageLimit:
|
|
148
200
|
"""Limits the number of messages in a conversation.
|
149
201
|
|
150
202
|
The total number of messages in the conversation are compared to the limit (not just
|
151
|
-
"new" messages).
|
152
|
-
execution contexts.
|
203
|
+
"new" messages).
|
153
204
|
|
154
205
|
These limits can be stacked.
|
155
206
|
|
156
207
|
This relies on "cooperative" checking - consumers must call check_message_limit()
|
157
208
|
themselves whenever the message count is updated.
|
158
209
|
|
159
|
-
When a limit is exceeded, a LimitExceededError is raised.
|
210
|
+
When a limit is exceeded, a `LimitExceededError` is raised.
|
160
211
|
|
161
212
|
Args:
|
162
213
|
limit: The maximum conversation length (number of messages) allowed while the
|
@@ -176,35 +227,135 @@ def check_message_limit(count: int, raise_for_equal: bool) -> None:
|
|
176
227
|
limit, otherwise, only raise an error if the message count is greater than the
|
177
228
|
limit.
|
178
229
|
"""
|
179
|
-
node = message_limit_leaf_node.get()
|
230
|
+
node = message_limit_tree.get()
|
180
231
|
if node is None:
|
181
232
|
return
|
182
233
|
node.check(count, raise_for_equal)
|
183
234
|
|
184
235
|
|
185
|
-
|
186
|
-
"""
|
236
|
+
def time_limit(limit: float | None) -> _TimeLimit:
|
237
|
+
"""Limits the wall clock time which can elapse.
|
238
|
+
|
239
|
+
The timer starts when the context manager is opened and stops when it is closed.
|
240
|
+
|
241
|
+
These limits can be stacked.
|
242
|
+
|
243
|
+
When a limit is exceeded, the code block is cancelled and a `LimitExceededError` is
|
244
|
+
raised.
|
245
|
+
|
246
|
+
Uses anyio's cancellation scopes meaning that the operations within the context
|
247
|
+
manager block are cancelled if the limit is exceeded. The `LimitExceededError` is
|
248
|
+
therefore raised at the level that the `time_limit()` context manager was opened,
|
249
|
+
not at the level of the operation which caused the limit to be exceeded (e.g. a call
|
250
|
+
to `generate()`). Ensure you handle `LimitExceededError` at the level of opening the context manager.
|
251
|
+
|
252
|
+
Args:
|
253
|
+
limit: The maximum number of seconds that can pass while the context manager is
|
254
|
+
open. A value of None means unlimited time.
|
255
|
+
"""
|
256
|
+
return _TimeLimit(limit)
|
257
|
+
|
258
|
+
|
259
|
+
def working_limit(limit: float | None) -> _WorkingLimit:
|
260
|
+
"""Limits the working time which can elapse.
|
261
|
+
|
262
|
+
Working time is the wall clock time minus any waiting time e.g. waiting before
|
263
|
+
retrying in response to rate limits or waiting on a semaphore.
|
187
264
|
|
188
|
-
|
189
|
-
|
265
|
+
The timer starts when the context manager is opened and stops when it is closed.
|
266
|
+
|
267
|
+
These limits can be stacked.
|
268
|
+
|
269
|
+
When a limit is exceeded, a `LimitExceededError` is raised.
|
270
|
+
|
271
|
+
Args:
|
272
|
+
limit: The maximum number of seconds of working that can pass while the context
|
273
|
+
manager is open. A value of None means unlimited time.
|
190
274
|
"""
|
275
|
+
return _WorkingLimit(limit)
|
276
|
+
|
277
|
+
|
278
|
+
def record_waiting_time(waiting_time: float) -> None:
|
279
|
+
node = working_limit_tree.get()
|
280
|
+
if node is None:
|
281
|
+
return
|
282
|
+
node.record_waiting_time(waiting_time)
|
283
|
+
|
284
|
+
|
285
|
+
def check_working_limit() -> None:
|
286
|
+
node = working_limit_tree.get()
|
287
|
+
if node is None:
|
288
|
+
return
|
289
|
+
node.check()
|
191
290
|
|
192
|
-
def __init__(self, value: int | None) -> None:
|
193
|
-
self.value = value
|
194
291
|
|
292
|
+
class _Tree(Generic[TNode]):
|
293
|
+
"""A tree data structure of limit nodes.
|
195
294
|
|
196
|
-
|
295
|
+
Each node has a pointer to its parent, or None if it is a root node.
|
296
|
+
|
297
|
+
Each additional context manager inserts a new child node into the tree. The fact
|
298
|
+
that there can be multiple execution contexts is what makes this a tree rather than
|
299
|
+
a stack and why a context variable is used to store the leaf node.
|
300
|
+
"""
|
301
|
+
|
302
|
+
def __init__(self, id: str) -> None:
|
303
|
+
self._leaf_node: ContextVar[TNode | None] = ContextVar(id, default=None)
|
304
|
+
|
305
|
+
def get(self) -> TNode | None:
|
306
|
+
return self._leaf_node.get()
|
307
|
+
|
308
|
+
def push(self, new_node: TNode) -> None:
|
309
|
+
current_leaf = self._leaf_node.get()
|
310
|
+
new_node.parent = current_leaf
|
311
|
+
self._leaf_node.set(new_node)
|
312
|
+
|
313
|
+
def pop(self) -> TNode:
|
314
|
+
current_leaf = self._leaf_node.get()
|
315
|
+
if current_leaf is None:
|
316
|
+
raise RuntimeError("Limit tree is empty. Cannot pop from an empty tree.")
|
317
|
+
self._leaf_node.set(current_leaf.parent)
|
318
|
+
return current_leaf
|
319
|
+
|
320
|
+
|
321
|
+
token_limit_tree: _Tree[_TokenLimit] = _Tree("token_limit_tree")
|
322
|
+
# Store the message limit leaf node so that we know which limit to check in
|
323
|
+
# check_message_limit().
|
324
|
+
message_limit_tree: _Tree[_MessageLimit] = _Tree("message_limit_tree")
|
325
|
+
working_limit_tree: _Tree[_WorkingLimit] = _Tree("working_limit_tree")
|
326
|
+
|
327
|
+
|
328
|
+
class _Node:
|
329
|
+
"""Mixin for objects used as nodes in a limit tree.
|
330
|
+
|
331
|
+
This allows us to have an "internal" parent property which is not exported as part
|
332
|
+
of the public API.
|
333
|
+
"""
|
334
|
+
|
335
|
+
parent: Self | None
|
336
|
+
|
337
|
+
def _pop_and_check_identity(self, tree: _Tree[TNode]) -> None:
|
338
|
+
popped = tree.pop()
|
339
|
+
if popped is not self:
|
340
|
+
raise RuntimeError(
|
341
|
+
"The limit context manager being closed is not the leaf node in the "
|
342
|
+
"tree. Make sure to open and close the context managers in a "
|
343
|
+
"stack-like manner using a `with` statement."
|
344
|
+
)
|
345
|
+
|
346
|
+
|
347
|
+
class _TokenLimit(Limit, _Node):
|
197
348
|
def __init__(self, limit: int | None) -> None:
|
349
|
+
from inspect_ai.model._model_output import ModelUsage
|
350
|
+
|
351
|
+
super().__init__()
|
198
352
|
self._validate_token_limit(limit)
|
199
|
-
self.
|
353
|
+
self._limit = limit
|
354
|
+
self._usage = ModelUsage()
|
200
355
|
|
201
356
|
def __enter__(self) -> Limit:
|
202
|
-
|
203
|
-
|
204
|
-
# Note that we don't store new_node as an instance variable, because the context
|
205
|
-
# manager may be used across multiple execution contexts, or opened multiple
|
206
|
-
# times.
|
207
|
-
token_limit_leaf_node.set(new_node)
|
357
|
+
super()._check_reuse()
|
358
|
+
token_limit_tree.push(self)
|
208
359
|
return self
|
209
360
|
|
210
361
|
def __exit__(
|
@@ -213,103 +364,75 @@ class _TokenLimit(Limit):
|
|
213
364
|
exc_val: BaseException | None,
|
214
365
|
exc_tb: TracebackType | None,
|
215
366
|
) -> None:
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
367
|
+
self._pop_and_check_identity(token_limit_tree)
|
368
|
+
|
369
|
+
@property
|
370
|
+
def usage(self) -> float:
|
371
|
+
return self._usage.total_tokens
|
221
372
|
|
222
373
|
@property
|
223
374
|
def limit(self) -> int | None:
|
224
375
|
"""Get the configured token limit value."""
|
225
|
-
return self.
|
376
|
+
return self._limit
|
226
377
|
|
227
378
|
@limit.setter
|
228
379
|
def limit(self, value: int | None) -> None:
|
229
380
|
"""Update the token limit value.
|
230
381
|
|
231
|
-
This will affect the limit for all active token limit nodes derived from this
|
232
|
-
context manager.
|
233
|
-
|
234
382
|
This does not trigger a check of the token limit (which could now have been
|
235
383
|
exceeded).
|
236
384
|
"""
|
237
385
|
self._validate_token_limit(value)
|
238
|
-
self.
|
239
|
-
|
240
|
-
def _validate_token_limit(self, value: int | None) -> None:
|
241
|
-
if value is not None and value < 0:
|
242
|
-
raise ValueError("Token limit value must be a non-negative integer.")
|
243
|
-
|
244
|
-
|
245
|
-
class _TokenLimitNode:
|
246
|
-
def __init__(
|
247
|
-
self,
|
248
|
-
limit: _LimitValueWrapper,
|
249
|
-
parent: _TokenLimitNode | None,
|
250
|
-
) -> None:
|
251
|
-
"""
|
252
|
-
Initialize a token limit node.
|
253
|
-
|
254
|
-
Forms part of a tree structure. Each node has a pointer to its parent, or None
|
255
|
-
if it is the root node.
|
256
|
-
|
257
|
-
Tracks the token usage for this node and its parent nodes and checks if the
|
258
|
-
usage has exceeded a (variable) limit.
|
259
|
-
|
260
|
-
Args:
|
261
|
-
limit: The maximum number of tokens that can be used while the context
|
262
|
-
manager is open.
|
263
|
-
parent: The parent node in the tree.
|
264
|
-
"""
|
265
|
-
from inspect_ai.model._model_output import ModelUsage
|
266
|
-
|
267
|
-
self._limit = limit
|
268
|
-
self.parent = parent
|
269
|
-
self._usage = ModelUsage()
|
386
|
+
self._limit = value
|
270
387
|
|
271
388
|
def record(self, usage: ModelUsage) -> None:
|
272
|
-
"""Record model usage for this node and its
|
389
|
+
"""Record model usage for this node and its ancestor nodes."""
|
273
390
|
if self.parent is not None:
|
274
391
|
self.parent.record(usage)
|
275
392
|
self._usage += usage
|
276
393
|
|
277
394
|
def check(self) -> None:
|
278
|
-
"""Check if this token limit or any
|
279
|
-
|
395
|
+
"""Check if this token limit or any ancestor limits have been exceeded.
|
396
|
+
|
397
|
+
The checks occur from root to leaf. This is so that if multiple limits are
|
398
|
+
simultaneously exceeded, the outermost (closest to root) one raises the error,
|
399
|
+
preventing certain sub-agent architectures from ending up in an infinite loop.
|
400
|
+
"""
|
280
401
|
if self.parent is not None:
|
281
402
|
self.parent.check()
|
403
|
+
self._check_self()
|
404
|
+
|
405
|
+
def _validate_token_limit(self, value: int | None) -> None:
|
406
|
+
if value is not None and value < 0:
|
407
|
+
raise ValueError(
|
408
|
+
f"Token limit value must be a non-negative integer or None: {value}"
|
409
|
+
)
|
282
410
|
|
283
411
|
def _check_self(self) -> None:
|
284
412
|
from inspect_ai.log._transcript import SampleLimitEvent, transcript
|
285
413
|
|
286
|
-
if self.
|
414
|
+
if self.limit is None:
|
287
415
|
return
|
288
416
|
total = self._usage.total_tokens
|
289
|
-
if total > self.
|
290
|
-
message =
|
291
|
-
f"Token limit exceeded. value: {total:,}; limit: {self._limit.value:,}"
|
292
|
-
)
|
417
|
+
if total > self.limit:
|
418
|
+
message = f"Token limit exceeded. value: {total:,}; limit: {self.limit:,}"
|
293
419
|
transcript()._event(
|
294
|
-
SampleLimitEvent(type="token", limit=self.
|
420
|
+
SampleLimitEvent(type="token", limit=self.limit, message=message)
|
295
421
|
)
|
296
422
|
raise LimitExceededError(
|
297
|
-
"token", value=total, limit=self.
|
423
|
+
"token", value=total, limit=self.limit, message=message, source=self
|
298
424
|
)
|
299
425
|
|
300
426
|
|
301
|
-
class _MessageLimit(Limit):
|
427
|
+
class _MessageLimit(Limit, _Node):
|
302
428
|
def __init__(self, limit: int | None) -> None:
|
429
|
+
super().__init__()
|
303
430
|
self._validate_message_limit(limit)
|
304
|
-
self.
|
431
|
+
self._limit = limit
|
305
432
|
|
306
433
|
def __enter__(self) -> Limit:
|
307
|
-
|
308
|
-
|
309
|
-
# Note that we don't store new_node as an instance variable, because the context
|
310
|
-
# manager may be used across multiple execution contexts, or opened multiple
|
311
|
-
# times.
|
312
|
-
message_limit_leaf_node.set(new_node)
|
434
|
+
super()._check_reuse()
|
435
|
+
message_limit_tree.push(self)
|
313
436
|
return self
|
314
437
|
|
315
438
|
def __exit__(
|
@@ -318,16 +441,19 @@ class _MessageLimit(Limit):
|
|
318
441
|
exc_val: BaseException | None,
|
319
442
|
exc_tb: TracebackType | None,
|
320
443
|
) -> None:
|
321
|
-
|
322
|
-
|
323
|
-
|
444
|
+
self._pop_and_check_identity(message_limit_tree)
|
445
|
+
|
446
|
+
@property
|
447
|
+
def usage(self) -> float:
|
448
|
+
raise NotImplementedError(
|
449
|
+
"Retrieving the message count from a limit is not supported. Please query "
|
450
|
+
"the messages property on the task or agent state instead."
|
324
451
|
)
|
325
|
-
message_limit_leaf_node.set(current_node.parent)
|
326
452
|
|
327
453
|
@property
|
328
454
|
def limit(self) -> int | None:
|
329
455
|
"""Get the configured message limit value."""
|
330
|
-
return self.
|
456
|
+
return self._limit
|
331
457
|
|
332
458
|
@limit.setter
|
333
459
|
def limit(self, value: int | None) -> None:
|
@@ -340,54 +466,161 @@ class _MessageLimit(Limit):
|
|
340
466
|
exceeded).
|
341
467
|
"""
|
342
468
|
self._validate_message_limit(value)
|
343
|
-
self.
|
469
|
+
self._limit = value
|
470
|
+
|
471
|
+
def check(self, count: int, raise_for_equal: bool) -> None:
|
472
|
+
"""Check if this message limit has been exceeded.
|
473
|
+
|
474
|
+
Does not check ancestors.
|
475
|
+
"""
|
476
|
+
from inspect_ai.log._transcript import SampleLimitEvent, transcript
|
477
|
+
|
478
|
+
if self.limit is None:
|
479
|
+
return
|
480
|
+
if count > self.limit or (raise_for_equal and count == self.limit):
|
481
|
+
reached_or_exceeded = "reached" if count == self.limit else "exceeded"
|
482
|
+
message = (
|
483
|
+
f"Message limit {reached_or_exceeded}. count: {count:,}; "
|
484
|
+
f"limit: {self.limit:,}"
|
485
|
+
)
|
486
|
+
transcript()._event(
|
487
|
+
SampleLimitEvent(type="message", limit=self.limit, message=message)
|
488
|
+
)
|
489
|
+
raise LimitExceededError(
|
490
|
+
"message", value=count, limit=self.limit, message=message, source=self
|
491
|
+
)
|
344
492
|
|
345
493
|
def _validate_message_limit(self, value: int | None) -> None:
|
346
494
|
if value is not None and value < 0:
|
347
|
-
raise ValueError(
|
495
|
+
raise ValueError(
|
496
|
+
f"Message limit value must be a non-negative integer or None: {value}"
|
497
|
+
)
|
348
498
|
|
349
499
|
|
350
|
-
class
|
351
|
-
def __init__(
|
500
|
+
class _TimeLimit(Limit):
|
501
|
+
def __init__(self, limit: float | None) -> None:
|
502
|
+
super().__init__()
|
503
|
+
_validate_time_limit("Time", limit)
|
504
|
+
self._limit = limit
|
505
|
+
self._start_time: float | None = None
|
506
|
+
self._end_time: float | None = None
|
507
|
+
|
508
|
+
def __enter__(self) -> Limit:
|
509
|
+
super()._check_reuse()
|
510
|
+
# Unlike the other limits, this one is not stored in a tree. Anyio handles all
|
511
|
+
# of the state.
|
512
|
+
self._cancel_scope = anyio.move_on_after(self._limit)
|
513
|
+
self._cancel_scope.__enter__()
|
514
|
+
self._start_time = anyio.current_time()
|
515
|
+
return self
|
516
|
+
|
517
|
+
def __exit__(
|
352
518
|
self,
|
353
|
-
|
354
|
-
|
519
|
+
exc_type: type[BaseException] | None,
|
520
|
+
exc_val: BaseException | None,
|
521
|
+
exc_tb: TracebackType | None,
|
355
522
|
) -> None:
|
356
|
-
|
357
|
-
Initialize a message limit node.
|
358
|
-
|
359
|
-
Forms part of a tree structure. Each node has a pointer to its parent, or None
|
360
|
-
if it is the root node.
|
523
|
+
from inspect_ai.log._transcript import SampleLimitEvent, transcript
|
361
524
|
|
362
|
-
|
525
|
+
self._cancel_scope.__exit__(exc_type, exc_val, exc_tb)
|
526
|
+
self._end_time = anyio.current_time()
|
527
|
+
if self._cancel_scope.cancel_called and self._limit is not None:
|
528
|
+
message = f"Time limit exceeded. limit: {self._limit} seconds"
|
529
|
+
assert self._start_time is not None
|
530
|
+
# Note we've measured the elapsed time independently of anyio's cancel scope
|
531
|
+
# so this is an approximation.
|
532
|
+
time_elapsed = self._end_time - self._start_time
|
533
|
+
transcript()._event(
|
534
|
+
SampleLimitEvent(type="time", message=message, limit=self._limit)
|
535
|
+
)
|
536
|
+
raise LimitExceededError(
|
537
|
+
"time",
|
538
|
+
value=time_elapsed,
|
539
|
+
limit=self._limit,
|
540
|
+
message=message,
|
541
|
+
source=self,
|
542
|
+
) from exc_val
|
363
543
|
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
544
|
+
@property
|
545
|
+
def usage(self) -> float:
|
546
|
+
if self._start_time is None:
|
547
|
+
return 0.0
|
548
|
+
if self._end_time is None:
|
549
|
+
return anyio.current_time() - self._start_time
|
550
|
+
return self._end_time - self._start_time
|
551
|
+
|
552
|
+
|
553
|
+
class _WorkingLimit(Limit, _Node):
|
554
|
+
def __init__(self, limit: float | None) -> None:
|
555
|
+
super().__init__()
|
556
|
+
_validate_time_limit("Working time", limit)
|
369
557
|
self._limit = limit
|
370
|
-
self.parent =
|
558
|
+
self.parent: _WorkingLimit | None = None
|
559
|
+
self._start_time: float | None = None
|
560
|
+
self._end_time: float | None = None
|
371
561
|
|
372
|
-
def
|
373
|
-
|
562
|
+
def __enter__(self) -> Limit:
|
563
|
+
super()._check_reuse()
|
564
|
+
self._start_time = anyio.current_time()
|
565
|
+
self._waiting_time = 0.0
|
566
|
+
working_limit_tree.push(self)
|
567
|
+
return self
|
374
568
|
|
375
|
-
|
569
|
+
def __exit__(
|
570
|
+
self,
|
571
|
+
exc_type: type[BaseException] | None,
|
572
|
+
exc_val: BaseException | None,
|
573
|
+
exc_tb: TracebackType | None,
|
574
|
+
) -> None:
|
575
|
+
self._end_time = anyio.current_time()
|
576
|
+
self._pop_and_check_identity(working_limit_tree)
|
577
|
+
|
578
|
+
@property
|
579
|
+
def usage(self) -> float:
|
580
|
+
if self._start_time is None:
|
581
|
+
return 0.0
|
582
|
+
if self._end_time is None:
|
583
|
+
return anyio.current_time() - self._start_time - self._waiting_time
|
584
|
+
return self._end_time - self._start_time - self._waiting_time
|
585
|
+
|
586
|
+
def record_waiting_time(self, waiting_time: float) -> None:
|
587
|
+
"""Record waiting time for this node and its ancestor nodes."""
|
588
|
+
if self.parent is not None:
|
589
|
+
self.parent.record_waiting_time(waiting_time)
|
590
|
+
self._waiting_time += waiting_time
|
591
|
+
|
592
|
+
def check(self) -> None:
|
593
|
+
"""Check if this working time limit or any ancestor limits have been exceeded.
|
594
|
+
|
595
|
+
The checks occur from root to leaf. This is so that if multiple limits are
|
596
|
+
simultaneously exceeded, the outermost (closest to root) one raises the error,
|
597
|
+
preventing certain sub-agent architectures from ending up in an infinite loop.
|
376
598
|
"""
|
599
|
+
if self.parent is not None:
|
600
|
+
self.parent.check()
|
601
|
+
self._check_self()
|
602
|
+
|
603
|
+
def _check_self(self) -> None:
|
377
604
|
from inspect_ai.log._transcript import SampleLimitEvent, transcript
|
378
605
|
|
379
|
-
if self._limit
|
606
|
+
if self._limit is None:
|
380
607
|
return
|
381
|
-
|
382
|
-
|
383
|
-
reached_or_exceeded = "reached" if count == limit else "exceeded"
|
384
|
-
message = (
|
385
|
-
f"Message limit {reached_or_exceeded}. count: {count:,}; "
|
386
|
-
f"limit: {limit:,}"
|
387
|
-
)
|
608
|
+
if self.usage > self._limit:
|
609
|
+
message = f"Working time limit exceeded. limit: {self._limit} seconds"
|
388
610
|
transcript()._event(
|
389
|
-
SampleLimitEvent(type="
|
611
|
+
SampleLimitEvent(type="working", message=message, limit=self._limit)
|
390
612
|
)
|
391
613
|
raise LimitExceededError(
|
392
|
-
"
|
614
|
+
"working",
|
615
|
+
value=self.usage,
|
616
|
+
limit=self._limit,
|
617
|
+
message=message,
|
618
|
+
source=self,
|
393
619
|
)
|
620
|
+
|
621
|
+
|
622
|
+
def _validate_time_limit(name: str, value: float | None) -> None:
|
623
|
+
if value is not None and value < 0:
|
624
|
+
raise ValueError(
|
625
|
+
f"{name} limit value must be a non-negative float or None: {value}"
|
626
|
+
)
|