inspect-ai 0.3.97__py3-none-any.whl → 0.3.99__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +2 -0
- inspect_ai/_cli/log.py +1 -1
- inspect_ai/_display/textual/widgets/transcript.py +15 -3
- inspect_ai/_eval/run.py +18 -5
- inspect_ai/_eval/task/log.py +1 -1
- inspect_ai/_eval/task/task.py +1 -1
- inspect_ai/_util/_async.py +1 -1
- inspect_ai/_view/schema.py +1 -0
- inspect_ai/_view/view.py +14 -0
- inspect_ai/_view/www/dist/assets/index.css +10 -10
- inspect_ai/_view/www/dist/assets/index.js +10 -10
- inspect_ai/_view/www/log-schema.json +45 -5
- inspect_ai/_view/www/src/@types/log.d.ts +11 -2
- inspect_ai/_view/www/src/app/content/RenderedContent.tsx +2 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +2 -2
- inspect_ai/_view/www/src/app/samples/scores/SampleScoresGrid.module.css +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +1 -1
- inspect_ai/agent/_run.py +44 -8
- inspect_ai/dataset/_dataset.py +0 -1
- inspect_ai/log/_bundle.py +5 -3
- inspect_ai/log/_log.py +2 -2
- inspect_ai/model/_providers/anthropic.py +3 -6
- inspect_ai/model/_providers/google.py +6 -0
- inspect_ai/model/_providers/providers.py +1 -1
- inspect_ai/util/__init__.py +2 -0
- inspect_ai/util/_limit.py +160 -137
- {inspect_ai-0.3.97.dist-info → inspect_ai-0.3.99.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.97.dist-info → inspect_ai-0.3.99.dist-info}/RECORD +32 -32
- {inspect_ai-0.3.97.dist-info → inspect_ai-0.3.99.dist-info}/WHEEL +1 -1
- {inspect_ai-0.3.97.dist-info → inspect_ai-0.3.99.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.97.dist-info → inspect_ai-0.3.99.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.97.dist-info → inspect_ai-0.3.99.dist-info}/top_level.txt +0 -0
inspect_ai/log/_log.py
CHANGED
@@ -158,7 +158,7 @@ class EvalConfig(BaseModel):
 
 
 class EvalSampleLimit(BaseModel):
-    """Limit
+    """Limit encountered by sample."""
 
     type: Literal[
         "context", "time", "working", "message", "token", "operator", "custom"
@@ -694,7 +694,7 @@ class EvalSpec(BaseModel):
     task_id: str = Field(default_factory=str)
     """Unique task id."""
 
-    task_version: int = Field(default=0)
+    task_version: int | str = Field(default=0)
    """Task version."""
 
     task_file: str | None = Field(default=None)
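Since EvalSpec.task_version is now int | str, task versions no longer need to be integers. A minimal sketch of what this enables, assuming the Task(version=...) parameter in inspect_ai/_eval/task/task.py (its +1/-1 change is not shown here) was widened the same way; the task name and dataset below are purely illustrative:

from inspect_ai import Task, task
from inspect_ai.dataset import Sample

@task
def hello_task():
    # Illustrative only: a string version (e.g. a date or semver tag),
    # which previously had to be an int.
    return Task(
        dataset=[Sample(input="Say hello.", target="hello")],
        version="2025.05",
    )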
inspect_ai/model/_providers/anthropic.py
CHANGED
@@ -356,12 +356,9 @@ class AnthropicAPI(ModelAPI):
         if isinstance(ex, APIStatusError):
             # for unknown reasons, anthropic does not always set status_code == 529
             # for "overloaded_error" so we check for it explicitly
-            if (
-
-
-                and ex.body.get("error", {}).get("type", "") == "overloaded_error"
-            ):
-                return True
+            if isinstance(ex.body, dict):
+                if "overloaded_error" in str(ex.body):
+                    return True
 
             # standard http status code checking
             return is_retryable_http_status(ex.status_code)
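A small sketch of what the relaxed retry check now accepts; the error body below is illustrative, shaped after the structural lookup removed above:

# Any dict body that mentions "overloaded_error" anywhere is now treated as
# retryable, not only the exact {"error": {"type": "overloaded_error"}} shape.
body = {"error": {"type": "overloaded_error", "message": "Overloaded"}}
retry = isinstance(body, dict) and "overloaded_error" in str(body)
assert retry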
inspect_ai/model/_providers/google.py
CHANGED
@@ -350,6 +350,12 @@ class GoogleGenAIAPI(ModelAPI):
             self.is_gemini() and not self.is_gemini_1_5() and not self.is_gemini_2_0()
         )
         if has_thinking_config:
+            if config.reasoning_tokens == 0:
+                # When reasoning_tokens is set to zero, we disable reasoning and return None.
+                # We cannot return a ThinkingConfig with reasoning_tokens set to 0,
+                # as this will cause the Gemini API to return a 400 INVALID_ARGUMENT error.
+                return None
+
             return ThinkingConfig(
                 include_thoughts=True, thinking_budget=config.reasoning_tokens
             )
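With this change, passing reasoning_tokens=0 disables Gemini reasoning instead of producing a thinking_budget of 0 that the API rejects. A hedged usage sketch (the model name is illustrative, and provider credentials are assumed to be configured):

from inspect_ai.model import GenerateConfig, get_model

# reasoning_tokens=0 now yields no ThinkingConfig at all, rather than a
# thinking_budget of 0 that Gemini would reject with 400 INVALID_ARGUMENT.
model = get_model(
    "google/gemini-2.5-flash",  # illustrative model name
    config=GenerateConfig(reasoning_tokens=0),
)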
inspect_ai/util/__init__.py
CHANGED
@@ -3,6 +3,7 @@ from inspect_ai._util.trace import trace_action, trace_message
 from inspect_ai.util._limit import (
     Limit,
     LimitExceededError,
+    LimitScope,
     apply_limits,
     message_limit,
     token_limit,
@@ -58,6 +59,7 @@ __all__ = [
     "resource",
     "subprocess",
     "LimitExceededError",
+    "LimitScope",
     "SandboxEnvironment",
     "SandboxEnvironmentConfigType",
     "SandboxEnvironmentLimits",
inspect_ai/util/_limit.py
CHANGED
@@ -5,7 +5,9 @@ import logging
 from contextlib import ExitStack, contextmanager
 from contextvars import ContextVar
 from types import TracebackType
-from typing import TYPE_CHECKING, Iterator, Literal
+from typing import TYPE_CHECKING, Generic, Iterator, Literal, TypeVar
+
+from typing_extensions import Self
 
 from inspect_ai._util.logger import warn_once
 
@@ -16,18 +18,7 @@ if TYPE_CHECKING:
 
 
 logger = logging.getLogger(__name__)
-
-# Stores the current execution context's leaf _TokenLimitNode.
-# The resulting data structure is a tree of _TokenLimitNode nodes which each
-# have a pointer to their parent node. Each additional context manager inserts a new
-# child node into the tree. The fact that there can be multiple execution contexts is
-# what makes this a tree rather than a stack.
-token_limit_leaf_node: ContextVar[_TokenLimitNode | None] = ContextVar(
-    "token_limit_leaf_node", default=None
-)
-message_limit_leaf_node: ContextVar[_MessageLimitNode | None] = ContextVar(
-    "message_limit_leaf_node", default=None
-)
+TNode = TypeVar("TNode", bound="_Node")
 
 
 class LimitExceededError(Exception):
@@ -42,6 +33,8 @@ class LimitExceededError(Exception):
         value: Value compared to.
         limit: Limit applied.
         message (str | None): Optional. Human readable message.
+        source (Limit | None): Optional. The `Limit` instance which was responsible for
+            raising this error.
     """
 
     def __init__(
@@ -51,11 +44,13 @@ class LimitExceededError(Exception):
         value: int,
         limit: int,
         message: str | None = None,
+        source: Limit | None = None,
     ) -> None:
         self.type = type
         self.value = value
         self.limit = limit
         self.message = f"Exceeded {type} limit: {limit:,}"
+        self.source = source
         super().__init__(message)
 
     def with_state(self, state: TaskState) -> LimitExceededError:
@@ -67,7 +62,10 @@ class LimitExceededError(Exception):
 
 
 class Limit(abc.ABC):
-    """Base class for all
+    """Base class for all limit context managers."""
+
+    def __init__(self) -> None:
+        self._entered = False
 
     @abc.abstractmethod
     def __enter__(self) -> Limit:
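The _entered flag added to Limit.__init__ backs the reuse guard introduced in the next hunk (_check_reuse): re-entering the same limit instance now raises RuntimeError. A minimal sketch of that behaviour:

from inspect_ai.util import token_limit

tl = token_limit(1_000)
with tl:
    pass

# Re-entering the same instance now raises RuntimeError; create a fresh
# token_limit(1_000) for each `with` block instead.
try:
    with tl:
        pass
except RuntimeError as ex:
    print(ex)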
@@ -82,20 +80,60 @@ class Limit(abc.ABC):
     ) -> None:
         pass
 
+    def _check_reuse(self) -> None:
+        if self._entered:
+            raise RuntimeError(
+                "Each Limit may only be used once in a single 'with' block. Please "
+                "create a new instance of the Limit."
+            )
+        self._entered = True
+
 
 @contextmanager
-def apply_limits(
+def apply_limits(
+    limits: list[Limit], catch_errors: bool = False
+) -> Iterator[LimitScope]:
     """
     Apply a list of limits within a context manager.
 
+    Optionally catches any `LimitExceededError` raised by the applied limits, while
+    allowing other limit errors from any other scope (e.g. the Sample level) to
+    propagate.
+
+    Yields a `LimitScope` object which can be used once the context manager is closed
+    to determine which, if any, limits were exceeded.
+
     Args:
         limits: List of limits to apply while the context manager is open. Should a
-            limit be exceeded, a LimitExceededError is raised.
+            limit be exceeded, a `LimitExceededError` is raised.
+        catch_errors: If True, catch any `LimitExceededError` raised by the applied
+            limits. Callers can determine whether any limits were exceeded by checking the
+            limit_error property of the `LimitScope` object yielded by this function. If
+            False, all `LimitExceededError` exceptions will be allowed to propagate.
     """
+    limit_scope = LimitScope()
     with ExitStack() as stack:
         for limit in limits:
             stack.enter_context(limit)
-
+        try:
+            yield limit_scope
+        except LimitExceededError as e:
+            # If it was not one of the limits we applied.
+            if e.source is None or e.source not in limits:
+                raise
+            limit_scope.limit_error = e
+            if not catch_errors:
+                raise
+
+
+class LimitScope:
+    """Object returned from `apply_limits()`.
+
+    Used to check which, if any, limits were exceeded.
+    """
+
+    def __init__(self) -> None:
+        self.limit_error: LimitExceededError | None = None
 
 
 def token_limit(limit: int | None) -> _TokenLimit:
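A hedged usage sketch of the new catch_errors flag and LimitScope result (the body of the `with` block is illustrative):

from inspect_ai.util import apply_limits, message_limit, token_limit

limits = [token_limit(50_000), message_limit(30)]
with apply_limits(limits, catch_errors=True) as scope:
    ...  # e.g. run a sub-agent here

# With catch_errors=True, errors raised by *these* limits are swallowed and
# recorded; limit errors from any outer scope still propagate.
if scope.limit_error is not None:
    print(f"stopped by {scope.limit_error.type} limit of {scope.limit_error.limit:,}")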
@@ -107,10 +145,10 @@ def token_limit(limit: int | None) -> _TokenLimit:
 
     These limits can be stacked.
 
-    This relies on "cooperative" checking - consumers must call check_token_limit()
+    This relies on "cooperative" checking - consumers must call `check_token_limit()`
     themselves whenever tokens are consumed.
 
-    When a limit is exceeded, a LimitExceededError is raised.
+    When a limit is exceeded, a `LimitExceededError` is raised.
 
     Args:
         limit: The maximum number of tokens that can be used while the context manager is
@@ -125,7 +163,7 @@ def record_model_usage(usage: ModelUsage) -> None:
 
     Does not check if the limit has been exceeded.
     """
-    node =
+    node = token_limit_tree.get()
     if node is None:
         return
     node.record(usage)
@@ -138,7 +176,7 @@ def check_token_limit() -> None:
 
     Note that all active token limits are checked, not just the most recent one.
     """
-    node =
+    node = token_limit_tree.get()
     if node is None:
         return
     node.check()
@@ -156,7 +194,7 @@ def message_limit(limit: int | None) -> _MessageLimit:
     This relies on "cooperative" checking - consumers must call check_message_limit()
     themselves whenever the message count is updated.
 
-    When a limit is exceeded, a LimitExceededError is raised.
+    When a limit is exceeded, a `LimitExceededError` is raised.
 
     Args:
         limit: The maximum conversation length (number of messages) allowed while the
@@ -176,35 +214,78 @@ def check_message_limit(count: int, raise_for_equal: bool) -> None:
     limit, otherwise, only raise an error if the message count is greater than the
     limit.
     """
-    node =
+    node = message_limit_tree.get()
     if node is None:
         return
     node.check(count, raise_for_equal)
 
 
-class
-    """
+class _Tree(Generic[TNode]):
+    """A tree data structure of limit nodes.
+
+    Each node has a pointer to its parent, or None if it is a root node.
 
-
-
+    Each additional context manager inserts a new child node into the tree. The fact
+    that there can be multiple execution contexts is what makes this a tree rather than
+    a stack and why a context variable is used to store the leaf node.
     """
 
-    def __init__(self,
-        self.
+    def __init__(self, id: str) -> None:
+        self._leaf_node: ContextVar[TNode | None] = ContextVar(id, default=None)
+
+    def get(self) -> TNode | None:
+        return self._leaf_node.get()
+
+    def push(self, new_node: TNode) -> None:
+        current_leaf = self._leaf_node.get()
+        new_node.parent = current_leaf
+        self._leaf_node.set(new_node)
+
+    def pop(self) -> TNode:
+        current_leaf = self._leaf_node.get()
+        if current_leaf is None:
+            raise RuntimeError("Limit tree is empty. Cannot pop from an empty tree.")
+        self._leaf_node.set(current_leaf.parent)
+        return current_leaf
+
+
+token_limit_tree: _Tree[_TokenLimit] = _Tree("token_limit_tree")
+# Store the message limit leaf node so that we know which limit to check in
+# check_message_limit().
+message_limit_tree: _Tree[_MessageLimit] = _Tree("message_limit_tree")
+
 
+class _Node:
+    """Mixin for objects used as nodes in a limit tree.
 
-
+    This allows us to have an "internal" parent property which is not exported as part
+    of the public API.
+    """
+
+    parent: Self | None
+
+    def _pop_and_check_identity(self, tree: _Tree[TNode]) -> None:
+        popped = tree.pop()
+        if popped is not self:
+            raise RuntimeError(
+                "The limit context manager being closed is not the leaf node in the "
+                "tree. Make sure to open and close the context managers in a "
+                "stack-like manner using a `with` statement."
+            )
+
+
+class _TokenLimit(Limit, _Node):
     def __init__(self, limit: int | None) -> None:
+        from inspect_ai.model._model_output import ModelUsage
+
+        super().__init__()
         self._validate_token_limit(limit)
-        self.
+        self._limit = limit
+        self._usage = ModelUsage()
 
     def __enter__(self) -> Limit:
-
-
-        # Note that we don't store new_node as an instance variable, because the context
-        # manager may be used across multiple execution contexts, or opened multiple
-        # times.
-        token_limit_leaf_node.set(new_node)
+        super()._check_reuse()
+        token_limit_tree.push(self)
         return self
 
     def __exit__(
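Why storing the leaf node in a ContextVar yields a tree rather than a stack: each async task copies the current context, inherits the current leaf as its parent, and then grows its own branch. A standalone sketch of that idea in plain asyncio (not inspect_ai code):

import asyncio
from contextvars import ContextVar
from dataclasses import dataclass
from typing import Optional

@dataclass
class Node:
    name: str
    parent: Optional["Node"]

# Leaf of the current execution context's branch (None = empty tree).
leaf: ContextVar[Optional[Node]] = ContextVar("leaf", default=None)

def push(name: str) -> None:
    # The new node's parent is whatever this context currently sees as the leaf.
    leaf.set(Node(name, leaf.get()))

async def branch(name: str) -> str:
    push(name)
    parts, node = [], leaf.get()
    while node is not None:
        parts.append(node.name)
        node = node.parent
    return " -> ".join(reversed(parts))

async def main() -> None:
    push("root")
    # Each task copies the context at creation time, so both branches share
    # the same root but neither sees the other's leaf.
    print(await asyncio.gather(branch("a"), branch("b")))
    # ['root -> a', 'root -> b']

asyncio.run(main())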
@@ -213,60 +294,22 @@ class _TokenLimit(Limit):
         exc_val: BaseException | None,
         exc_tb: TracebackType | None,
     ) -> None:
-
-        assert current_node is not None, (
-            "Token limit node should not be None when exiting context manager."
-        )
-        token_limit_leaf_node.set(current_node.parent)
+        self._pop_and_check_identity(token_limit_tree)
 
     @property
     def limit(self) -> int | None:
         """Get the configured token limit value."""
-        return self.
+        return self._limit
 
     @limit.setter
     def limit(self, value: int | None) -> None:
         """Update the token limit value.
 
-        This will affect the limit for all active token limit nodes derived from this
-        context manager.
-
         This does not trigger a check of the token limit (which could now have been
         exceeded).
         """
         self._validate_token_limit(value)
-        self.
-
-    def _validate_token_limit(self, value: int | None) -> None:
-        if value is not None and value < 0:
-            raise ValueError("Token limit value must be a non-negative integer.")
-
-
-class _TokenLimitNode:
-    def __init__(
-        self,
-        limit: _LimitValueWrapper,
-        parent: _TokenLimitNode | None,
-    ) -> None:
-        """
-        Initialize a token limit node.
-
-        Forms part of a tree structure. Each node has a pointer to its parent, or None
-        if it is the root node.
-
-        Tracks the token usage for this node and its parent nodes and checks if the
-        usage has exceeded a (variable) limit.
-
-        Args:
-            limit: The maximum number of tokens that can be used while the context
-                manager is open.
-            parent: The parent node in the tree.
-        """
-        from inspect_ai.model._model_output import ModelUsage
-
-        self._limit = limit
-        self.parent = parent
-        self._usage = ModelUsage()
+        self._limit = value
 
     def record(self, usage: ModelUsage) -> None:
         """Record model usage for this node and its parent nodes."""
@@ -275,41 +318,47 @@ class _TokenLimitNode:
         self._usage += usage
 
     def check(self) -> None:
-        """Check if this token limit or any
-
+        """Check if this token limit or any ancestor limits have been exceeded.
+
+        The checks occur from root to leaf. This is so that if multiple limits are
+        simultaneously exceeded, the outermost (closest to root) one raises the error,
+        preventing certain sub-agent architectures from ending up in an infinite loop.
+        """
         if self.parent is not None:
             self.parent.check()
+        self._check_self()
+
+    def _validate_token_limit(self, value: int | None) -> None:
+        if value is not None and value < 0:
+            raise ValueError(
+                f"Token limit value must be a non-negative integer or None: {value}"
+            )
 
     def _check_self(self) -> None:
         from inspect_ai.log._transcript import SampleLimitEvent, transcript
 
-        if self.
+        if self.limit is None:
             return
         total = self._usage.total_tokens
-        if total > self.
-            message =
-                f"Token limit exceeded. value: {total:,}; limit: {self._limit.value:,}"
-            )
+        if total > self.limit:
+            message = f"Token limit exceeded. value: {total:,}; limit: {self.limit:,}"
             transcript()._event(
-                SampleLimitEvent(type="token", limit=self.
+                SampleLimitEvent(type="token", limit=self.limit, message=message)
             )
             raise LimitExceededError(
-                "token", value=total, limit=self.
+                "token", value=total, limit=self.limit, message=message, source=self
             )
 
 
-class _MessageLimit(Limit):
+class _MessageLimit(Limit, _Node):
     def __init__(self, limit: int | None) -> None:
+        super().__init__()
         self._validate_message_limit(limit)
-        self.
+        self._limit = limit
 
     def __enter__(self) -> Limit:
-
-
-        # Note that we don't store new_node as an instance variable, because the context
-        # manager may be used across multiple execution contexts, or opened multiple
-        # times.
-        message_limit_leaf_node.set(new_node)
+        super()._check_reuse()
+        message_limit_tree.push(self)
         return self
 
     def __exit__(
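A hedged sketch of the root-to-leaf check order described in the new check() docstring. It imports record_model_usage and check_token_limit from the private inspect_ai.util._limit module shown in this diff and constructs ModelUsage(total_tokens=...) directly; both are assumptions made for illustration only:

from inspect_ai.model import ModelUsage
from inspect_ai.util import LimitExceededError, token_limit
from inspect_ai.util._limit import check_token_limit, record_model_usage

try:
    with token_limit(10_000):      # outer (closer to the root)
        with token_limit(1_000):   # inner (leaf)
            record_model_usage(ModelUsage(total_tokens=20_000))
            check_token_limit()
except LimitExceededError as ex:
    # Both limits are exceeded, but checks run root -> leaf, so the outer
    # 10,000 limit is the one that raises.
    print(ex.type, ex.limit)  # token 10000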
@@ -318,16 +367,12 @@ class _MessageLimit(Limit):
         exc_val: BaseException | None,
         exc_tb: TracebackType | None,
     ) -> None:
-
-        assert current_node is not None, (
-            "Message limit node should not be None when exiting context manager."
-        )
-        message_limit_leaf_node.set(current_node.parent)
+        self._pop_and_check_identity(message_limit_tree)
 
     @property
     def limit(self) -> int | None:
         """Get the configured message limit value."""
-        return self.
+        return self._limit
 
     @limit.setter
     def limit(self, value: int | None) -> None:
@@ -340,54 +385,32 @@ class _MessageLimit(Limit):
         exceeded).
         """
         self._validate_message_limit(value)
-        self.
-
-    def _validate_message_limit(self, value: int | None) -> None:
-        if value is not None and value < 0:
-            raise ValueError("Message limit value must be a non-negative integer.")
-
-
-class _MessageLimitNode:
-    def __init__(
-        self,
-        limit: _LimitValueWrapper,
-        parent: _MessageLimitNode | None,
-    ) -> None:
-        """
-        Initialize a message limit node.
-
-        Forms part of a tree structure. Each node has a pointer to its parent, or None
-        if it is the root node.
-
-        Checks if the message count for this node has exceeded a (variable) limit.
-
-        Args:
-            limit: The maximum conversation length (number of messages) allowed while this
-                node is the lead node of the current execution context.
-            parent: The parent node in the tree.
-        """
-        self._limit = limit
-        self.parent = parent
+        self._limit = value
 
     def check(self, count: int, raise_for_equal: bool) -> None:
         """Check if this message limit has been exceeded.
 
-        Does not check
+        Does not check ancestors.
         """
         from inspect_ai.log._transcript import SampleLimitEvent, transcript
 
-        if self.
+        if self.limit is None:
             return
-        limit
-
-            reached_or_exceeded = "reached" if count == limit else "exceeded"
+        if count > self.limit or (raise_for_equal and count == self.limit):
+            reached_or_exceeded = "reached" if count == self.limit else "exceeded"
             message = (
                 f"Message limit {reached_or_exceeded}. count: {count:,}; "
-                f"limit: {limit:,}"
+                f"limit: {self.limit:,}"
             )
             transcript()._event(
-                SampleLimitEvent(type="message", limit=limit, message=message)
+                SampleLimitEvent(type="message", limit=self.limit, message=message)
             )
             raise LimitExceededError(
-                "message", value=count, limit=limit, message=message
+                "message", value=count, limit=self.limit, message=message, source=self
+            )
+
+    def _validate_message_limit(self, value: int | None) -> None:
+        if value is not None and value < 0:
+            raise ValueError(
+                f"Message limit value must be a non-negative integer or None: {value}"
             )
{inspect_ai-0.3.97.dist-info → inspect_ai-0.3.99.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: inspect_ai
-Version: 0.3.97
+Version: 0.3.99
 Summary: Framework for large language model evaluations
 Author: UK AI Security Institute
 License: MIT License
@@ -49,7 +49,7 @@ Requires-Dist: semver>=3.0.0
 Requires-Dist: shortuuid
 Requires-Dist: sniffio
 Requires-Dist: tenacity
-Requires-Dist: textual
+Requires-Dist: textual<v3.0.0,>=0.86.2
 Requires-Dist: typing_extensions>=4.9.0
 Requires-Dist: zipp>=3.19.1
 Provides-Extra: dev
|