inspect-ai 0.3.98__py3-none-any.whl → 0.3.99__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. inspect_ai/__init__.py +2 -0
  2. inspect_ai/_cli/log.py +1 -1
  3. inspect_ai/_display/textual/widgets/transcript.py +15 -3
  4. inspect_ai/_eval/run.py +12 -4
  5. inspect_ai/_eval/task/log.py +1 -1
  6. inspect_ai/_eval/task/task.py +1 -1
  7. inspect_ai/_util/_async.py +1 -1
  8. inspect_ai/_view/schema.py +1 -0
  9. inspect_ai/_view/view.py +14 -0
  10. inspect_ai/_view/www/dist/assets/index.css +10 -10
  11. inspect_ai/_view/www/dist/assets/index.js +10 -10
  12. inspect_ai/_view/www/log-schema.json +45 -5
  13. inspect_ai/_view/www/src/@types/log.d.ts +11 -2
  14. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +2 -1
  15. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +2 -2
  16. inspect_ai/_view/www/src/app/samples/scores/SampleScoresGrid.module.css +2 -2
  17. inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +1 -1
  18. inspect_ai/agent/_run.py +44 -8
  19. inspect_ai/log/_bundle.py +5 -3
  20. inspect_ai/log/_log.py +2 -2
  21. inspect_ai/model/_providers/anthropic.py +3 -6
  22. inspect_ai/model/_providers/providers.py +1 -1
  23. inspect_ai/util/__init__.py +2 -0
  24. inspect_ai/util/_limit.py +160 -137
  25. {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.99.dist-info}/METADATA +1 -1
  26. {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.99.dist-info}/RECORD +30 -30
  27. {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.99.dist-info}/WHEEL +1 -1
  28. {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.99.dist-info}/entry_points.txt +0 -0
  29. {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.99.dist-info}/licenses/LICENSE +0 -0
  30. {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.99.dist-info}/top_level.txt +0 -0
@@ -356,12 +356,9 @@ class AnthropicAPI(ModelAPI):
356
356
  if isinstance(ex, APIStatusError):
357
357
  # for unknown reasons, anthropic does not always set status_code == 529
358
358
  # for "overloaded_error" so we check for it explicitly
359
- if (
360
- isinstance(ex.body, dict)
361
- and isinstance(ex.body.get("error", {}), dict)
362
- and ex.body.get("error", {}).get("type", "") == "overloaded_error"
363
- ):
364
- return True
359
+ if isinstance(ex.body, dict):
360
+ if "overloaded_error" in str(ex.body):
361
+ return True
365
362
 
366
363
  # standard http status code checking
367
364
  return is_retryable_http_status(ex.status_code)
@@ -105,7 +105,7 @@ def vertex() -> type[ModelAPI]:
105
105
  def google() -> type[ModelAPI]:
106
106
  FEATURE = "Google API"
107
107
  PACKAGE = "google-genai"
108
- MIN_VERSION = "1.12.1"
108
+ MIN_VERSION = "1.16.1"
109
109
 
110
110
  # verify we have the package
111
111
  try:
@@ -3,6 +3,7 @@ from inspect_ai._util.trace import trace_action, trace_message
3
3
  from inspect_ai.util._limit import (
4
4
  Limit,
5
5
  LimitExceededError,
6
+ LimitScope,
6
7
  apply_limits,
7
8
  message_limit,
8
9
  token_limit,
@@ -58,6 +59,7 @@ __all__ = [
58
59
  "resource",
59
60
  "subprocess",
60
61
  "LimitExceededError",
62
+ "LimitScope",
61
63
  "SandboxEnvironment",
62
64
  "SandboxEnvironmentConfigType",
63
65
  "SandboxEnvironmentLimits",
inspect_ai/util/_limit.py CHANGED
@@ -5,7 +5,9 @@ import logging
5
5
  from contextlib import ExitStack, contextmanager
6
6
  from contextvars import ContextVar
7
7
  from types import TracebackType
8
- from typing import TYPE_CHECKING, Iterator, Literal
8
+ from typing import TYPE_CHECKING, Generic, Iterator, Literal, TypeVar
9
+
10
+ from typing_extensions import Self
9
11
 
10
12
  from inspect_ai._util.logger import warn_once
11
13
 
@@ -16,18 +18,7 @@ if TYPE_CHECKING:
16
18
 
17
19
 
18
20
  logger = logging.getLogger(__name__)
19
-
20
- # Stores the current execution context's leaf _TokenLimitNode.
21
- # The resulting data structure is a tree of _TokenLimitNode nodes which each
22
- # have a pointer to their parent node. Each additional context manager inserts a new
23
- # child node into the tree. The fact that there can be multiple execution contexts is
24
- # what makes this a tree rather than a stack.
25
- token_limit_leaf_node: ContextVar[_TokenLimitNode | None] = ContextVar(
26
- "token_limit_leaf_node", default=None
27
- )
28
- message_limit_leaf_node: ContextVar[_MessageLimitNode | None] = ContextVar(
29
- "message_limit_leaf_node", default=None
30
- )
21
+ TNode = TypeVar("TNode", bound="_Node")
31
22
 
32
23
 
33
24
  class LimitExceededError(Exception):
@@ -42,6 +33,8 @@ class LimitExceededError(Exception):
42
33
  value: Value compared to.
43
34
  limit: Limit applied.
44
35
  message (str | None): Optional. Human readable message.
36
+ source (Limit | None): Optional. The `Limit` instance which was responsible for
37
+ raising this error.
45
38
  """
46
39
 
47
40
  def __init__(
@@ -51,11 +44,13 @@ class LimitExceededError(Exception):
51
44
  value: int,
52
45
  limit: int,
53
46
  message: str | None = None,
47
+ source: Limit | None = None,
54
48
  ) -> None:
55
49
  self.type = type
56
50
  self.value = value
57
51
  self.limit = limit
58
52
  self.message = f"Exceeded {type} limit: {limit:,}"
53
+ self.source = source
59
54
  super().__init__(message)
60
55
 
61
56
  def with_state(self, state: TaskState) -> LimitExceededError:
@@ -67,7 +62,10 @@ class LimitExceededError(Exception):
67
62
 
68
63
 
69
64
  class Limit(abc.ABC):
70
- """Base class for all limits."""
65
+ """Base class for all limit context managers."""
66
+
67
+ def __init__(self) -> None:
68
+ self._entered = False
71
69
 
72
70
  @abc.abstractmethod
73
71
  def __enter__(self) -> Limit:
@@ -82,20 +80,60 @@ class Limit(abc.ABC):
82
80
  ) -> None:
83
81
  pass
84
82
 
83
+ def _check_reuse(self) -> None:
84
+ if self._entered:
85
+ raise RuntimeError(
86
+ "Each Limit may only be used once in a single 'with' block. Please "
87
+ "create a new instance of the Limit."
88
+ )
89
+ self._entered = True
90
+
85
91
 
86
92
  @contextmanager
87
- def apply_limits(limits: list[Limit]) -> Iterator[None]:
93
+ def apply_limits(
94
+ limits: list[Limit], catch_errors: bool = False
95
+ ) -> Iterator[LimitScope]:
88
96
  """
89
97
  Apply a list of limits within a context manager.
90
98
 
99
+ Optionally catches any `LimitExceededError` raised by the applied limits, while
100
+ allowing other limit errors from any other scope (e.g. the Sample level) to
101
+ propagate.
102
+
103
+ Yields a `LimitScope` object which can be used once the context manager is closed
104
+ to determine which, if any, limits were exceeded.
105
+
91
106
  Args:
92
107
  limits: List of limits to apply while the context manager is open. Should a
93
- limit be exceeded, a LimitExceededError is raised.
108
+ limit be exceeded, a `LimitExceededError` is raised.
109
+ catch_errors: If True, catch any `LimitExceededError` raised by the applied
110
+ limits. Callers can determine whether any limits were exceeded by checking the
111
+ limit_error property of the `LimitScope` object yielded by this function. If
112
+ False, all `LimitExceededError` exceptions will be allowed to propagate.
94
113
  """
114
+ limit_scope = LimitScope()
95
115
  with ExitStack() as stack:
96
116
  for limit in limits:
97
117
  stack.enter_context(limit)
98
- yield
118
+ try:
119
+ yield limit_scope
120
+ except LimitExceededError as e:
121
+ # If it was not one of the limits we applied.
122
+ if e.source is None or e.source not in limits:
123
+ raise
124
+ limit_scope.limit_error = e
125
+ if not catch_errors:
126
+ raise
127
+
128
+
129
+ class LimitScope:
130
+ """Object returned from `apply_limits()`.
131
+
132
+ Used to check which, if any, limits were exceeded.
133
+ """
134
+
135
+ def __init__(self) -> None:
136
+ self.limit_error: LimitExceededError | None = None
99
137
 
100
138
 
101
139
  def token_limit(limit: int | None) -> _TokenLimit:
@@ -107,10 +145,10 @@ def token_limit(limit: int | None) -> _TokenLimit:
107
145
 
108
146
  These limits can be stacked.
109
147
 
110
- This relies on "cooperative" checking - consumers must call check_token_limit()
148
+ This relies on "cooperative" checking - consumers must call `check_token_limit()`
111
149
  themselves whenever tokens are consumed.
112
150
 
113
- When a limit is exceeded, a LimitExceededError is raised.
151
+ When a limit is exceeded, a `LimitExceededError` is raised.
114
152
 
115
153
  Args:
116
154
  limit: The maximum number of tokens that can be used while the context manager is
@@ -125,7 +163,7 @@ def record_model_usage(usage: ModelUsage) -> None:
125
163
 
126
164
  Does not check if the limit has been exceeded.
127
165
  """
128
- node = token_limit_leaf_node.get()
166
+ node = token_limit_tree.get()
129
167
  if node is None:
130
168
  return
131
169
  node.record(usage)
@@ -138,7 +176,7 @@ def check_token_limit() -> None:
138
176
 
139
177
  Note that all active token limits are checked, not just the most recent one.
140
178
  """
141
- node = token_limit_leaf_node.get()
179
+ node = token_limit_tree.get()
142
180
  if node is None:
143
181
  return
144
182
  node.check()
@@ -156,7 +194,7 @@ def message_limit(limit: int | None) -> _MessageLimit:
156
194
  This relies on "cooperative" checking - consumers must call check_message_limit()
157
195
  themselves whenever the message count is updated.
158
196
 
159
- When a limit is exceeded, a LimitExceededError is raised.
197
+ When a limit is exceeded, a `LimitExceededError` is raised.
160
198
 
161
199
  Args:
162
200
  limit: The maximum conversation length (number of messages) allowed while the
@@ -176,35 +214,78 @@ def check_message_limit(count: int, raise_for_equal: bool) -> None:
176
214
  limit, otherwise, only raise an error if the message count is greater than the
177
215
  limit.
178
216
  """
179
- node = message_limit_leaf_node.get()
217
+ node = message_limit_tree.get()
180
218
  if node is None:
181
219
  return
182
220
  node.check(count, raise_for_equal)
183
221
 
184
222
 
185
- class _LimitValueWrapper:
186
- """Container/wrapper type for the limit value.
223
+ class _Tree(Generic[TNode]):
224
+ """A tree data structure of limit nodes.
225
+
226
+ Each node has a pointer to its parent, or None if it is a root node.
187
227
 
188
- This facilitates updating the limit value, which may have been passed to many
189
- _TokenLimitNode instances.
228
+ Each additional context manager inserts a new child node into the tree. The fact
229
+ that there can be multiple execution contexts is what makes this a tree rather than
230
+ a stack and why a context variable is used to store the leaf node.
190
231
  """
191
232
 
192
- def __init__(self, value: int | None) -> None:
193
- self.value = value
233
+ def __init__(self, id: str) -> None:
234
+ self._leaf_node: ContextVar[TNode | None] = ContextVar(id, default=None)
235
+
236
+ def get(self) -> TNode | None:
237
+ return self._leaf_node.get()
238
+
239
+ def push(self, new_node: TNode) -> None:
240
+ current_leaf = self._leaf_node.get()
241
+ new_node.parent = current_leaf
242
+ self._leaf_node.set(new_node)
243
+
244
+ def pop(self) -> TNode:
245
+ current_leaf = self._leaf_node.get()
246
+ if current_leaf is None:
247
+ raise RuntimeError("Limit tree is empty. Cannot pop from an empty tree.")
248
+ self._leaf_node.set(current_leaf.parent)
249
+ return current_leaf
250
+
251
+
252
+ token_limit_tree: _Tree[_TokenLimit] = _Tree("token_limit_tree")
253
+ # Store the message limit leaf node so that we know which limit to check in
254
+ # check_message_limit().
255
+ message_limit_tree: _Tree[_MessageLimit] = _Tree("message_limit_tree")
256
+
194
257
 
258
+ class _Node:
259
+ """Mixin for objects used as nodes in a limit tree.
195
260
 
196
- class _TokenLimit(Limit):
261
+ This allows us to have an "internal" parent property which is not exported as part
262
+ of the public API.
263
+ """
264
+
265
+ parent: Self | None
266
+
267
+ def _pop_and_check_identity(self, tree: _Tree[TNode]) -> None:
268
+ popped = tree.pop()
269
+ if popped is not self:
270
+ raise RuntimeError(
271
+ "The limit context manager being closed is not the leaf node in the "
272
+ "tree. Make sure to open and close the context managers in a "
273
+ "stack-like manner using a `with` statement."
274
+ )
275
+
276
+
277
+ class _TokenLimit(Limit, _Node):
197
278
  def __init__(self, limit: int | None) -> None:
279
+ from inspect_ai.model._model_output import ModelUsage
280
+
281
+ super().__init__()
198
282
  self._validate_token_limit(limit)
199
- self._limit_value_wrapper = _LimitValueWrapper(limit)
283
+ self._limit = limit
284
+ self._usage = ModelUsage()
200
285
 
201
286
  def __enter__(self) -> Limit:
202
- current_node = token_limit_leaf_node.get()
203
- new_node = _TokenLimitNode(self._limit_value_wrapper, current_node)
204
- # Note that we don't store new_node as an instance variable, because the context
205
- # manager may be used across multiple execution contexts, or opened multiple
206
- # times.
207
- token_limit_leaf_node.set(new_node)
287
+ super()._check_reuse()
288
+ token_limit_tree.push(self)
208
289
  return self
209
290
 
210
291
  def __exit__(
@@ -213,60 +294,22 @@ class _TokenLimit(Limit):
213
294
  exc_val: BaseException | None,
214
295
  exc_tb: TracebackType | None,
215
296
  ) -> None:
216
- current_node = token_limit_leaf_node.get()
217
- assert current_node is not None, (
218
- "Token limit node should not be None when exiting context manager."
219
- )
220
- token_limit_leaf_node.set(current_node.parent)
297
+ self._pop_and_check_identity(token_limit_tree)
221
298
 
222
299
  @property
223
300
  def limit(self) -> int | None:
224
301
  """Get the configured token limit value."""
225
- return self._limit_value_wrapper.value
302
+ return self._limit
226
303
 
227
304
  @limit.setter
228
305
  def limit(self, value: int | None) -> None:
229
306
  """Update the token limit value.
230
307
 
231
- This will affect the limit for all active token limit nodes derived from this
232
- context manager.
233
-
234
308
  This does not trigger a check of the token limit (which could now have been
235
309
  exceeded).
236
310
  """
237
311
  self._validate_token_limit(value)
238
- self._limit_value_wrapper.value = value
239
-
240
- def _validate_token_limit(self, value: int | None) -> None:
241
- if value is not None and value < 0:
242
- raise ValueError("Token limit value must be a non-negative integer.")
243
-
244
-
245
- class _TokenLimitNode:
246
- def __init__(
247
- self,
248
- limit: _LimitValueWrapper,
249
- parent: _TokenLimitNode | None,
250
- ) -> None:
251
- """
252
- Initialize a token limit node.
253
-
254
- Forms part of a tree structure. Each node has a pointer to its parent, or None
255
- if it is the root node.
256
-
257
- Tracks the token usage for this node and its parent nodes and checks if the
258
- usage has exceeded a (variable) limit.
259
-
260
- Args:
261
- limit: The maximum number of tokens that can be used while the context
262
- manager is open.
263
- parent: The parent node in the tree.
264
- """
265
- from inspect_ai.model._model_output import ModelUsage
266
-
267
- self._limit = limit
268
- self.parent = parent
269
- self._usage = ModelUsage()
312
+ self._limit = value
270
313
 
271
314
  def record(self, usage: ModelUsage) -> None:
272
315
  """Record model usage for this node and its parent nodes."""
@@ -275,41 +318,47 @@ class _TokenLimitNode:
275
318
  self._usage += usage
276
319
 
277
320
  def check(self) -> None:
278
- """Check if this token limit or any parent limits have been exceeded."""
279
- self._check_self()
321
+ """Check if this token limit or any ancestor limits have been exceeded.
322
+
323
+ The checks occur from root to leaf. This is so that if multiple limits are
324
+ simultaneously exceeded, the outermost (closest to root) one raises the error,
325
+ preventing certain sub-agent architectures from ending up in an infinite loop.
326
+ """
280
327
  if self.parent is not None:
281
328
  self.parent.check()
329
+ self._check_self()
330
+
331
+ def _validate_token_limit(self, value: int | None) -> None:
332
+ if value is not None and value < 0:
333
+ raise ValueError(
334
+ f"Token limit value must be a non-negative integer or None: {value}"
335
+ )
282
336
 
283
337
  def _check_self(self) -> None:
284
338
  from inspect_ai.log._transcript import SampleLimitEvent, transcript
285
339
 
286
- if self._limit.value is None:
340
+ if self.limit is None:
287
341
  return
288
342
  total = self._usage.total_tokens
289
- if total > self._limit.value:
290
- message = (
291
- f"Token limit exceeded. value: {total:,}; limit: {self._limit.value:,}"
292
- )
343
+ if total > self.limit:
344
+ message = f"Token limit exceeded. value: {total:,}; limit: {self.limit:,}"
293
345
  transcript()._event(
294
- SampleLimitEvent(type="token", limit=self._limit.value, message=message)
346
+ SampleLimitEvent(type="token", limit=self.limit, message=message)
295
347
  )
296
348
  raise LimitExceededError(
297
- "token", value=total, limit=self._limit.value, message=message
349
+ "token", value=total, limit=self.limit, message=message, source=self
298
350
  )
299
351
 
300
352
 
301
- class _MessageLimit(Limit):
353
+ class _MessageLimit(Limit, _Node):
302
354
  def __init__(self, limit: int | None) -> None:
355
+ super().__init__()
303
356
  self._validate_message_limit(limit)
304
- self._limit_value_wrapper = _LimitValueWrapper(limit)
357
+ self._limit = limit
305
358
 
306
359
  def __enter__(self) -> Limit:
307
- current_node = message_limit_leaf_node.get()
308
- new_node = _MessageLimitNode(self._limit_value_wrapper, current_node)
309
- # Note that we don't store new_node as an instance variable, because the context
310
- # manager may be used across multiple execution contexts, or opened multiple
311
- # times.
312
- message_limit_leaf_node.set(new_node)
360
+ super()._check_reuse()
361
+ message_limit_tree.push(self)
313
362
  return self
314
363
 
315
364
  def __exit__(
@@ -318,16 +367,12 @@ class _MessageLimit(Limit):
318
367
  exc_val: BaseException | None,
319
368
  exc_tb: TracebackType | None,
320
369
  ) -> None:
321
- current_node = message_limit_leaf_node.get()
322
- assert current_node is not None, (
323
- "Message limit node should not be None when exiting context manager."
324
- )
325
- message_limit_leaf_node.set(current_node.parent)
370
+ self._pop_and_check_identity(message_limit_tree)
326
371
 
327
372
  @property
328
373
  def limit(self) -> int | None:
329
374
  """Get the configured message limit value."""
330
- return self._limit_value_wrapper.value
375
+ return self._limit
331
376
 
332
377
  @limit.setter
333
378
  def limit(self, value: int | None) -> None:
@@ -340,54 +385,32 @@ class _MessageLimit(Limit):
340
385
  exceeded).
341
386
  """
342
387
  self._validate_message_limit(value)
343
- self._limit_value_wrapper.value = value
344
-
345
- def _validate_message_limit(self, value: int | None) -> None:
346
- if value is not None and value < 0:
347
- raise ValueError("Message limit value must be a non-negative integer.")
348
-
349
-
350
- class _MessageLimitNode:
351
- def __init__(
352
- self,
353
- limit: _LimitValueWrapper,
354
- parent: _MessageLimitNode | None,
355
- ) -> None:
356
- """
357
- Initialize a message limit node.
358
-
359
- Forms part of a tree structure. Each node has a pointer to its parent, or None
360
- if it is the root node.
361
-
362
- Checks if the message count for this node has exceeded a (variable) limit.
363
-
364
- Args:
365
- limit: The maximum conversation length (number of messages) allowed while this
366
- node is the lead node of the current execution context.
367
- parent: The parent node in the tree.
368
- """
369
- self._limit = limit
370
- self.parent = parent
388
+ self._limit = value
371
389
 
372
390
  def check(self, count: int, raise_for_equal: bool) -> None:
373
391
  """Check if this message limit has been exceeded.
374
392
 
375
- Does not check parents.
393
+ Does not check ancestors.
376
394
  """
377
395
  from inspect_ai.log._transcript import SampleLimitEvent, transcript
378
396
 
379
- if self._limit.value is None:
397
+ if self.limit is None:
380
398
  return
381
- limit = self._limit.value
382
- if count > limit or (raise_for_equal and count == limit):
383
- reached_or_exceeded = "reached" if count == limit else "exceeded"
399
+ if count > self.limit or (raise_for_equal and count == self.limit):
400
+ reached_or_exceeded = "reached" if count == self.limit else "exceeded"
384
401
  message = (
385
402
  f"Message limit {reached_or_exceeded}. count: {count:,}; "
386
- f"limit: {limit:,}"
403
+ f"limit: {self.limit:,}"
387
404
  )
388
405
  transcript()._event(
389
- SampleLimitEvent(type="message", limit=limit, message=message)
406
+ SampleLimitEvent(type="message", limit=self.limit, message=message)
390
407
  )
391
408
  raise LimitExceededError(
392
- "message", value=count, limit=limit, message=message
409
+ "message", value=count, limit=self.limit, message=message, source=self
410
+ )
411
+
412
+ def _validate_message_limit(self, value: int | None) -> None:
413
+ if value is not None and value < 0:
414
+ raise ValueError(
415
+ f"Message limit value must be a non-negative integer or None: {value}"
393
416
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: inspect_ai
3
- Version: 0.3.98
3
+ Version: 0.3.99
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Security Institute
6
6
  License: MIT License