inspect-ai 0.3.98__py3-none-any.whl → 0.3.100__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. inspect_ai/__init__.py +2 -0
  2. inspect_ai/_cli/log.py +1 -1
  3. inspect_ai/_display/core/config.py +11 -5
  4. inspect_ai/_display/core/panel.py +66 -2
  5. inspect_ai/_display/core/textual.py +5 -2
  6. inspect_ai/_display/plain/display.py +1 -0
  7. inspect_ai/_display/rich/display.py +2 -2
  8. inspect_ai/_display/textual/widgets/transcript.py +41 -1
  9. inspect_ai/_eval/run.py +12 -4
  10. inspect_ai/_eval/score.py +2 -4
  11. inspect_ai/_eval/task/log.py +1 -1
  12. inspect_ai/_eval/task/run.py +59 -81
  13. inspect_ai/_eval/task/task.py +1 -1
  14. inspect_ai/_util/_async.py +1 -1
  15. inspect_ai/_util/content.py +11 -6
  16. inspect_ai/_util/interrupt.py +2 -2
  17. inspect_ai/_util/text.py +7 -0
  18. inspect_ai/_util/working.py +8 -37
  19. inspect_ai/_view/__init__.py +0 -0
  20. inspect_ai/_view/schema.py +3 -1
  21. inspect_ai/_view/view.py +14 -0
  22. inspect_ai/_view/www/CLAUDE.md +15 -0
  23. inspect_ai/_view/www/dist/assets/index.css +273 -169
  24. inspect_ai/_view/www/dist/assets/index.js +20079 -17019
  25. inspect_ai/_view/www/log-schema.json +122 -8
  26. inspect_ai/_view/www/package.json +5 -1
  27. inspect_ai/_view/www/src/@types/log.d.ts +20 -2
  28. inspect_ai/_view/www/src/app/App.tsx +1 -15
  29. inspect_ai/_view/www/src/app/appearance/icons.ts +4 -1
  30. inspect_ai/_view/www/src/app/content/MetaDataGrid.tsx +24 -6
  31. inspect_ai/_view/www/src/app/content/MetadataGrid.module.css +0 -5
  32. inspect_ai/_view/www/src/app/content/RenderedContent.tsx +221 -205
  33. inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +2 -1
  34. inspect_ai/_view/www/src/app/log-view/tabs/SamplesTab.tsx +5 -0
  35. inspect_ai/_view/www/src/app/routing/url.ts +84 -4
  36. inspect_ai/_view/www/src/app/samples/InlineSampleDisplay.module.css +0 -5
  37. inspect_ai/_view/www/src/app/samples/SampleDialog.module.css +1 -1
  38. inspect_ai/_view/www/src/app/samples/SampleDisplay.module.css +7 -0
  39. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +26 -19
  40. inspect_ai/_view/www/src/app/samples/SampleSummaryView.module.css +1 -2
  41. inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +8 -6
  42. inspect_ai/_view/www/src/app/samples/chat/ChatMessageRow.tsx +0 -4
  43. inspect_ai/_view/www/src/app/samples/chat/ChatViewVirtualList.tsx +3 -2
  44. inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +2 -0
  45. inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +2 -0
  46. inspect_ai/_view/www/src/app/samples/chat/messages.ts +1 -0
  47. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +1 -0
  48. inspect_ai/_view/www/src/app/samples/list/SampleRow.tsx +1 -1
  49. inspect_ai/_view/www/src/app/samples/scores/SampleScoresGrid.module.css +2 -2
  50. inspect_ai/_view/www/src/app/samples/transcript/ErrorEventView.tsx +2 -3
  51. inspect_ai/_view/www/src/app/samples/transcript/InfoEventView.tsx +1 -1
  52. inspect_ai/_view/www/src/app/samples/transcript/InputEventView.tsx +1 -2
  53. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.module.css +1 -1
  54. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
  55. inspect_ai/_view/www/src/app/samples/transcript/SampleInitEventView.tsx +1 -1
  56. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +3 -2
  57. inspect_ai/_view/www/src/app/samples/transcript/SandboxEventView.tsx +4 -5
  58. inspect_ai/_view/www/src/app/samples/transcript/ScoreEventView.tsx +1 -1
  59. inspect_ai/_view/www/src/app/samples/transcript/SpanEventView.tsx +1 -2
  60. inspect_ai/_view/www/src/app/samples/transcript/StepEventView.tsx +1 -3
  61. inspect_ai/_view/www/src/app/samples/transcript/SubtaskEventView.tsx +1 -2
  62. inspect_ai/_view/www/src/app/samples/transcript/ToolEventView.tsx +3 -4
  63. inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.module.css +42 -0
  64. inspect_ai/_view/www/src/app/samples/transcript/TranscriptPanel.tsx +77 -0
  65. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualList.tsx +27 -71
  66. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.module.css +13 -3
  67. inspect_ai/_view/www/src/app/samples/transcript/TranscriptVirtualListComponent.tsx +27 -2
  68. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.module.css +1 -0
  69. inspect_ai/_view/www/src/app/samples/transcript/event/EventPanel.tsx +21 -22
  70. inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.module.css +45 -0
  71. inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +223 -0
  72. inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.module.css +10 -0
  73. inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +258 -0
  74. inspect_ai/_view/www/src/app/samples/transcript/outline/tree-visitors.ts +187 -0
  75. inspect_ai/_view/www/src/app/samples/transcript/state/StateEventRenderers.tsx +8 -1
  76. inspect_ai/_view/www/src/app/samples/transcript/state/StateEventView.tsx +3 -4
  77. inspect_ai/_view/www/src/app/samples/transcript/transform/hooks.ts +78 -0
  78. inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +340 -135
  79. inspect_ai/_view/www/src/app/samples/transcript/transform/utils.ts +3 -0
  80. inspect_ai/_view/www/src/app/samples/transcript/types.ts +2 -0
  81. inspect_ai/_view/www/src/app/types.ts +5 -1
  82. inspect_ai/_view/www/src/client/api/api-browser.ts +2 -2
  83. inspect_ai/_view/www/src/components/LiveVirtualList.tsx +6 -1
  84. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +1 -1
  85. inspect_ai/_view/www/src/components/PopOver.tsx +422 -0
  86. inspect_ai/_view/www/src/components/PulsingDots.module.css +9 -9
  87. inspect_ai/_view/www/src/components/PulsingDots.tsx +4 -1
  88. inspect_ai/_view/www/src/components/StickyScroll.tsx +183 -0
  89. inspect_ai/_view/www/src/components/TabSet.tsx +4 -0
  90. inspect_ai/_view/www/src/state/hooks.ts +52 -2
  91. inspect_ai/_view/www/src/state/logSlice.ts +4 -3
  92. inspect_ai/_view/www/src/state/samplePolling.ts +8 -0
  93. inspect_ai/_view/www/src/state/sampleSlice.ts +53 -9
  94. inspect_ai/_view/www/src/state/scrolling.ts +152 -0
  95. inspect_ai/_view/www/src/utils/attachments.ts +7 -0
  96. inspect_ai/_view/www/src/utils/python.ts +18 -0
  97. inspect_ai/_view/www/yarn.lock +269 -6
  98. inspect_ai/agent/_react.py +12 -7
  99. inspect_ai/agent/_run.py +46 -11
  100. inspect_ai/analysis/beta/_dataframe/samples/table.py +19 -18
  101. inspect_ai/log/_bundle.py +5 -3
  102. inspect_ai/log/_log.py +3 -3
  103. inspect_ai/log/_recorders/file.py +2 -9
  104. inspect_ai/log/_transcript.py +1 -1
  105. inspect_ai/model/_call_tools.py +6 -2
  106. inspect_ai/model/_openai.py +1 -1
  107. inspect_ai/model/_openai_responses.py +78 -39
  108. inspect_ai/model/_openai_web_search.py +31 -0
  109. inspect_ai/model/_providers/anthropic.py +3 -6
  110. inspect_ai/model/_providers/azureai.py +72 -3
  111. inspect_ai/model/_providers/openai.py +2 -1
  112. inspect_ai/model/_providers/providers.py +1 -1
  113. inspect_ai/scorer/_metric.py +1 -2
  114. inspect_ai/solver/_task_state.py +2 -2
  115. inspect_ai/tool/_tool.py +6 -2
  116. inspect_ai/tool/_tool_def.py +27 -4
  117. inspect_ai/tool/_tool_info.py +2 -0
  118. inspect_ai/tool/_tools/_web_search/_google.py +15 -4
  119. inspect_ai/tool/_tools/_web_search/_tavily.py +35 -12
  120. inspect_ai/tool/_tools/_web_search/_web_search.py +214 -45
  121. inspect_ai/util/__init__.py +6 -0
  122. inspect_ai/util/_json.py +3 -0
  123. inspect_ai/util/_limit.py +374 -141
  124. inspect_ai/util/_sandbox/docker/compose.py +20 -11
  125. inspect_ai/util/_span.py +1 -1
  126. {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/METADATA +3 -3
  127. {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/RECORD +131 -117
  128. {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/WHEEL +1 -1
  129. {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/entry_points.txt +0 -0
  130. {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/licenses/LICENSE +0 -0
  131. {inspect_ai-0.3.98.dist-info → inspect_ai-0.3.100.dist-info}/top_level.txt +0 -0
inspect_ai/util/_limit.py CHANGED
@@ -5,7 +5,10 @@ import logging
5
5
  from contextlib import ExitStack, contextmanager
6
6
  from contextvars import ContextVar
7
7
  from types import TracebackType
8
- from typing import TYPE_CHECKING, Iterator, Literal
8
+ from typing import TYPE_CHECKING, Generic, Iterator, Literal, TypeVar
9
+
10
+ import anyio
11
+ from typing_extensions import Self
9
12
 
10
13
  from inspect_ai._util.logger import warn_once
11
14
 
@@ -16,18 +19,7 @@ if TYPE_CHECKING:
16
19
 
17
20
 
18
21
  logger = logging.getLogger(__name__)
19
-
20
- # Stores the current execution context's leaf _TokenLimitNode.
21
- # The resulting data structure is a tree of _TokenLimitNode nodes which each
22
- # have a pointer to their parent node. Each additional context manager inserts a new
23
- # child node into the tree. The fact that there can be multiple execution contexts is
24
- # what makes this a tree rather than a stack.
25
- token_limit_leaf_node: ContextVar[_TokenLimitNode | None] = ContextVar(
26
- "token_limit_leaf_node", default=None
27
- )
28
- message_limit_leaf_node: ContextVar[_MessageLimitNode | None] = ContextVar(
29
- "message_limit_leaf_node", default=None
30
- )
22
+ TNode = TypeVar("TNode", bound="_Node")
31
23
 
32
24
 
33
25
  class LimitExceededError(Exception):
@@ -42,20 +34,25 @@ class LimitExceededError(Exception):
42
34
  value: Value compared to.
43
35
  limit: Limit applied.
44
36
  message (str | None): Optional. Human readable message.
37
+ source (Limit | None): Optional. The `Limit` instance which was responsible for raising this error.
45
38
  """
46
39
 
47
40
  def __init__(
48
41
  self,
49
42
  type: Literal["message", "time", "working", "token", "operator", "custom"],
50
43
  *,
51
- value: int,
52
- limit: int,
44
+ value: float,
45
+ limit: float,
53
46
  message: str | None = None,
47
+ source: Limit | None = None,
54
48
  ) -> None:
55
49
  self.type = type
56
50
  self.value = value
51
+ self.value_str = self._format_float_or_int(value)
57
52
  self.limit = limit
53
+ self.limit_str = self._format_float_or_int(limit)
58
54
  self.message = f"Exceeded {type} limit: {limit:,}"
55
+ self.source = source
59
56
  super().__init__(message)
60
57
 
61
58
  def with_state(self, state: TaskState) -> LimitExceededError:
@@ -65,9 +62,18 @@ class LimitExceededError(Exception):
65
62
  )
66
63
  return self
67
64
 
65
+ def _format_float_or_int(self, value: float | int) -> str:
66
+ if isinstance(value, int):
67
+ return f"{value:,}"
68
+ else:
69
+ return f"{value:,.2f}"
70
+
68
71
 
69
72
  class Limit(abc.ABC):
70
- """Base class for all limits."""
73
+ """Base class for all limit context managers."""
74
+
75
+ def __init__(self) -> None:
76
+ self._entered = False
71
77
 
72
78
  @abc.abstractmethod
73
79
  def __enter__(self) -> Limit:
@@ -82,35 +88,81 @@ class Limit(abc.ABC):
82
88
  ) -> None:
83
89
  pass
84
90
 
91
+ @property
92
+ @abc.abstractmethod
93
+ def usage(self) -> float:
94
+ """The current usage of the resource being limited."""
95
+ pass
96
+
97
+ def _check_reuse(self) -> None:
98
+ if self._entered:
99
+ raise RuntimeError(
100
+ "Each Limit may only be used once in a single 'with' block. Please "
101
+ "create a new instance of the Limit."
102
+ )
103
+ self._entered = True
104
+
85
105
 
86
106
  @contextmanager
87
- def apply_limits(limits: list[Limit]) -> Iterator[None]:
107
+ def apply_limits(
108
+ limits: list[Limit], catch_errors: bool = False
109
+ ) -> Iterator[LimitScope]:
88
110
  """
89
111
  Apply a list of limits within a context manager.
90
112
 
113
+ Optionally catches any `LimitExceededError` raised by the applied limits, while
114
+ allowing other limit errors from any other scope (e.g. the Sample level) to
115
+ propagate.
116
+
117
+ Yields a `LimitScope` object which can be used once the context manager is closed
118
+ to determine which, if any, limits were exceeded.
119
+
91
120
  Args:
92
121
  limits: List of limits to apply while the context manager is open. Should a
93
- limit be exceeded, a LimitExceededError is raised.
122
+ limit be exceeded, a `LimitExceededError` is raised.
123
+ catch_errors: If True, catch any `LimitExceededError` raised by the applied
124
+ limits. Callers can determine whether any limits were exceeded by checking the
125
+ limit_error property of the `LimitScope` object yielded by this function. If
126
+ False, all `LimitExceededError` exceptions will be allowed to propagate.
127
+ """
128
+ limit_scope = LimitScope()
129
+ # Try scope is outside the `with ExitStack()` so that we can catch any errors raised
130
+ # when exiting it (which will be where time_limit() would raise LimitExceededError).
131
+ try:
132
+ with ExitStack() as stack:
133
+ for limit in limits:
134
+ stack.enter_context(limit)
135
+ yield limit_scope
136
+ except LimitExceededError as e:
137
+ # If it was not one of the limits we applied.
138
+ if e.source is None or e.source not in limits:
139
+ raise
140
+ limit_scope.limit_error = e
141
+ if not catch_errors:
142
+ raise
143
+
144
+
145
+ class LimitScope:
146
+ """Object returned from `apply_limits()`.
147
+
148
+ Used to check which, if any, limits were exceeded.
94
149
  """
95
- with ExitStack() as stack:
96
- for limit in limits:
97
- stack.enter_context(limit)
98
- yield
150
+
151
+ def __init__(self) -> None:
152
+ self.limit_error: LimitExceededError | None = None
99
153
 
100
154
 
101
155
  def token_limit(limit: int | None) -> _TokenLimit:
102
156
  """Limits the total number of tokens which can be used.
103
157
 
104
158
  The counter starts when the context manager is opened and ends when it is closed.
105
- The context manager can be opened multiple times, even in different execution
106
- contexts.
107
159
 
108
160
  These limits can be stacked.
109
161
 
110
- This relies on "cooperative" checking - consumers must call check_token_limit()
162
+ This relies on "cooperative" checking - consumers must call `check_token_limit()`
111
163
  themselves whenever tokens are consumed.
112
164
 
113
- When a limit is exceeded, a LimitExceededError is raised.
165
+ When a limit is exceeded, a `LimitExceededError` is raised.
114
166
 
115
167
  Args:
116
168
  limit: The maximum number of tokens that can be used while the context manager is
@@ -125,7 +177,7 @@ def record_model_usage(usage: ModelUsage) -> None:
125
177
 
126
178
  Does not check if the limit has been exceeded.
127
179
  """
128
- node = token_limit_leaf_node.get()
180
+ node = token_limit_tree.get()
129
181
  if node is None:
130
182
  return
131
183
  node.record(usage)
@@ -138,7 +190,7 @@ def check_token_limit() -> None:
138
190
 
139
191
  Note that all active token limits are checked, not just the most recent one.
140
192
  """
141
- node = token_limit_leaf_node.get()
193
+ node = token_limit_tree.get()
142
194
  if node is None:
143
195
  return
144
196
  node.check()
@@ -148,15 +200,14 @@ def message_limit(limit: int | None) -> _MessageLimit:
148
200
  """Limits the number of messages in a conversation.
149
201
 
150
202
  The total number of messages in the conversation are compared to the limit (not just
151
- "new" messages). The context manager can be opened multiple times, even in different
152
- execution contexts.
203
+ "new" messages).
153
204
 
154
205
  These limits can be stacked.
155
206
 
156
207
  This relies on "cooperative" checking - consumers must call check_message_limit()
157
208
  themselves whenever the message count is updated.
158
209
 
159
- When a limit is exceeded, a LimitExceededError is raised.
210
+ When a limit is exceeded, a `LimitExceededError` is raised.
160
211
 
161
212
  Args:
162
213
  limit: The maximum conversation length (number of messages) allowed while the
@@ -176,35 +227,135 @@ def check_message_limit(count: int, raise_for_equal: bool) -> None:
176
227
  limit, otherwise, only raise an error if the message count is greater than the
177
228
  limit.
178
229
  """
179
- node = message_limit_leaf_node.get()
230
+ node = message_limit_tree.get()
180
231
  if node is None:
181
232
  return
182
233
  node.check(count, raise_for_equal)
183
234
 
184
235
 
185
- class _LimitValueWrapper:
186
- """Container/wrapper type for the limit value.
236
+ def time_limit(limit: float | None) -> _TimeLimit:
237
+ """Limits the wall clock time which can elapse.
238
+
239
+ The timer starts when the context manager is opened and stops when it is closed.
240
+
241
+ These limits can be stacked.
242
+
243
+ When a limit is exceeded, the code block is cancelled and a `LimitExceededError` is
244
+ raised.
245
+
246
+ Uses anyio's cancellation scopes meaning that the operations within the context
247
+ manager block are cancelled if the limit is exceeded. The `LimitExceededError` is
248
+ therefore raised at the level that the `time_limit()` context manager was opened,
249
+ not at the level of the operation which caused the limit to be exceeded (e.g. a call
250
+ to `generate()`). Ensure you handle `LimitExceededError` at the level of opening the context manager.
251
+
252
+ Args:
253
+ limit: The maximum number of seconds that can pass while the context manager is
254
+ open. A value of None means unlimited time.
255
+ """
256
+ return _TimeLimit(limit)
257
+
258
+
259
+ def working_limit(limit: float | None) -> _WorkingLimit:
260
+ """Limits the working time which can elapse.
261
+
262
+ Working time is the wall clock time minus any waiting time e.g. waiting before
263
+ retrying in response to rate limits or waiting on a semaphore.
187
264
 
188
- This facilitates updating the limit value, which may have been passed to many
189
- _TokenLimitNode instances.
265
+ The timer starts when the context manager is opened and stops when it is closed.
266
+
267
+ These limits can be stacked.
268
+
269
+ When a limit is exceeded, a `LimitExceededError` is raised.
270
+
271
+ Args:
272
+ limit: The maximum number of seconds of working that can pass while the context
273
+ manager is open. A value of None means unlimited time.
190
274
  """
275
+ return _WorkingLimit(limit)
276
+
277
+
278
+ def record_waiting_time(waiting_time: float) -> None:
279
+ node = working_limit_tree.get()
280
+ if node is None:
281
+ return
282
+ node.record_waiting_time(waiting_time)
283
+
284
+
285
+ def check_working_limit() -> None:
286
+ node = working_limit_tree.get()
287
+ if node is None:
288
+ return
289
+ node.check()
191
290
 
192
- def __init__(self, value: int | None) -> None:
193
- self.value = value
194
291
 
292
+ class _Tree(Generic[TNode]):
293
+ """A tree data structure of limit nodes.
195
294
 
196
- class _TokenLimit(Limit):
295
+ Each node has a pointer to its parent, or None if it is a root node.
296
+
297
+ Each additional context manager inserts a new child node into the tree. The fact
298
+ that there can be multiple execution contexts is what makes this a tree rather than
299
+ a stack and why a context variable is used to store the leaf node.
300
+ """
301
+
302
+ def __init__(self, id: str) -> None:
303
+ self._leaf_node: ContextVar[TNode | None] = ContextVar(id, default=None)
304
+
305
+ def get(self) -> TNode | None:
306
+ return self._leaf_node.get()
307
+
308
+ def push(self, new_node: TNode) -> None:
309
+ current_leaf = self._leaf_node.get()
310
+ new_node.parent = current_leaf
311
+ self._leaf_node.set(new_node)
312
+
313
+ def pop(self) -> TNode:
314
+ current_leaf = self._leaf_node.get()
315
+ if current_leaf is None:
316
+ raise RuntimeError("Limit tree is empty. Cannot pop from an empty tree.")
317
+ self._leaf_node.set(current_leaf.parent)
318
+ return current_leaf
319
+
320
+
321
+ token_limit_tree: _Tree[_TokenLimit] = _Tree("token_limit_tree")
322
+ # Store the message limit leaf node so that we know which limit to check in
323
+ # check_message_limit().
324
+ message_limit_tree: _Tree[_MessageLimit] = _Tree("message_limit_tree")
325
+ working_limit_tree: _Tree[_WorkingLimit] = _Tree("working_limit_tree")
326
+
327
+
328
+ class _Node:
329
+ """Mixin for objects used as nodes in a limit tree.
330
+
331
+ This allows us to have an "internal" parent property which is not exported as part
332
+ of the public API.
333
+ """
334
+
335
+ parent: Self | None
336
+
337
+ def _pop_and_check_identity(self, tree: _Tree[TNode]) -> None:
338
+ popped = tree.pop()
339
+ if popped is not self:
340
+ raise RuntimeError(
341
+ "The limit context manager being closed is not the leaf node in the "
342
+ "tree. Make sure to open and close the context managers in a "
343
+ "stack-like manner using a `with` statement."
344
+ )
345
+
346
+
347
+ class _TokenLimit(Limit, _Node):
197
348
  def __init__(self, limit: int | None) -> None:
349
+ from inspect_ai.model._model_output import ModelUsage
350
+
351
+ super().__init__()
198
352
  self._validate_token_limit(limit)
199
- self._limit_value_wrapper = _LimitValueWrapper(limit)
353
+ self._limit = limit
354
+ self._usage = ModelUsage()
200
355
 
201
356
  def __enter__(self) -> Limit:
202
- current_node = token_limit_leaf_node.get()
203
- new_node = _TokenLimitNode(self._limit_value_wrapper, current_node)
204
- # Note that we don't store new_node as an instance variable, because the context
205
- # manager may be used across multiple execution contexts, or opened multiple
206
- # times.
207
- token_limit_leaf_node.set(new_node)
357
+ super()._check_reuse()
358
+ token_limit_tree.push(self)
208
359
  return self
209
360
 
210
361
  def __exit__(
@@ -213,103 +364,75 @@ class _TokenLimit(Limit):
213
364
  exc_val: BaseException | None,
214
365
  exc_tb: TracebackType | None,
215
366
  ) -> None:
216
- current_node = token_limit_leaf_node.get()
217
- assert current_node is not None, (
218
- "Token limit node should not be None when exiting context manager."
219
- )
220
- token_limit_leaf_node.set(current_node.parent)
367
+ self._pop_and_check_identity(token_limit_tree)
368
+
369
+ @property
370
+ def usage(self) -> float:
371
+ return self._usage.total_tokens
221
372
 
222
373
  @property
223
374
  def limit(self) -> int | None:
224
375
  """Get the configured token limit value."""
225
- return self._limit_value_wrapper.value
376
+ return self._limit
226
377
 
227
378
  @limit.setter
228
379
  def limit(self, value: int | None) -> None:
229
380
  """Update the token limit value.
230
381
 
231
- This will affect the limit for all active token limit nodes derived from this
232
- context manager.
233
-
234
382
  This does not trigger a check of the token limit (which could now have been
235
383
  exceeded).
236
384
  """
237
385
  self._validate_token_limit(value)
238
- self._limit_value_wrapper.value = value
239
-
240
- def _validate_token_limit(self, value: int | None) -> None:
241
- if value is not None and value < 0:
242
- raise ValueError("Token limit value must be a non-negative integer.")
243
-
244
-
245
- class _TokenLimitNode:
246
- def __init__(
247
- self,
248
- limit: _LimitValueWrapper,
249
- parent: _TokenLimitNode | None,
250
- ) -> None:
251
- """
252
- Initialize a token limit node.
253
-
254
- Forms part of a tree structure. Each node has a pointer to its parent, or None
255
- if it is the root node.
256
-
257
- Tracks the token usage for this node and its parent nodes and checks if the
258
- usage has exceeded a (variable) limit.
259
-
260
- Args:
261
- limit: The maximum number of tokens that can be used while the context
262
- manager is open.
263
- parent: The parent node in the tree.
264
- """
265
- from inspect_ai.model._model_output import ModelUsage
266
-
267
- self._limit = limit
268
- self.parent = parent
269
- self._usage = ModelUsage()
386
+ self._limit = value
270
387
 
271
388
  def record(self, usage: ModelUsage) -> None:
272
- """Record model usage for this node and its parent nodes."""
389
+ """Record model usage for this node and its ancestor nodes."""
273
390
  if self.parent is not None:
274
391
  self.parent.record(usage)
275
392
  self._usage += usage
276
393
 
277
394
  def check(self) -> None:
278
- """Check if this token limit or any parent limits have been exceeded."""
279
- self._check_self()
395
+ """Check if this token limit or any ancestor limits have been exceeded.
396
+
397
+ The checks occur from root to leaf. This is so that if multiple limits are
398
+ simultaneously exceeded, the outermost (closest to root) one raises the error,
399
+ preventing certain sub-agent architectures from ending up in an infinite loop.
400
+ """
280
401
  if self.parent is not None:
281
402
  self.parent.check()
403
+ self._check_self()
404
+
405
+ def _validate_token_limit(self, value: int | None) -> None:
406
+ if value is not None and value < 0:
407
+ raise ValueError(
408
+ f"Token limit value must be a non-negative integer or None: {value}"
409
+ )
282
410
 
283
411
  def _check_self(self) -> None:
284
412
  from inspect_ai.log._transcript import SampleLimitEvent, transcript
285
413
 
286
- if self._limit.value is None:
414
+ if self.limit is None:
287
415
  return
288
416
  total = self._usage.total_tokens
289
- if total > self._limit.value:
290
- message = (
291
- f"Token limit exceeded. value: {total:,}; limit: {self._limit.value:,}"
292
- )
417
+ if total > self.limit:
418
+ message = f"Token limit exceeded. value: {total:,}; limit: {self.limit:,}"
293
419
  transcript()._event(
294
- SampleLimitEvent(type="token", limit=self._limit.value, message=message)
420
+ SampleLimitEvent(type="token", limit=self.limit, message=message)
295
421
  )
296
422
  raise LimitExceededError(
297
- "token", value=total, limit=self._limit.value, message=message
423
+ "token", value=total, limit=self.limit, message=message, source=self
298
424
  )
299
425
 
300
426
 
301
- class _MessageLimit(Limit):
427
+ class _MessageLimit(Limit, _Node):
302
428
  def __init__(self, limit: int | None) -> None:
429
+ super().__init__()
303
430
  self._validate_message_limit(limit)
304
- self._limit_value_wrapper = _LimitValueWrapper(limit)
431
+ self._limit = limit
305
432
 
306
433
  def __enter__(self) -> Limit:
307
- current_node = message_limit_leaf_node.get()
308
- new_node = _MessageLimitNode(self._limit_value_wrapper, current_node)
309
- # Note that we don't store new_node as an instance variable, because the context
310
- # manager may be used across multiple execution contexts, or opened multiple
311
- # times.
312
- message_limit_leaf_node.set(new_node)
434
+ super()._check_reuse()
435
+ message_limit_tree.push(self)
313
436
  return self
314
437
 
315
438
  def __exit__(
@@ -318,16 +441,19 @@ class _MessageLimit(Limit):
318
441
  exc_val: BaseException | None,
319
442
  exc_tb: TracebackType | None,
320
443
  ) -> None:
321
- current_node = message_limit_leaf_node.get()
322
- assert current_node is not None, (
323
- "Message limit node should not be None when exiting context manager."
444
+ self._pop_and_check_identity(message_limit_tree)
445
+
446
+ @property
447
+ def usage(self) -> float:
448
+ raise NotImplementedError(
449
+ "Retrieving the message count from a limit is not supported. Please query "
450
+ "the messages property on the task or agent state instead."
324
451
  )
325
- message_limit_leaf_node.set(current_node.parent)
326
452
 
327
453
  @property
328
454
  def limit(self) -> int | None:
329
455
  """Get the configured message limit value."""
330
- return self._limit_value_wrapper.value
456
+ return self._limit
331
457
 
332
458
  @limit.setter
333
459
  def limit(self, value: int | None) -> None:
@@ -340,54 +466,161 @@ class _MessageLimit(Limit):
340
466
  exceeded).
341
467
  """
342
468
  self._validate_message_limit(value)
343
- self._limit_value_wrapper.value = value
469
+ self._limit = value
470
+
471
+ def check(self, count: int, raise_for_equal: bool) -> None:
472
+ """Check if this message limit has been exceeded.
473
+
474
+ Does not check ancestors.
475
+ """
476
+ from inspect_ai.log._transcript import SampleLimitEvent, transcript
477
+
478
+ if self.limit is None:
479
+ return
480
+ if count > self.limit or (raise_for_equal and count == self.limit):
481
+ reached_or_exceeded = "reached" if count == self.limit else "exceeded"
482
+ message = (
483
+ f"Message limit {reached_or_exceeded}. count: {count:,}; "
484
+ f"limit: {self.limit:,}"
485
+ )
486
+ transcript()._event(
487
+ SampleLimitEvent(type="message", limit=self.limit, message=message)
488
+ )
489
+ raise LimitExceededError(
490
+ "message", value=count, limit=self.limit, message=message, source=self
491
+ )
344
492
 
345
493
  def _validate_message_limit(self, value: int | None) -> None:
346
494
  if value is not None and value < 0:
347
- raise ValueError("Message limit value must be a non-negative integer.")
495
+ raise ValueError(
496
+ f"Message limit value must be a non-negative integer or None: {value}"
497
+ )
348
498
 
349
499
 
350
- class _MessageLimitNode:
351
- def __init__(
500
+ class _TimeLimit(Limit):
501
+ def __init__(self, limit: float | None) -> None:
502
+ super().__init__()
503
+ _validate_time_limit("Time", limit)
504
+ self._limit = limit
505
+ self._start_time: float | None = None
506
+ self._end_time: float | None = None
507
+
508
+ def __enter__(self) -> Limit:
509
+ super()._check_reuse()
510
+ # Unlike the other limits, this one is not stored in a tree. Anyio handles all
511
+ # of the state.
512
+ self._cancel_scope = anyio.move_on_after(self._limit)
513
+ self._cancel_scope.__enter__()
514
+ self._start_time = anyio.current_time()
515
+ return self
516
+
517
+ def __exit__(
352
518
  self,
353
- limit: _LimitValueWrapper,
354
- parent: _MessageLimitNode | None,
519
+ exc_type: type[BaseException] | None,
520
+ exc_val: BaseException | None,
521
+ exc_tb: TracebackType | None,
355
522
  ) -> None:
356
- """
357
- Initialize a message limit node.
358
-
359
- Forms part of a tree structure. Each node has a pointer to its parent, or None
360
- if it is the root node.
523
+ from inspect_ai.log._transcript import SampleLimitEvent, transcript
361
524
 
362
- Checks if the message count for this node has exceeded a (variable) limit.
525
+ self._cancel_scope.__exit__(exc_type, exc_val, exc_tb)
526
+ self._end_time = anyio.current_time()
527
+ if self._cancel_scope.cancel_called and self._limit is not None:
528
+ message = f"Time limit exceeded. limit: {self._limit} seconds"
529
+ assert self._start_time is not None
530
+ # Note we've measured the elapsed time independently of anyio's cancel scope
531
+ # so this is an approximation.
532
+ time_elapsed = self._end_time - self._start_time
533
+ transcript()._event(
534
+ SampleLimitEvent(type="time", message=message, limit=self._limit)
535
+ )
536
+ raise LimitExceededError(
537
+ "time",
538
+ value=time_elapsed,
539
+ limit=self._limit,
540
+ message=message,
541
+ source=self,
542
+ ) from exc_val
363
543
 
364
- Args:
365
- limit: The maximum conversation length (number of messages) allowed while this
366
- node is the lead node of the current execution context.
367
- parent: The parent node in the tree.
368
- """
544
+ @property
545
+ def usage(self) -> float:
546
+ if self._start_time is None:
547
+ return 0.0
548
+ if self._end_time is None:
549
+ return anyio.current_time() - self._start_time
550
+ return self._end_time - self._start_time
551
+
552
+
553
+ class _WorkingLimit(Limit, _Node):
554
+ def __init__(self, limit: float | None) -> None:
555
+ super().__init__()
556
+ _validate_time_limit("Working time", limit)
369
557
  self._limit = limit
370
- self.parent = parent
558
+ self.parent: _WorkingLimit | None = None
559
+ self._start_time: float | None = None
560
+ self._end_time: float | None = None
371
561
 
372
- def check(self, count: int, raise_for_equal: bool) -> None:
373
- """Check if this message limit has been exceeded.
562
+ def __enter__(self) -> Limit:
563
+ super()._check_reuse()
564
+ self._start_time = anyio.current_time()
565
+ self._waiting_time = 0.0
566
+ working_limit_tree.push(self)
567
+ return self
374
568
 
375
- Does not check parents.
569
+ def __exit__(
570
+ self,
571
+ exc_type: type[BaseException] | None,
572
+ exc_val: BaseException | None,
573
+ exc_tb: TracebackType | None,
574
+ ) -> None:
575
+ self._end_time = anyio.current_time()
576
+ self._pop_and_check_identity(working_limit_tree)
577
+
578
+ @property
579
+ def usage(self) -> float:
580
+ if self._start_time is None:
581
+ return 0.0
582
+ if self._end_time is None:
583
+ return anyio.current_time() - self._start_time - self._waiting_time
584
+ return self._end_time - self._start_time - self._waiting_time
585
+
586
+ def record_waiting_time(self, waiting_time: float) -> None:
587
+ """Record waiting time for this node and its ancestor nodes."""
588
+ if self.parent is not None:
589
+ self.parent.record_waiting_time(waiting_time)
590
+ self._waiting_time += waiting_time
591
+
592
+ def check(self) -> None:
593
+ """Check if this working time limit or any ancestor limits have been exceeded.
594
+
595
+ The checks occur from root to leaf. This is so that if multiple limits are
596
+ simultaneously exceeded, the outermost (closest to root) one raises the error,
597
+ preventing certain sub-agent architectures from ending up in an infinite loop.
376
598
  """
599
+ if self.parent is not None:
600
+ self.parent.check()
601
+ self._check_self()
602
+
603
+ def _check_self(self) -> None:
377
604
  from inspect_ai.log._transcript import SampleLimitEvent, transcript
378
605
 
379
- if self._limit.value is None:
606
+ if self._limit is None:
380
607
  return
381
- limit = self._limit.value
382
- if count > limit or (raise_for_equal and count == limit):
383
- reached_or_exceeded = "reached" if count == limit else "exceeded"
384
- message = (
385
- f"Message limit {reached_or_exceeded}. count: {count:,}; "
386
- f"limit: {limit:,}"
387
- )
608
+ if self.usage > self._limit:
609
+ message = f"Working time limit exceeded. limit: {self._limit} seconds"
388
610
  transcript()._event(
389
- SampleLimitEvent(type="message", limit=limit, message=message)
611
+ SampleLimitEvent(type="working", message=message, limit=self._limit)
390
612
  )
391
613
  raise LimitExceededError(
392
- "message", value=count, limit=limit, message=message
614
+ "working",
615
+ value=self.usage,
616
+ limit=self._limit,
617
+ message=message,
618
+ source=self,
393
619
  )
620
+
621
+
622
+ def _validate_time_limit(name: str, value: float | None) -> None:
623
+ if value is not None and value < 0:
624
+ raise ValueError(
625
+ f"{name} limit value must be a non-negative float or None: {value}"
626
+ )