inspect-ai 0.3.68__py3-none-any.whl → 0.3.70__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. inspect_ai/_cli/eval.py +13 -1
  2. inspect_ai/_display/plain/display.py +9 -11
  3. inspect_ai/_display/textual/app.py +5 -5
  4. inspect_ai/_display/textual/widgets/samples.py +47 -18
  5. inspect_ai/_display/textual/widgets/transcript.py +25 -12
  6. inspect_ai/_eval/eval.py +14 -2
  7. inspect_ai/_eval/evalset.py +6 -1
  8. inspect_ai/_eval/run.py +6 -0
  9. inspect_ai/_eval/task/run.py +44 -15
  10. inspect_ai/_eval/task/task.py +26 -3
  11. inspect_ai/_util/interrupt.py +15 -0
  12. inspect_ai/_util/logger.py +23 -0
  13. inspect_ai/_util/rich.py +7 -8
  14. inspect_ai/_util/text.py +301 -1
  15. inspect_ai/_util/transcript.py +10 -2
  16. inspect_ai/_util/working.py +46 -0
  17. inspect_ai/_view/www/dist/assets/index.css +56 -12
  18. inspect_ai/_view/www/dist/assets/index.js +905 -751
  19. inspect_ai/_view/www/log-schema.json +337 -2
  20. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  21. inspect_ai/_view/www/node_modules/flatted/python/test.py +63 -0
  22. inspect_ai/_view/www/src/appearance/icons.ts +3 -1
  23. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +0 -1
  24. inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
  25. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +28 -1
  26. inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
  27. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +23 -2
  28. inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +1 -1
  29. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -0
  30. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
  31. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +152 -0
  32. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +9 -2
  33. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +19 -1
  34. inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
  35. inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
  36. inspect_ai/_view/www/src/types/log.d.ts +188 -108
  37. inspect_ai/_view/www/src/utils/format.ts +7 -4
  38. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -6
  39. inspect_ai/log/__init__.py +2 -0
  40. inspect_ai/log/_condense.py +1 -0
  41. inspect_ai/log/_log.py +72 -12
  42. inspect_ai/log/_samples.py +5 -5
  43. inspect_ai/log/_transcript.py +31 -1
  44. inspect_ai/model/_call_tools.py +1 -1
  45. inspect_ai/model/_conversation.py +1 -1
  46. inspect_ai/model/_model.py +35 -16
  47. inspect_ai/model/_model_call.py +10 -3
  48. inspect_ai/model/_providers/anthropic.py +13 -2
  49. inspect_ai/model/_providers/bedrock.py +7 -0
  50. inspect_ai/model/_providers/cloudflare.py +20 -7
  51. inspect_ai/model/_providers/google.py +358 -302
  52. inspect_ai/model/_providers/groq.py +57 -23
  53. inspect_ai/model/_providers/hf.py +6 -0
  54. inspect_ai/model/_providers/mistral.py +81 -52
  55. inspect_ai/model/_providers/openai.py +9 -0
  56. inspect_ai/model/_providers/providers.py +6 -6
  57. inspect_ai/model/_providers/util/tracker.py +92 -0
  58. inspect_ai/model/_providers/vllm.py +13 -5
  59. inspect_ai/solver/_basic_agent.py +1 -3
  60. inspect_ai/solver/_bridge/patch.py +0 -2
  61. inspect_ai/solver/_limit.py +4 -4
  62. inspect_ai/solver/_plan.py +3 -3
  63. inspect_ai/solver/_solver.py +3 -0
  64. inspect_ai/solver/_task_state.py +10 -1
  65. inspect_ai/tool/_tools/_web_search.py +3 -3
  66. inspect_ai/util/_concurrency.py +14 -8
  67. inspect_ai/util/_sandbox/context.py +15 -0
  68. inspect_ai/util/_sandbox/docker/cleanup.py +8 -3
  69. inspect_ai/util/_sandbox/docker/compose.py +5 -9
  70. inspect_ai/util/_sandbox/docker/docker.py +20 -6
  71. inspect_ai/util/_sandbox/docker/util.py +10 -1
  72. inspect_ai/util/_sandbox/environment.py +32 -1
  73. inspect_ai/util/_sandbox/events.py +149 -0
  74. inspect_ai/util/_sandbox/local.py +3 -3
  75. inspect_ai/util/_sandbox/self_check.py +2 -1
  76. inspect_ai/util/_subprocess.py +4 -1
  77. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/METADATA +5 -5
  78. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/RECORD +82 -74
  79. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/LICENSE +0 -0
  80. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/WHEEL +0 -0
  81. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/entry_points.txt +0 -0
  82. {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/task/task.py CHANGED
@@ -1,7 +1,7 @@
 from copy import deepcopy
 from dataclasses import dataclass
 from logging import getLogger
-from typing import Any, Callable, Sequence, cast
+from typing import Any, Awaitable, Callable, Sequence, cast
 
 from pydantic import BaseModel
 from typing_extensions import TypedDict, Unpack
@@ -17,6 +17,7 @@ from inspect_ai.scorer import Metric, Scorer
 from inspect_ai.scorer._reducer import ScoreReducers, create_reducers
 from inspect_ai.solver import Plan, Solver, generate
 from inspect_ai.solver._chain import chain
+from inspect_ai.solver._task_state import TaskState
 from inspect_ai.util._sandbox.environment import (
     SandboxEnvironmentSpec,
     SandboxEnvironmentType,
@@ -46,6 +47,7 @@ class Task:
         dataset: Dataset | Sequence[Sample] | None = None,
         setup: Solver | list[Solver] | None = None,
         solver: Solver | list[Solver] = generate(),
+        cleanup: Callable[[TaskState], Awaitable[None]] | None = None,
         scorer: Scorer | list[Scorer] | None = None,
         metrics: list[Metric] | dict[str, list[Metric]] | None = None,
         config: GenerateConfig = GenerateConfig(),
@@ -56,6 +58,7 @@ class Task:
         message_limit: int | None = None,
         token_limit: int | None = None,
         time_limit: int | None = None,
+        working_limit: int | None = None,
         name: str | None = None,
         version: int = 0,
         metadata: dict[str, Any] | None = None,
@@ -69,6 +72,9 @@ class Task:
                 even when the main `solver` is replaced).
             solver: (Solver | list[Solver]): Solver or list of solvers.
                 Defaults to generate(), a normal call to the model.
+            cleanup: Optional cleanup function for task. Called after
+                all solvers have run for each sample (including if an
+                exception occurs during the run)
             scorer: (Scorer | list[Scorer] | None): Scorer used to evaluate model output.
             metrics (list[Metric] | dict[str, list[Metric]] | None):
                 Alternative metrics (overrides the metrics provided by the specified scorer).
@@ -86,7 +92,10 @@ class Task:
                 eval if a count of samples fails.
             message_limit (int | None): Limit on total messages used for each sample.
             token_limit (int | None): Limit on total tokens used for each sample.
-            time_limit (int | None): Limit on time (in seconds) for execution of each sample.
+            time_limit: Limit on clock time (in seconds) for samples.
+            working_limit: Limit on working time (in seconds) for sample. Working
+                time includes model generation, tool calls, etc. but does not include
+                time spent waiting on retries or shared resources.
             name: (str | None): Task name. If not specified is automatically
                 determined based on the name of the task directory (or "task")
                 if its anonymous task (e.g. created in a notebook and passed to
@@ -123,6 +132,7 @@ class Task:
         self.dataset = resolve_dataset(dataset)
         self.setup = setup
         self.solver = resolve_solver(solver)
+        self.cleanup = cleanup
         self.scorer = resolve_scorer(scorer)
         self.metrics = metrics
         self.config = config
@@ -135,6 +145,7 @@ class Task:
         self.message_limit = message_limit
         self.token_limit = token_limit
         self.time_limit = time_limit
+        self.working_limit = working_limit
         self.version = version
         self._name = name
         self.metadata = metadata
@@ -162,6 +173,7 @@ def task_with(
     dataset: Dataset | Sequence[Sample] | None | NotGiven = NOT_GIVEN,
     setup: Solver | list[Solver] | None | NotGiven = NOT_GIVEN,
     solver: Solver | list[Solver] | NotGiven = NOT_GIVEN,
+    cleanup: Callable[[TaskState], Awaitable[None]] | None | NotGiven = NOT_GIVEN,
     scorer: Scorer | list[Scorer] | None | NotGiven = NOT_GIVEN,
     metrics: list[Metric] | dict[str, list[Metric]] | None | NotGiven = NOT_GIVEN,
     config: GenerateConfig | NotGiven = NOT_GIVEN,
@@ -172,6 +184,7 @@ def task_with(
     message_limit: int | None | NotGiven = NOT_GIVEN,
     token_limit: int | None | NotGiven = NOT_GIVEN,
     time_limit: int | None | NotGiven = NOT_GIVEN,
+    working_limit: int | None | NotGiven = NOT_GIVEN,
     name: str | None | NotGiven = NOT_GIVEN,
     version: int | NotGiven = NOT_GIVEN,
     metadata: dict[str, Any] | None | NotGiven = NOT_GIVEN,
@@ -185,6 +198,9 @@ def task_with(
             even when the main `solver` is replaced).
         solver: (Solver | list[Solver]): Solver or list of solvers.
             Defaults to generate(), a normal call to the model.
+        cleanup: Optional cleanup function for task. Called after
+            all solvers have run for each sample (including if an
+            exception occurs during the run)
         scorer: (Scorer | list[Scorer] | None): Scorer used to evaluate model output.
         metrics (list[Metric] | dict[str, list[Metric]] | None):
             Alternative metrics (overrides the metrics provided by the specified scorer).
@@ -202,7 +218,10 @@ def task_with(
             eval if a count of samples fails.
         message_limit (int | None): Limit on total messages used for each sample.
         token_limit (int | None): Limit on total tokens used for each sample.
-        time_limit (int | None): Limit on time (in seconds) for execution of each sample.
+        time_limit: Limit on clock time (in seconds) for samples.
+        working_limit: Limit on execution time (in seconds) for sample. Execution
+            time includes model generation, tool calls, etc. but does not include
+            time spent waiting on retries or shared resources.
         name: (str | None): Task name. If not specified is automatically
             determined based on the name of the task directory (or "task")
             if its anonymous task (e.g. created in a notebook and passed to
@@ -223,6 +242,8 @@ def task_with(
         task.setup = setup
     if not isinstance(solver, NotGiven):
         task.solver = resolve_solver(solver)
+    if not isinstance(cleanup, NotGiven):
+        task.cleanup = cleanup
     if not isinstance(scorer, NotGiven):
         task.scorer = resolve_scorer(scorer)
     if not isinstance(metrics, NotGiven):
@@ -245,6 +266,8 @@ def task_with(
         task.token_limit = token_limit
     if not isinstance(time_limit, NotGiven):
         task.time_limit = time_limit
+    if not isinstance(working_limit, NotGiven):
+        task.working_limit = working_limit
     if not isinstance(version, NotGiven):
         task.version = version
     if not isinstance(name, NotGiven):
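
Taken together, the task.py changes add two per-sample controls: a cleanup coroutine that runs after all solvers (even on error) and a working-time budget. A minimal sketch of how a task might use them (the task name, dataset, and cleanup function below are hypothetical illustrations, not taken from the diff):

    from inspect_ai import Task, task
    from inspect_ai.dataset import Sample
    from inspect_ai.scorer import includes
    from inspect_ai.solver import TaskState, generate


    # hypothetical cleanup hook: runs once per sample after all solvers,
    # including when the sample raises
    async def release_resources(state: TaskState) -> None:
        ...


    @task
    def demo() -> Task:
        return Task(
            dataset=[Sample(input="Say hello.", target="hello")],
            solver=generate(),
            cleanup=release_resources,
            scorer=includes(),
            time_limit=600,     # wall-clock seconds per sample
            working_limit=120,  # model/tool time only; waiting excluded
        )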
inspect_ai/_util/interrupt.py ADDED
@@ -0,0 +1,15 @@
+import asyncio
+
+from .working import check_sample_working_limit
+
+
+def check_sample_interrupt() -> None:
+    from inspect_ai.log._samples import sample_active
+
+    # check for user interrupt
+    sample = sample_active()
+    if sample and sample.interrupt_action:
+        raise asyncio.CancelledError()
+
+    # check for working_limit
+    check_sample_working_limit()
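
check_sample_interrupt() is an internal checkpoint: any long-running loop that calls it periodically becomes both interruptible and subject to the working limit. A sketch of the intended call pattern (poll_job and job_done are hypothetical, not from the package):

    import asyncio

    from inspect_ai._util.interrupt import check_sample_interrupt


    async def poll_job(job_done) -> None:
        while not job_done():
            # raises asyncio.CancelledError on user interrupt, or
            # SampleLimitExceededError once working time is exhausted
            check_sample_interrupt()
            await asyncio.sleep(1)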
inspect_ai/_util/logger.py CHANGED
@@ -90,6 +90,10 @@ class LogHandler(RichHandler):
         if "Event loop is closed" in record.getMessage():
             return
 
+        # skip google-genai AFC message
+        if "AFC is enabled with max remote calls" in record.getMessage():
+            return
+
         # write to stderr if we are at or above the threshold
         if record.levelno >= self.display_level:
             super().emit(record)
@@ -156,7 +160,9 @@ def init_logger(
 
     # init logging handler on demand
     global _logHandler
+    removed_root_handlers = False
     if not _logHandler:
+        removed_root_handlers = remove_non_pytest_root_logger_handlers()
         _logHandler = LogHandler(min(DEBUG, levelno), transcript_levelno)
         getLogger().addHandler(_logHandler)
 
@@ -169,6 +175,11 @@ def init_logger(
     getLogger("httpx").setLevel(capture_level)
     getLogger("botocore").setLevel(DEBUG)
 
+    if removed_root_handlers:
+        getLogger(PKG_NAME).warning(
+            "Inspect removed pre-existing root logger handlers and replaced them with its own handler."
+        )
+
     # set the levelno on the global handler
     _logHandler.display_level = levelno
 
@@ -176,6 +187,18 @@ def init_logger(
 _logHandler: LogHandler | None = None
 
 
+def remove_non_pytest_root_logger_handlers() -> bool:
+    root_logger = getLogger()
+    non_pytest_handlers = [
+        handler
+        for handler in root_logger.handlers
+        if handler.__module__ != "_pytest.logging"
+    ]
+    for handler in non_pytest_handlers:
+        root_logger.removeHandler(handler)
+    return len(non_pytest_handlers) > 0
+
+
 def notify_logger_record(record: LogRecord, write: bool) -> None:
     from inspect_ai.log._message import LoggingMessage
     from inspect_ai.log._transcript import LoggerEvent, transcript
inspect_ai/_util/rich.py CHANGED
@@ -2,23 +2,22 @@ from rich.console import RenderableType
 from rich.style import Style
 from rich.text import Text
 
+from inspect_ai._util.text import truncate_lines
+
 
 def lines_display(
     text: str, max_lines: int = 100, style: str | Style = ""
 ) -> list[RenderableType]:
-    lines = text.splitlines()
-    if len(lines) > max_lines:
-        content: list[RenderableType] = [
-            Text("\n".join(lines[0:max_lines]), style=style)
-        ]
+    lines, truncated = truncate_lines(text, max_lines)
+
+    content: list[RenderableType] = [Text(lines, style=style)]
+    if truncated is not None:
         content.append(Text())
         content.append(
             Text.from_markup(
-                f"[italic]Output truncated ({len(lines) - max_lines} additional lines)...[/italic]",
+                f"[italic]Output truncated ({truncated} additional lines)...[/italic]",
                 style=style,
             )
         )
-    else:
-        content = [Text(text, style=style)]
 
     return content
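
The effect of the refactored lines_display() can be seen by calling the internal helper directly (the sample text below is illustrative):

    from rich.console import Console

    from inspect_ai._util.rich import lines_display

    console = Console()
    text = "\n".join(f"line {i}" for i in range(250))
    # renders the first 100 lines, then an italic
    # "Output truncated (150 additional lines)..." note
    for renderable in lines_display(text, max_lines=100):
        console.print(renderable)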
inspect_ai/_util/text.py CHANGED
@@ -1,7 +1,8 @@
+import random
 import re
 import string
 from logging import getLogger
-from typing import NamedTuple
+from typing import List, NamedTuple
 
 logger = getLogger(__name__)
 
@@ -131,3 +132,302 @@ def truncate(text: str, length: int, overflow: str = "...", pad: bool = True) -> str:
     truncated = text[: length - overflow_length] + overflow
 
     return truncated
+
+
+def truncate_lines(
+    text: str, max_lines: int = 100, max_characters: int | None = 100 * 100
+) -> tuple[str, int | None]:
+    if max_characters is not None:
+        text = truncate(text, max_characters)
+    lines = text.splitlines()
+    if len(lines) > max_lines:
+        output = "\n".join(lines[0:max_lines])
+        return output, len(lines) - max_lines
+    else:
+        return text, None
+
+
+def generate_large_text(target_tokens: int) -> str:
+    """Generate a large amount of text with approximately the target number of tokens"""
+    generated_text = []
+    estimated_tokens = 0
+
+    while estimated_tokens < target_tokens:
+        sentence = generate_sentence()
+
+        # Add paragraph breaks occasionally
+        if random.random() < 0.1:
+            sentence += "\n\n"
+
+        generated_text.append(sentence)
+
+        # Rough estimate of tokens (words + punctuation)
+        estimated_tokens += len(sentence.split()) + 2
+
+    return " ".join(generated_text)
+
+
+def generate_sentence() -> str:
+    """Generate a random sentence using predefined templates"""
+    adjectives, nouns, verbs = create_word_lists()
+
+    templates = [
+        f"The {random.choice(adjectives)} {random.choice(nouns)} {random.choice(verbs)} the {random.choice(adjectives)} {random.choice(nouns)}.",
+        f"A {random.choice(adjectives)} {random.choice(nouns)} {random.choice(verbs)} near the {random.choice(nouns)}.",
+        f"In the {random.choice(adjectives)} {random.choice(nouns)}, the {random.choice(nouns)} {random.choice(verbs)} {random.choice(adjectives)}.",
+        f"When the {random.choice(nouns)} {random.choice(verbs)}, a {random.choice(adjectives)} {random.choice(nouns)} {random.choice(verbs)}.",
+        f"The {random.choice(nouns)} {random.choice(verbs)} while the {random.choice(adjectives)} {random.choice(nouns)} {random.choice(verbs)}.",
+    ]
+
+    return random.choice(templates)
+
+
+def create_word_lists() -> tuple[List[str], List[str], List[str]]:
+    """Create basic word lists for sentence generation"""
+    # Common adjectives
+    adjectives = [
+        "red", "blue", "green", "dark", "bright", "quiet", "loud", "small",
+        "large", "quick", "slow", "happy", "sad", "clever", "wise", "ancient",
+        "modern", "complex", "simple", "elegant", "rough", "smooth", "sharp",
+        "dull", "fresh", "stale", "clean", "dirty", "heavy", "light", "hot",
+        "cold", "dry", "wet", "rich", "poor", "thick", "thin", "strong",
+        "weak", "early", "late", "young", "old", "good", "bad", "high",
+        "low", "long", "short", "deep", "shallow", "hard", "soft", "near",
+        "far", "wide", "narrow", "big", "little", "fast", "slow", "busy",
+        "lazy", "new", "old", "full", "empty", "loud", "quiet", "sweet",
+        "sour", "brave", "scared",
+    ]
+
+    # Common nouns
+    nouns = [
+        "time", "person", "year", "way", "day", "thing", "man", "world",
+        "life", "hand", "part", "child", "eye", "woman", "place", "work",
+        "week", "case", "point", "group", "number", "room", "fact", "idea",
+        "water", "money", "month", "book", "line", "city", "business",
+        "night", "question", "story", "job", "word", "house", "power",
+        "game", "country", "plant", "animal", "tree", "stone", "river",
+        "fire", "problem", "theory", "street", "family", "history", "mind",
+        "car", "music", "art", "nation", "science", "nature", "truth",
+        "peace", "voice", "class", "paper", "space", "ground", "market",
+        "court", "force", "price", "action", "reason", "love", "law",
+        "bird", "literature", "knowledge", "society", "valley", "ocean",
+        "machine", "energy", "metal", "mountain",
+    ]
+
+    # Common verbs (present tense)
+    verbs = [
+        "run", "walk", "jump", "sing", "dance", "write", "read", "speak",
+        "listen", "watch", "think", "grow", "live", "play", "work", "move",
+        "stop", "start", "create", "destroy", "build", "break", "push",
+        "pull", "open", "close", "rise", "fall", "increase", "decrease",
+        "begin", "end", "love", "hate", "help", "hurt", "make", "take",
+        "give", "receive", "buy", "sell", "eat", "drink", "sleep", "wake",
+        "laugh", "cry", "learn", "teach", "change", "stay", "come", "go",
+        "arrive", "leave", "enter", "exit", "succeed", "fail", "win",
+        "lose", "fight", "defend", "attack", "protect", "save", "waste",
+        "gather", "scatter", "collect", "distribute", "join", "separate",
+        "unite", "divide", "share",
+    ]
+
+    return adjectives, nouns, verbs
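
truncate_lines() caps both characters (10,000 by default) and lines, returning the truncated text plus the count of dropped lines (or None when nothing was dropped). A small illustrative check:

    from inspect_ai._util.text import truncate_lines

    text = "\n".join(f"row {i}" for i in range(120))
    body, dropped = truncate_lines(text, max_lines=100)
    assert dropped == 20  # 120 lines in, first 100 kept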
inspect_ai/_util/transcript.py CHANGED
@@ -122,8 +122,16 @@ def transcript_reasoning(reasoning: str) -> list[RenderableType]:
     return content
 
 
-def transcript_separator(title: str, color: str) -> RenderableType:
-    return Rule(title=title, style=f"{color} bold", align="center", end="\n\n")
+def transcript_separator(
+    title: str, color: str, characters: str = "─"
+) -> RenderableType:
+    return Rule(
+        title=title,
+        characters=characters,
+        style=f"{color} bold",
+        align="center",
+        end="\n\n",
+    )
 
 
 def transcript_function(function: str, arguments: dict[str, Any]) -> RenderableType:
inspect_ai/_util/working.py ADDED
@@ -0,0 +1,46 @@
+import time
+from contextvars import ContextVar
+
+
+def init_sample_working_limit(start_time: float, working_limit: float | None) -> None:
+    _sample_working_limit.set(working_limit)
+    _sample_start_time.set(start_time)
+    _sample_waiting_time.set(0)
+
+
+def sample_waiting_time() -> float:
+    return _sample_waiting_time.get()
+
+
+def report_sample_waiting_time(waiting_time: float) -> None:
+    _sample_waiting_time.set(_sample_waiting_time.get() + waiting_time)
+    check_sample_working_limit()
+
+
+def check_sample_working_limit() -> None:
+    # no check if we don't have a limit
+    working_limit = _sample_working_limit.get()
+    if working_limit is None:
+        return
+
+    # are we over the limit?
+    running_time = time.monotonic() - _sample_start_time.get()
+    working_time = running_time - sample_waiting_time()
+    if working_time > working_limit:
+        from inspect_ai.solver._limit import SampleLimitExceededError
+
+        raise SampleLimitExceededError(
+            type="working",
+            value=int(working_time),
+            limit=int(working_limit),
+            message=f"Exceeded working time limit ({working_limit:,} seconds)",
+        )
+
+
+_sample_working_limit: ContextVar[float | None] = ContextVar(
+    "sample_working_limit", default=None
+)
+
+_sample_start_time: ContextVar[float] = ContextVar("sample_start_time", default=0)
+
+_sample_waiting_time: ContextVar[float] = ContextVar("sample_waiting_time", default=0)
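
The working-limit plumbing composes as follows: the sample runner calls init_sample_working_limit() at sample start, providers report time spent waiting (e.g. on rate-limit retries) via report_sample_waiting_time(), and periodic checks raise once working time (running time minus waiting time) exceeds the budget. An illustrative sequence (the 15-second wait is hypothetical):

    import time

    from inspect_ai._util.working import (
        check_sample_working_limit,
        init_sample_working_limit,
        report_sample_waiting_time,
    )

    init_sample_working_limit(start_time=time.monotonic(), working_limit=60.0)

    # ...sample runs; a provider blocks 15s on a retry...
    report_sample_waiting_time(15.0)  # accumulates waiting time, re-checks limit

    # raises SampleLimitExceededError(type="working") only once
    # (monotonic now - start) - 15.0 exceeds 60.0
    check_sample_working_limit()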