inspect-ai 0.3.68__py3-none-any.whl → 0.3.70__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +13 -1
- inspect_ai/_display/plain/display.py +9 -11
- inspect_ai/_display/textual/app.py +5 -5
- inspect_ai/_display/textual/widgets/samples.py +47 -18
- inspect_ai/_display/textual/widgets/transcript.py +25 -12
- inspect_ai/_eval/eval.py +14 -2
- inspect_ai/_eval/evalset.py +6 -1
- inspect_ai/_eval/run.py +6 -0
- inspect_ai/_eval/task/run.py +44 -15
- inspect_ai/_eval/task/task.py +26 -3
- inspect_ai/_util/interrupt.py +15 -0
- inspect_ai/_util/logger.py +23 -0
- inspect_ai/_util/rich.py +7 -8
- inspect_ai/_util/text.py +301 -1
- inspect_ai/_util/transcript.py +10 -2
- inspect_ai/_util/working.py +46 -0
- inspect_ai/_view/www/dist/assets/index.css +56 -12
- inspect_ai/_view/www/dist/assets/index.js +905 -751
- inspect_ai/_view/www/log-schema.json +337 -2
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/node_modules/flatted/python/test.py +63 -0
- inspect_ai/_view/www/src/appearance/icons.ts +3 -1
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +0 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +28 -1
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +23 -2
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +4 -0
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +152 -0
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +9 -2
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +19 -1
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
- inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
- inspect_ai/_view/www/src/types/log.d.ts +188 -108
- inspect_ai/_view/www/src/utils/format.ts +7 -4
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -6
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_condense.py +1 -0
- inspect_ai/log/_log.py +72 -12
- inspect_ai/log/_samples.py +5 -5
- inspect_ai/log/_transcript.py +31 -1
- inspect_ai/model/_call_tools.py +1 -1
- inspect_ai/model/_conversation.py +1 -1
- inspect_ai/model/_model.py +35 -16
- inspect_ai/model/_model_call.py +10 -3
- inspect_ai/model/_providers/anthropic.py +13 -2
- inspect_ai/model/_providers/bedrock.py +7 -0
- inspect_ai/model/_providers/cloudflare.py +20 -7
- inspect_ai/model/_providers/google.py +358 -302
- inspect_ai/model/_providers/groq.py +57 -23
- inspect_ai/model/_providers/hf.py +6 -0
- inspect_ai/model/_providers/mistral.py +81 -52
- inspect_ai/model/_providers/openai.py +9 -0
- inspect_ai/model/_providers/providers.py +6 -6
- inspect_ai/model/_providers/util/tracker.py +92 -0
- inspect_ai/model/_providers/vllm.py +13 -5
- inspect_ai/solver/_basic_agent.py +1 -3
- inspect_ai/solver/_bridge/patch.py +0 -2
- inspect_ai/solver/_limit.py +4 -4
- inspect_ai/solver/_plan.py +3 -3
- inspect_ai/solver/_solver.py +3 -0
- inspect_ai/solver/_task_state.py +10 -1
- inspect_ai/tool/_tools/_web_search.py +3 -3
- inspect_ai/util/_concurrency.py +14 -8
- inspect_ai/util/_sandbox/context.py +15 -0
- inspect_ai/util/_sandbox/docker/cleanup.py +8 -3
- inspect_ai/util/_sandbox/docker/compose.py +5 -9
- inspect_ai/util/_sandbox/docker/docker.py +20 -6
- inspect_ai/util/_sandbox/docker/util.py +10 -1
- inspect_ai/util/_sandbox/environment.py +32 -1
- inspect_ai/util/_sandbox/events.py +149 -0
- inspect_ai/util/_sandbox/local.py +3 -3
- inspect_ai/util/_sandbox/self_check.py +2 -1
- inspect_ai/util/_subprocess.py +4 -1
- {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/METADATA +5 -5
- {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/RECORD +82 -74
- {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.68.dist-info → inspect_ai-0.3.70.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/task/task.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from copy import deepcopy
|
2
2
|
from dataclasses import dataclass
|
3
3
|
from logging import getLogger
|
4
|
-
from typing import Any, Callable, Sequence, cast
|
4
|
+
from typing import Any, Awaitable, Callable, Sequence, cast
|
5
5
|
|
6
6
|
from pydantic import BaseModel
|
7
7
|
from typing_extensions import TypedDict, Unpack
|
@@ -17,6 +17,7 @@ from inspect_ai.scorer import Metric, Scorer
|
|
17
17
|
from inspect_ai.scorer._reducer import ScoreReducers, create_reducers
|
18
18
|
from inspect_ai.solver import Plan, Solver, generate
|
19
19
|
from inspect_ai.solver._chain import chain
|
20
|
+
from inspect_ai.solver._task_state import TaskState
|
20
21
|
from inspect_ai.util._sandbox.environment import (
|
21
22
|
SandboxEnvironmentSpec,
|
22
23
|
SandboxEnvironmentType,
|
@@ -46,6 +47,7 @@ class Task:
|
|
46
47
|
dataset: Dataset | Sequence[Sample] | None = None,
|
47
48
|
setup: Solver | list[Solver] | None = None,
|
48
49
|
solver: Solver | list[Solver] = generate(),
|
50
|
+
cleanup: Callable[[TaskState], Awaitable[None]] | None = None,
|
49
51
|
scorer: Scorer | list[Scorer] | None = None,
|
50
52
|
metrics: list[Metric] | dict[str, list[Metric]] | None = None,
|
51
53
|
config: GenerateConfig = GenerateConfig(),
|
@@ -56,6 +58,7 @@ class Task:
|
|
56
58
|
message_limit: int | None = None,
|
57
59
|
token_limit: int | None = None,
|
58
60
|
time_limit: int | None = None,
|
61
|
+
working_limit: int | None = None,
|
59
62
|
name: str | None = None,
|
60
63
|
version: int = 0,
|
61
64
|
metadata: dict[str, Any] | None = None,
|
@@ -69,6 +72,9 @@ class Task:
|
|
69
72
|
even when the main `solver` is replaced).
|
70
73
|
solver: (Solver | list[Solver]): Solver or list of solvers.
|
71
74
|
Defaults to generate(), a normal call to the model.
|
75
|
+
cleanup: Optional cleanup function for task. Called after
|
76
|
+
all solvers have run for each sample (including if an
|
77
|
+
exception occurs during the run)
|
72
78
|
scorer: (Scorer | list[Scorer] | None): Scorer used to evaluate model output.
|
73
79
|
metrics (list[Metric] | dict[str, list[Metric]] | None):
|
74
80
|
Alternative metrics (overrides the metrics provided by the specified scorer).
|
@@ -86,7 +92,10 @@ class Task:
|
|
86
92
|
eval if a count of samples fails.
|
87
93
|
message_limit (int | None): Limit on total messages used for each sample.
|
88
94
|
token_limit (int | None): Limit on total tokens used for each sample.
|
89
|
-
time_limit
|
95
|
+
time_limit: Limit on clock time (in seconds) for samples.
|
96
|
+
working_limit: Limit on working time (in seconds) for sample. Working
|
97
|
+
time includes model generation, tool calls, etc. but does not include
|
98
|
+
time spent waiting on retries or shared resources.
|
90
99
|
name: (str | None): Task name. If not specified is automatically
|
91
100
|
determined based on the name of the task directory (or "task")
|
92
101
|
if its anonymous task (e.g. created in a notebook and passed to
|
@@ -123,6 +132,7 @@ class Task:
|
|
123
132
|
self.dataset = resolve_dataset(dataset)
|
124
133
|
self.setup = setup
|
125
134
|
self.solver = resolve_solver(solver)
|
135
|
+
self.cleanup = cleanup
|
126
136
|
self.scorer = resolve_scorer(scorer)
|
127
137
|
self.metrics = metrics
|
128
138
|
self.config = config
|
@@ -135,6 +145,7 @@ class Task:
|
|
135
145
|
self.message_limit = message_limit
|
136
146
|
self.token_limit = token_limit
|
137
147
|
self.time_limit = time_limit
|
148
|
+
self.working_limit = working_limit
|
138
149
|
self.version = version
|
139
150
|
self._name = name
|
140
151
|
self.metadata = metadata
|
@@ -162,6 +173,7 @@ def task_with(
|
|
162
173
|
dataset: Dataset | Sequence[Sample] | None | NotGiven = NOT_GIVEN,
|
163
174
|
setup: Solver | list[Solver] | None | NotGiven = NOT_GIVEN,
|
164
175
|
solver: Solver | list[Solver] | NotGiven = NOT_GIVEN,
|
176
|
+
cleanup: Callable[[TaskState], Awaitable[None]] | None | NotGiven = NOT_GIVEN,
|
165
177
|
scorer: Scorer | list[Scorer] | None | NotGiven = NOT_GIVEN,
|
166
178
|
metrics: list[Metric] | dict[str, list[Metric]] | None | NotGiven = NOT_GIVEN,
|
167
179
|
config: GenerateConfig | NotGiven = NOT_GIVEN,
|
@@ -172,6 +184,7 @@ def task_with(
|
|
172
184
|
message_limit: int | None | NotGiven = NOT_GIVEN,
|
173
185
|
token_limit: int | None | NotGiven = NOT_GIVEN,
|
174
186
|
time_limit: int | None | NotGiven = NOT_GIVEN,
|
187
|
+
working_limit: int | None | NotGiven = NOT_GIVEN,
|
175
188
|
name: str | None | NotGiven = NOT_GIVEN,
|
176
189
|
version: int | NotGiven = NOT_GIVEN,
|
177
190
|
metadata: dict[str, Any] | None | NotGiven = NOT_GIVEN,
|
@@ -185,6 +198,9 @@ def task_with(
|
|
185
198
|
even when the main `solver` is replaced).
|
186
199
|
solver: (Solver | list[Solver]): Solver or list of solvers.
|
187
200
|
Defaults to generate(), a normal call to the model.
|
201
|
+
cleanup: Optional cleanup function for task. Called after
|
202
|
+
all solvers have run for each sample (including if an
|
203
|
+
exception occurs during the run)
|
188
204
|
scorer: (Scorer | list[Scorer] | None): Scorer used to evaluate model output.
|
189
205
|
metrics (list[Metric] | dict[str, list[Metric]] | None):
|
190
206
|
Alternative metrics (overrides the metrics provided by the specified scorer).
|
@@ -202,7 +218,10 @@ def task_with(
|
|
202
218
|
eval if a count of samples fails.
|
203
219
|
message_limit (int | None): Limit on total messages used for each sample.
|
204
220
|
token_limit (int | None): Limit on total tokens used for each sample.
|
205
|
-
time_limit
|
221
|
+
time_limit: Limit on clock time (in seconds) for samples.
|
222
|
+
working_limit: Limit on execution time (in seconds) for sample. Execution
|
223
|
+
time includes model generation, tool calls, etc. but does not include
|
224
|
+
time spent waiting on retries or shared resources.
|
206
225
|
name: (str | None): Task name. If not specified is automatically
|
207
226
|
determined based on the name of the task directory (or "task")
|
208
227
|
if its anonymous task (e.g. created in a notebook and passed to
|
@@ -223,6 +242,8 @@ def task_with(
|
|
223
242
|
task.setup = setup
|
224
243
|
if not isinstance(solver, NotGiven):
|
225
244
|
task.solver = resolve_solver(solver)
|
245
|
+
if not isinstance(cleanup, NotGiven):
|
246
|
+
task.cleanup = cleanup
|
226
247
|
if not isinstance(scorer, NotGiven):
|
227
248
|
task.scorer = resolve_scorer(scorer)
|
228
249
|
if not isinstance(metrics, NotGiven):
|
@@ -245,6 +266,8 @@ def task_with(
|
|
245
266
|
task.token_limit = token_limit
|
246
267
|
if not isinstance(time_limit, NotGiven):
|
247
268
|
task.time_limit = time_limit
|
269
|
+
if not isinstance(working_limit, NotGiven):
|
270
|
+
task.working_limit = working_limit
|
248
271
|
if not isinstance(version, NotGiven):
|
249
272
|
task.version = version
|
250
273
|
if not isinstance(name, NotGiven):
|
@@ -0,0 +1,15 @@
|
|
1
|
+
import asyncio
|
2
|
+
|
3
|
+
from .working import check_sample_working_limit
|
4
|
+
|
5
|
+
|
6
|
+
def check_sample_interrupt() -> None:
    """Stop the active sample if it was interrupted or exceeded its working limit.

    Raises:
        asyncio.CancelledError: If there is an active sample and its
            `interrupt_action` is set.
    """
    # function-scope import, kept local exactly as in the original
    from inspect_ai.log._samples import sample_active

    # a pending interrupt action on the active sample cancels the current task
    active = sample_active()
    interrupted = bool(active and active.interrupt_action)
    if interrupted:
        raise asyncio.CancelledError()

    # no interrupt pending: delegate to the working time limit check
    check_sample_working_limit()
|
inspect_ai/_util/logger.py
CHANGED
@@ -90,6 +90,10 @@ class LogHandler(RichHandler):
|
|
90
90
|
if "Event loop is closed" in record.getMessage():
|
91
91
|
return
|
92
92
|
|
93
|
+
# skip google-genai AFC message
|
94
|
+
if "AFC is enabled with max remote calls" in record.getMessage():
|
95
|
+
return
|
96
|
+
|
93
97
|
# write to stderr if we are at or above the threshold
|
94
98
|
if record.levelno >= self.display_level:
|
95
99
|
super().emit(record)
|
@@ -156,7 +160,9 @@ def init_logger(
|
|
156
160
|
|
157
161
|
# init logging handler on demand
|
158
162
|
global _logHandler
|
163
|
+
removed_root_handlers = False
|
159
164
|
if not _logHandler:
|
165
|
+
removed_root_handlers = remove_non_pytest_root_logger_handlers()
|
160
166
|
_logHandler = LogHandler(min(DEBUG, levelno), transcript_levelno)
|
161
167
|
getLogger().addHandler(_logHandler)
|
162
168
|
|
@@ -169,6 +175,11 @@ def init_logger(
|
|
169
175
|
getLogger("httpx").setLevel(capture_level)
|
170
176
|
getLogger("botocore").setLevel(DEBUG)
|
171
177
|
|
178
|
+
if removed_root_handlers:
|
179
|
+
getLogger(PKG_NAME).warning(
|
180
|
+
"Inspect removed pre-existing root logger handlers and replaced them with its own handler."
|
181
|
+
)
|
182
|
+
|
172
183
|
# set the levelno on the global handler
|
173
184
|
_logHandler.display_level = levelno
|
174
185
|
|
@@ -176,6 +187,18 @@ def init_logger(
|
|
176
187
|
_logHandler: LogHandler | None = None
|
177
188
|
|
178
189
|
|
190
|
+
def remove_non_pytest_root_logger_handlers() -> bool:
    """Detach root logger handlers that do not come from pytest.

    A handler is kept when its `__module__` is `"_pytest.logging"`; every
    other handler attached to the root logger is removed.

    Returns:
        True if at least one handler was removed, False otherwise.
    """
    root = getLogger()
    removed_any = False
    # walk a snapshot, since removeHandler() mutates root.handlers
    for candidate in list(root.handlers):
        if candidate.__module__ == "_pytest.logging":
            continue
        root.removeHandler(candidate)
        removed_any = True
    return removed_any
|
200
|
+
|
201
|
+
|
179
202
|
def notify_logger_record(record: LogRecord, write: bool) -> None:
|
180
203
|
from inspect_ai.log._message import LoggingMessage
|
181
204
|
from inspect_ai.log._transcript import LoggerEvent, transcript
|
inspect_ai/_util/rich.py
CHANGED
@@ -2,23 +2,22 @@ from rich.console import RenderableType
|
|
2
2
|
from rich.style import Style
|
3
3
|
from rich.text import Text
|
4
4
|
|
5
|
+
from inspect_ai._util.text import truncate_lines
|
6
|
+
|
5
7
|
|
6
8
|
def lines_display(
|
7
9
|
text: str, max_lines: int = 100, style: str | Style = ""
|
8
10
|
) -> list[RenderableType]:
|
9
|
-
lines = text
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
]
|
11
|
+
lines, truncated = truncate_lines(text, max_lines)
|
12
|
+
|
13
|
+
content: list[RenderableType] = [Text(lines, style=style)]
|
14
|
+
if truncated is not None:
|
14
15
|
content.append(Text())
|
15
16
|
content.append(
|
16
17
|
Text.from_markup(
|
17
|
-
f"[italic]Output truncated ({
|
18
|
+
f"[italic]Output truncated ({truncated} additional lines)...[/italic]",
|
18
19
|
style=style,
|
19
20
|
)
|
20
21
|
)
|
21
|
-
else:
|
22
|
-
content = [Text(text, style=style)]
|
23
22
|
|
24
23
|
return content
|
inspect_ai/_util/text.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
+
import random
|
1
2
|
import re
|
2
3
|
import string
|
3
4
|
from logging import getLogger
|
4
|
-
from typing import NamedTuple
|
5
|
+
from typing import List, NamedTuple
|
5
6
|
|
6
7
|
logger = getLogger(__name__)
|
7
8
|
|
@@ -131,3 +132,302 @@ def truncate(text: str, length: int, overflow: str = "...", pad: bool = True) ->
|
|
131
132
|
truncated = text[: length - overflow_length] + overflow
|
132
133
|
|
133
134
|
return truncated
|
135
|
+
|
136
|
+
|
137
|
+
def truncate_lines(
    text: str, max_lines: int = 100, max_characters: int | None = 100 * 100
) -> tuple[str, int | None]:
    """Limit text to a character budget and then to a maximum number of lines.

    Args:
        text: Text to limit.
        max_lines: Maximum number of lines to keep.
        max_characters: Length passed to `truncate()` before lines are
            counted. `None` skips the character step.

    Returns:
        A `(text, omitted)` tuple. `omitted` is the number of lines dropped,
        or `None` when the (character-limited) text fits within `max_lines`.
    """
    # NOTE(review): truncate() is called with its default `pad=True`;
    # confirm that padding is the intended behavior here.
    bounded = text if max_characters is None else truncate(text, max_characters)

    # lines are counted on the character-limited text, not the raw input
    rows = bounded.splitlines()
    omitted = len(rows) - max_lines
    if omitted <= 0:
        return bounded, None
    return "\n".join(rows[:max_lines]), omitted
|
148
|
+
|
149
|
+
|
150
|
+
def generate_large_text(target_tokens: int) -> str:
    """Produce random text whose estimated token count reaches `target_tokens`.

    Sentences are drawn from `generate_sentence()` until the running estimate
    is no longer below the target, then joined with single spaces.

    Args:
        target_tokens: Approximate number of tokens to generate.

    Returns:
        The generated text (an empty string if `target_tokens` is not positive).
    """
    pieces = []
    token_estimate = 0

    while token_estimate < target_tokens:
        piece = generate_sentence()

        # roughly one sentence in ten is followed by a paragraph break
        paragraph_break = "\n\n" if random.random() < 0.1 else ""
        piece = piece + paragraph_break
        pieces.append(piece)

        # crude estimate: whitespace-separated words, plus 2 for punctuation
        token_estimate += len(piece.split()) + 2

    return " ".join(pieces)
|
168
|
+
|
169
|
+
|
170
|
+
def generate_sentence() -> str:
    """Return one random sentence built from fixed sentence templates.

    All five templates are filled in with random words first, and one of the
    filled-in sentences is then chosen at random.
    """
    adjectives, nouns, verbs = create_word_lists()

    def adj() -> str:
        return random.choice(adjectives)

    def noun() -> str:
        return random.choice(nouns)

    def verb() -> str:
        return random.choice(verbs)

    # f-string fields are evaluated left to right, so the order of random
    # draws matches the order in which the placeholders appear
    candidates = [
        f"The {adj()} {noun()} {verb()} the {adj()} {noun()}.",
        f"A {adj()} {noun()} {verb()} near the {noun()}.",
        f"In the {adj()} {noun()}, the {noun()} {verb()} {adj()}.",
        f"When the {noun()} {verb()}, a {adj()} {noun()} {verb()}.",
        f"The {noun()} {verb()} while the {adj()} {noun()} {verb()}.",
    ]

    return random.choice(candidates)
|
183
|
+
|
184
|
+
|
185
|
+
def create_word_lists() -> tuple[list[str], list[str], list[str]]:
    """Create basic word lists for sentence generation.

    Each call builds fresh lists, so callers may mutate the result freely.
    Every word appears exactly once per list, so a uniform random pick
    weights all words in a list equally.

    Returns:
        A tuple `(adjectives, nouns, verbs)` of lowercase English words.
    """
    # Common adjectives (each listed once; repeated entries would be drawn
    # twice as often by random.choice)
    adjectives = [
        "red", "blue", "green", "dark", "bright",
        "quiet", "loud", "small", "large", "quick",
        "slow", "happy", "sad", "clever", "wise",
        "ancient", "modern", "complex", "simple", "elegant",
        "rough", "smooth", "sharp", "dull", "fresh",
        "stale", "clean", "dirty", "heavy", "light",
        "hot", "cold", "dry", "wet", "rich",
        "poor", "thick", "thin", "strong", "weak",
        "early", "late", "young", "old", "good",
        "bad", "high", "low", "long", "short",
        "deep", "shallow", "hard", "soft", "near",
        "far", "wide", "narrow", "big", "little",
        "fast", "busy", "lazy", "new", "full",
        "empty", "sweet", "sour", "brave", "scared",
    ]

    # Common nouns
    nouns = [
        "time", "person", "year", "way", "day",
        "thing", "man", "world", "life", "hand",
        "part", "child", "eye", "woman", "place",
        "work", "week", "case", "point", "group",
        "number", "room", "fact", "idea", "water",
        "money", "month", "book", "line", "city",
        "business", "night", "question", "story", "job",
        "word", "house", "power", "game", "country",
        "plant", "animal", "tree", "stone", "river",
        "fire", "problem", "theory", "street", "family",
        "history", "mind", "car", "music", "art",
        "nation", "science", "nature", "truth", "peace",
        "voice", "class", "paper", "space", "ground",
        "market", "court", "force", "price", "action",
        "reason", "love", "law", "bird", "literature",
        "knowledge", "society", "valley", "ocean", "machine",
        "energy", "metal", "mountain",
    ]

    # Common verbs (present tense)
    verbs = [
        "run", "walk", "jump", "sing", "dance",
        "write", "read", "speak", "listen", "watch",
        "think", "grow", "live", "play", "work",
        "move", "stop", "start", "create", "destroy",
        "build", "break", "push", "pull", "open",
        "close", "rise", "fall", "increase", "decrease",
        "begin", "end", "love", "hate", "help",
        "hurt", "make", "take", "give", "receive",
        "buy", "sell", "eat", "drink", "sleep",
        "wake", "laugh", "cry", "learn", "teach",
        "change", "stay", "come", "go", "arrive",
        "leave", "enter", "exit", "succeed", "fail",
        "win", "lose", "fight", "defend", "attack",
        "protect", "save", "waste", "gather", "scatter",
        "collect", "distribute", "join", "separate", "unite",
        "divide", "share",
    ]

    return adjectives, nouns, verbs
|
inspect_ai/_util/transcript.py
CHANGED
@@ -122,8 +122,16 @@ def transcript_reasoning(reasoning: str) -> list[RenderableType]:
|
|
122
122
|
return content
|
123
123
|
|
124
124
|
|
125
|
-
def transcript_separator(
|
126
|
-
|
125
|
+
def transcript_separator(
    title: str, color: str, characters: str = "─"
) -> RenderableType:
    """Build a titled, centered rule used as a separator.

    Args:
        title: Text shown in the rule.
        color: Color name combined with `bold` to form the rule style.
        characters: Character(s) the rule line is drawn with.

    Returns:
        A `Rule` renderable ending with two newlines.
    """
    rule_style = f"{color} bold"
    separator = Rule(
        title=title,
        characters=characters,
        style=rule_style,
        align="center",
        end="\n\n",
    )
    return separator
|
127
135
|
|
128
136
|
|
129
137
|
def transcript_function(function: str, arguments: dict[str, Any]) -> RenderableType:
|
@@ -0,0 +1,46 @@
|
|
1
|
+
import time
|
2
|
+
from contextvars import ContextVar
|
3
|
+
|
4
|
+
|
5
|
+
def init_sample_working_limit(start_time: float, working_limit: float | None) -> None:
    """Reset working-limit tracking for the current context.

    Args:
        start_time: Start time of the sample; it is later subtracted from
            `time.monotonic()`, so it should come from the same clock.
        working_limit: Working time limit, or `None` for no limit.
    """
    # accumulated waiting time always starts over from zero
    _sample_waiting_time.set(0)
    _sample_start_time.set(start_time)
    _sample_working_limit.set(working_limit)
|
9
|
+
|
10
|
+
|
11
|
+
def sample_waiting_time() -> float:
    """Return the waiting time accumulated so far in the current context."""
    waited = _sample_waiting_time.get()
    return waited
|
13
|
+
|
14
|
+
|
15
|
+
def report_sample_waiting_time(waiting_time: float) -> None:
    """Add to the accumulated waiting time, then re-check the working limit.

    Args:
        waiting_time: Additional waiting time to record.
    """
    updated_total = _sample_waiting_time.get() + waiting_time
    _sample_waiting_time.set(updated_total)

    # the limit is checked again each time waiting time is reported
    check_sample_working_limit()
|
18
|
+
|
19
|
+
|
20
|
+
def check_sample_working_limit() -> None:
    """Raise if the current sample has used more working time than allowed.

    Working time is the monotonic time elapsed since the recorded start time
    minus the accumulated waiting time. Nothing happens when no limit is set.

    Raises:
        SampleLimitExceededError: With `type="working"` when the working time
            is greater than the limit.
    """
    limit = _sample_working_limit.get()
    if limit is not None:
        # elapsed wall time, less the time spent waiting
        elapsed = time.monotonic() - _sample_start_time.get()
        worked = elapsed - sample_waiting_time()

        if worked > limit:
            # function-scope import, kept local exactly as in the original
            from inspect_ai.solver._limit import SampleLimitExceededError

            raise SampleLimitExceededError(
                type="working",
                value=int(worked),
                limit=int(limit),
                message=f"Exceeded working time limit ({limit:,} seconds)",
            )
|
38
|
+
|
39
|
+
|
40
|
+
# Context-local state backing the working-limit helpers in this module.

# working time limit for the current sample (None means no limit is enforced)
_sample_working_limit: ContextVar[float | None] = ContextVar(
    "sample_working_limit",
    default=None,
)

# start time of the current sample, compared against time.monotonic()
_sample_start_time: ContextVar[float] = ContextVar(
    "sample_start_time",
    default=0,
)

# total waiting time reported so far for the current sample
_sample_waiting_time: ContextVar[float] = ContextVar(
    "sample_waiting_time",
    default=0,
)
|