inspect-ai 0.3.65__py3-none-any.whl → 0.3.67__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_display/core/config.py +4 -0
- inspect_ai/_display/textual/app.py +13 -5
- inspect_ai/_display/textual/widgets/footer.py +2 -2
- inspect_ai/_display/textual/widgets/sandbox.py +1 -1
- inspect_ai/_display/textual/widgets/task_detail.py +7 -5
- inspect_ai/_display/textual/widgets/tasks.py +8 -6
- inspect_ai/_display/textual/widgets/transcript.py +1 -1
- inspect_ai/_eval/task/run.py +5 -3
- inspect_ai/_eval/task/task.py +9 -1
- inspect_ai/_util/format.py +58 -0
- inspect_ai/_view/www/dist/assets/index.css +29 -9
- inspect_ai/_view/www/dist/assets/index.js +368 -304
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +1 -1
- inspect_ai/_view/www/src/samples/sample-tools/filters.ts +41 -20
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +2 -1
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +28 -6
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +5 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +1 -3
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +31 -16
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +4 -1
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +1 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +2 -2
- inspect_ai/model/_model.py +89 -2
- inspect_ai/model/_providers/anthropic.py +4 -0
- inspect_ai/model/_providers/azureai.py +5 -0
- inspect_ai/model/_providers/bedrock.py +5 -0
- inspect_ai/model/_providers/cloudflare.py +4 -0
- inspect_ai/model/_providers/goodfire.py +5 -0
- inspect_ai/model/_providers/google.py +16 -3
- inspect_ai/model/_providers/groq.py +4 -0
- inspect_ai/model/_providers/hf.py +7 -0
- inspect_ai/model/_providers/mistral.py +4 -0
- inspect_ai/model/_providers/openai.py +4 -0
- inspect_ai/model/_providers/vertex.py +5 -0
- inspect_ai/model/_providers/vllm.py +7 -0
- inspect_ai/solver/__init__.py +8 -1
- inspect_ai/solver/_human_agent/panel.py +11 -5
- inspect_ai/solver/_prompt.py +38 -5
- inspect_ai/util/_sandbox/docker/config.py +4 -1
- inspect_ai/util/_sandbox/docker/util.py +2 -1
- {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/METADATA +3 -2
- {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/RECORD +46 -46
- {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/top_level.txt +0 -0
@@ -35,6 +35,10 @@ def task_config(
|
|
35
35
|
value = [str(v) for v in value]
|
36
36
|
config_print.append(f"{name}: {','.join(value)}")
|
37
37
|
elif name not in ["limit", "model"]:
|
38
|
+
if isinstance(value, list):
|
39
|
+
value = ",".join([str(v) for v in value])
|
40
|
+
if isinstance(value, str):
|
41
|
+
value = value.replace("[", "\\[")
|
38
42
|
config_print.append(f"{name}: {value}")
|
39
43
|
values = ", ".join(config_print)
|
40
44
|
if values:
|
@@ -1,13 +1,21 @@
|
|
1
1
|
import asyncio
|
2
2
|
import contextlib
|
3
3
|
from asyncio import CancelledError
|
4
|
-
from typing import
|
4
|
+
from typing import (
|
5
|
+
Any,
|
6
|
+
AsyncIterator,
|
7
|
+
ClassVar,
|
8
|
+
Coroutine,
|
9
|
+
Generic,
|
10
|
+
Iterator,
|
11
|
+
cast,
|
12
|
+
)
|
5
13
|
|
6
14
|
import rich
|
7
15
|
from rich.console import Console
|
16
|
+
from rich.text import Text
|
8
17
|
from textual.app import App, ComposeResult
|
9
18
|
from textual.binding import Binding, BindingType
|
10
|
-
from textual.content import Content
|
11
19
|
from textual.css.query import NoMatches
|
12
20
|
from textual.events import Print
|
13
21
|
from textual.widget import Widget
|
@@ -308,9 +316,9 @@ class TaskScreenApp(App[TR]):
|
|
308
316
|
|
309
317
|
def set_unread(unread: int | None) -> None:
|
310
318
|
if unread is not None:
|
311
|
-
console_tab.label =
|
319
|
+
console_tab.label = Text(f"Console ({unread}")
|
312
320
|
else:
|
313
|
-
console_tab.label =
|
321
|
+
console_tab.label = Text("Console")
|
314
322
|
|
315
323
|
self.watch(console_view, "unread", set_unread)
|
316
324
|
|
@@ -377,7 +385,7 @@ class TaskScreenApp(App[TR]):
|
|
377
385
|
def set_title(self, title: str) -> None:
|
378
386
|
tabs = self.app.query_one(TabbedContent)
|
379
387
|
tab = tabs.get_tab(self.tab_id)
|
380
|
-
tab.label =
|
388
|
+
tab.label = Text(title)
|
381
389
|
|
382
390
|
def activate(self) -> None:
|
383
391
|
# show the tab
|
@@ -26,8 +26,8 @@ class AppFooter(Widget):
|
|
26
26
|
right: reactive[RenderableType] = reactive("")
|
27
27
|
|
28
28
|
def compose(self) -> ComposeResult:
|
29
|
-
yield Static(id="footer-left")
|
30
|
-
yield Static(id="footer-right")
|
29
|
+
yield Static(id="footer-left", markup=False)
|
30
|
+
yield Static(id="footer-right", markup=False)
|
31
31
|
|
32
32
|
def watch_left(self, new_left: RenderableType) -> None:
|
33
33
|
footer_left = cast(Static, self.query_one("#footer-left"))
|
@@ -38,6 +38,6 @@ class SandboxView(Vertical):
|
|
38
38
|
with Horizontal():
|
39
39
|
yield Static("", classes="indent" if self.sandbox_name else "no_indent")
|
40
40
|
with Vertical():
|
41
|
-
yield Static(self.connection.command)
|
41
|
+
yield Static(self.connection.command, markup=False)
|
42
42
|
if self.connection.ports:
|
43
43
|
yield PortMappingsView(self.connection.ports)
|
@@ -233,20 +233,22 @@ class TaskMetrics(Widget):
|
|
233
233
|
for metric in self.metrics:
|
234
234
|
# Add the value static but keep it around
|
235
235
|
# for future updates
|
236
|
-
self.value_widgets[metric.name] = Static(
|
236
|
+
self.value_widgets[metric.name] = Static(
|
237
|
+
self._metric_value(metric.value), markup=False
|
238
|
+
)
|
237
239
|
|
238
|
-
grid.mount(Static(metric.name))
|
240
|
+
grid.mount(Static(metric.name, markup=False))
|
239
241
|
grid.mount(self.value_widgets[metric.name])
|
240
242
|
|
241
243
|
def _title(self) -> Widget:
|
242
244
|
if self.scorer is None:
|
243
245
|
return Static("")
|
244
246
|
elif self.reducer is None:
|
245
|
-
return Static(self.scorer)
|
247
|
+
return Static(self.scorer, markup=False)
|
246
248
|
else:
|
247
249
|
return Horizontal(
|
248
|
-
Static(self.scorer, classes="scorer"),
|
249
|
-
Static(f"({self.reducer})", classes="reducer"),
|
250
|
+
Static(self.scorer, classes="scorer", markup=False),
|
251
|
+
Static(f"({self.reducer})", classes="reducer", markup=False),
|
250
252
|
)
|
251
253
|
|
252
254
|
def _metric_value(self, val: float) -> str:
|
@@ -128,8 +128,8 @@ class TasksView(Container):
|
|
128
128
|
progress_view.update_count_width(self.sample_count_width)
|
129
129
|
|
130
130
|
def compose(self) -> ComposeResult:
|
131
|
-
yield Static(id="tasks-config")
|
132
|
-
yield Static(id="tasks-targets")
|
131
|
+
yield Static(id="tasks-config", markup=False)
|
132
|
+
yield Static(id="tasks-targets", markup=False)
|
133
133
|
yield ScrollableContainer(id="tasks-progress")
|
134
134
|
|
135
135
|
def watch_config(self, new_config: RenderableType) -> None:
|
@@ -191,8 +191,8 @@ class TaskProgressView(Widget):
|
|
191
191
|
self.model_name_width = model_name_width
|
192
192
|
|
193
193
|
self.progress_bar = ProgressBar(total=task.profile.steps, show_eta=False)
|
194
|
-
self.count_display = Static()
|
195
|
-
self.metrics_display = Static(id="task-metrics")
|
194
|
+
self.count_display = Static(markup=False)
|
195
|
+
self.metrics_display = Static(id="task-metrics", markup=False)
|
196
196
|
self.task_progress = TaskProgress(self.progress_bar)
|
197
197
|
|
198
198
|
self.toggle = Toggle()
|
@@ -211,10 +211,12 @@ class TaskProgressView(Widget):
|
|
211
211
|
yield (self.toggle if self.display_metrics else Static())
|
212
212
|
yield TaskStatusIcon()
|
213
213
|
yield Static(
|
214
|
-
progress_description(self.t.profile, self.description_width, pad=True)
|
214
|
+
progress_description(self.t.profile, self.description_width, pad=True),
|
215
|
+
markup=False,
|
215
216
|
)
|
216
217
|
yield Static(
|
217
|
-
progress_model_name(self.t.profile.model, self.model_name_width, pad=True)
|
218
|
+
progress_model_name(self.t.profile.model, self.model_name_width, pad=True),
|
219
|
+
markup=False,
|
218
220
|
)
|
219
221
|
yield self.progress_bar
|
220
222
|
yield self.count_display
|
@@ -117,7 +117,7 @@ class TranscriptView(ScrollableContainer):
|
|
117
117
|
)
|
118
118
|
if isinstance(d.content, Markdown):
|
119
119
|
set_transcript_markdown_options(d.content)
|
120
|
-
widgets.append(Static(d.content))
|
120
|
+
widgets.append(Static(d.content, markup=False))
|
121
121
|
widgets.append(Static(Text(" ")))
|
122
122
|
return widgets
|
123
123
|
|
inspect_ai/_eval/task/run.py
CHANGED
@@ -551,9 +551,11 @@ async def task_run_sample(
|
|
551
551
|
# helper to handle exceptions (will throw if we've exceeded the limit)
|
552
552
|
def handle_error(ex: BaseException) -> tuple[EvalError, BaseException | None]:
|
553
553
|
err = sample_error(ex)
|
554
|
-
|
555
|
-
|
556
|
-
|
554
|
+
# if we aren't raising the error then print a warning
|
555
|
+
if err[1] is None:
|
556
|
+
py_logger.warning(
|
557
|
+
f"Sample error (id: {sample.id}, epoch: {state.epoch}): {exception_message(ex)})"
|
558
|
+
)
|
557
559
|
transcript()._event(ErrorEvent(error=err[0]))
|
558
560
|
return err
|
559
561
|
|
inspect_ai/_eval/task/task.py
CHANGED
@@ -331,7 +331,15 @@ def resolve_epochs(epochs: int | Epochs | None) -> Epochs | None:
|
|
331
331
|
|
332
332
|
|
333
333
|
def resolve_dataset(dataset: Dataset | Sequence[Sample] | None) -> Dataset:
|
334
|
-
|
334
|
+
# this is a convenience for tests that don't want to define a dummy sample
|
335
|
+
if dataset is None:
|
336
|
+
dataset = [Sample(input="prompt")]
|
337
|
+
|
338
|
+
# raise error if the dataset is empty
|
339
|
+
if len(dataset) == 0:
|
340
|
+
raise ValueError("The specified dataset is empty (has no samples)")
|
341
|
+
|
342
|
+
# resolve sequence to dataset if necessary
|
335
343
|
return dataset if isinstance(dataset, Dataset) else MemoryDataset(list(dataset))
|
336
344
|
|
337
345
|
|
inspect_ai/_util/format.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
import pprint
|
2
|
+
from string import Formatter
|
2
3
|
from textwrap import indent
|
3
4
|
from typing import Any
|
4
5
|
|
@@ -33,3 +34,60 @@ def format_progress_time(time: float, pad_hours: bool = True) -> str:
|
|
33
34
|
hours, minutes = divmod(minutes, 60)
|
34
35
|
hours_fmt = f"{hours:2.0f}" if pad_hours else f"{hours:.0f}"
|
35
36
|
return f"{hours_fmt}:{minutes:02.0f}:{seconds:02.0f}"
|
37
|
+
|
38
|
+
|
39
|
+
def format_template(
|
40
|
+
template: str,
|
41
|
+
params: dict[str, Any],
|
42
|
+
skip_unknown: bool = True,
|
43
|
+
) -> str:
|
44
|
+
"""Format a template string, optionally preserving unknown placeholders.
|
45
|
+
|
46
|
+
Args:
|
47
|
+
template: A string containing {placeholders} to be formatted
|
48
|
+
params: Dictionary of parameters to substitute into the template
|
49
|
+
skip_unknown: If True, preserve unknown placeholders; if False, raise KeyError
|
50
|
+
|
51
|
+
Returns:
|
52
|
+
The formatted string with parameters substituted
|
53
|
+
|
54
|
+
Examples:
|
55
|
+
>>> format_template("Hello {name}!", {"name": "World"})
|
56
|
+
'Hello World!'
|
57
|
+
>>> format_template("Hello {name}!", {}, skip_unknown=True)
|
58
|
+
'Hello {name}!'
|
59
|
+
"""
|
60
|
+
|
61
|
+
class SafeFormatter(Formatter):
|
62
|
+
def get_field(self, field_name: str, args: Any, kwargs: Any) -> Any:
|
63
|
+
try:
|
64
|
+
# Handle array indexing and nested attributes
|
65
|
+
first, rest = (
|
66
|
+
field_name.split(".", 1)
|
67
|
+
if "." in field_name
|
68
|
+
else (field_name, None)
|
69
|
+
)
|
70
|
+
first = first.split("[")[0] # Remove any array indexing for the check
|
71
|
+
|
72
|
+
if first not in params and skip_unknown:
|
73
|
+
return "{" + field_name + "}", field_name
|
74
|
+
|
75
|
+
obj = params.get(first)
|
76
|
+
if obj is None and skip_unknown:
|
77
|
+
return "{" + field_name + "}", field_name
|
78
|
+
|
79
|
+
return super().get_field(field_name, args, kwargs)
|
80
|
+
except (AttributeError, KeyError, IndexError) as e:
|
81
|
+
if skip_unknown:
|
82
|
+
return "{" + field_name + "}", field_name
|
83
|
+
raise KeyError(f"Failed to format field '{field_name}'") from e
|
84
|
+
|
85
|
+
def format_field(self, value: Any, format_spec: str) -> Any:
|
86
|
+
try:
|
87
|
+
return super().format_field(value, format_spec)
|
88
|
+
except (ValueError, TypeError):
|
89
|
+
if skip_unknown:
|
90
|
+
return "{" + str(value) + ":" + format_spec + "}"
|
91
|
+
raise
|
92
|
+
|
93
|
+
return SafeFormatter().format(template, **params)
|
@@ -16346,22 +16346,24 @@ ul.jsondiffpatch-textdiff {
|
|
16346
16346
|
._noTop_14odp_27 {
|
16347
16347
|
margin-top: 0;
|
16348
16348
|
}
|
16349
|
-
.
|
16349
|
+
._flatBody_gk2ju_1 {
|
16350
16350
|
color: var(--bs-danger);
|
16351
16351
|
display: grid;
|
16352
|
-
grid-template-columns:
|
16353
|
-
|
16354
|
-
|
16352
|
+
grid-template-columns: max-content max-content;
|
16353
|
+
column-gap: 0.2em;
|
16354
|
+
margin-top: 0.4rem;
|
16355
16355
|
}
|
16356
16356
|
|
16357
|
-
.
|
16357
|
+
._iconSmall_gk2ju_9 {
|
16358
16358
|
font-size: var(--inspect-font-size-small);
|
16359
16359
|
line-height: var(--inspect-font-size-small);
|
16360
16360
|
height: var(--inspect-font-size-small);
|
16361
16361
|
}
|
16362
16362
|
|
16363
|
-
.
|
16364
|
-
|
16363
|
+
._lineBase_gk2ju_15 {
|
16364
|
+
font-size: var(--inspect-font-size-base);
|
16365
|
+
line-height: var(--inspect-font-size-base);
|
16366
|
+
height: var(--inspect-font-size-base);
|
16365
16367
|
}
|
16366
16368
|
._target_yamz4_1 {
|
16367
16369
|
padding-left: 0;
|
@@ -19222,6 +19224,23 @@ span.ap-marker-container:hover span.ap-marker {
|
|
19222
19224
|
.message-band-btn.warning {
|
19223
19225
|
color: var(--bs-warning-text-emphasis);
|
19224
19226
|
}
|
19227
|
+
._body_5y0hl_1 {
|
19228
|
+
color: var(--bs-danger);
|
19229
|
+
display: grid;
|
19230
|
+
grid-template-columns: 1fr;
|
19231
|
+
align-content: align;
|
19232
|
+
justify-items: center;
|
19233
|
+
}
|
19234
|
+
|
19235
|
+
._iconSmall_5y0hl_9 {
|
19236
|
+
font-size: var(--inspect-font-size-small);
|
19237
|
+
line-height: var(--inspect-font-size-small);
|
19238
|
+
height: var(--inspect-font-size-small);
|
19239
|
+
}
|
19240
|
+
|
19241
|
+
._message_5y0hl_15 {
|
19242
|
+
width: 300px;
|
19243
|
+
}
|
19225
19244
|
._grid_1kcta_1 {
|
19226
19245
|
display: grid;
|
19227
19246
|
padding-top: 1em;
|
@@ -19585,16 +19604,17 @@ span.ap-marker-container:hover span.ap-marker {
|
|
19585
19604
|
padding: 0 0.2em;
|
19586
19605
|
justify-content: center;
|
19587
19606
|
}
|
19588
|
-
.
|
19607
|
+
._statusPanel_66f9o_1 {
|
19589
19608
|
padding: 1em;
|
19590
19609
|
margin-top: 0.5em;
|
19591
19610
|
text-transform: uppercase;
|
19592
19611
|
font-size: var(--inspect-font-size-smaller);
|
19593
19612
|
display: grid;
|
19594
19613
|
grid-template-columns: auto auto;
|
19614
|
+
justify-content: end;
|
19595
19615
|
}
|
19596
19616
|
|
19597
|
-
.
|
19617
|
+
._statusIcon_66f9o_11 {
|
19598
19618
|
font-size: var(--inspect-font-size-large);
|
19599
19619
|
margin-right: 0.3em;
|
19600
19620
|
margin-top: -0.1em;
|