inspect-ai 0.3.65__py3-none-any.whl → 0.3.67__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (46)
  1. inspect_ai/_display/core/config.py +4 -0
  2. inspect_ai/_display/textual/app.py +13 -5
  3. inspect_ai/_display/textual/widgets/footer.py +2 -2
  4. inspect_ai/_display/textual/widgets/sandbox.py +1 -1
  5. inspect_ai/_display/textual/widgets/task_detail.py +7 -5
  6. inspect_ai/_display/textual/widgets/tasks.py +8 -6
  7. inspect_ai/_display/textual/widgets/transcript.py +1 -1
  8. inspect_ai/_eval/task/run.py +5 -3
  9. inspect_ai/_eval/task/task.py +9 -1
  10. inspect_ai/_util/format.py +58 -0
  11. inspect_ai/_view/www/dist/assets/index.css +29 -9
  12. inspect_ai/_view/www/dist/assets/index.js +368 -304
  13. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +1 -1
  14. inspect_ai/_view/www/src/samples/sample-tools/filters.ts +41 -20
  15. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +2 -1
  16. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +28 -6
  17. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +5 -0
  18. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +1 -3
  19. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +31 -16
  20. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +4 -1
  21. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +1 -0
  22. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +2 -2
  23. inspect_ai/model/_model.py +89 -2
  24. inspect_ai/model/_providers/anthropic.py +4 -0
  25. inspect_ai/model/_providers/azureai.py +5 -0
  26. inspect_ai/model/_providers/bedrock.py +5 -0
  27. inspect_ai/model/_providers/cloudflare.py +4 -0
  28. inspect_ai/model/_providers/goodfire.py +5 -0
  29. inspect_ai/model/_providers/google.py +16 -3
  30. inspect_ai/model/_providers/groq.py +4 -0
  31. inspect_ai/model/_providers/hf.py +7 -0
  32. inspect_ai/model/_providers/mistral.py +4 -0
  33. inspect_ai/model/_providers/openai.py +4 -0
  34. inspect_ai/model/_providers/vertex.py +5 -0
  35. inspect_ai/model/_providers/vllm.py +7 -0
  36. inspect_ai/solver/__init__.py +8 -1
  37. inspect_ai/solver/_human_agent/panel.py +11 -5
  38. inspect_ai/solver/_prompt.py +38 -5
  39. inspect_ai/util/_sandbox/docker/config.py +4 -1
  40. inspect_ai/util/_sandbox/docker/util.py +2 -1
  41. {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/METADATA +3 -2
  42. {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/RECORD +46 -46
  43. {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/LICENSE +0 -0
  44. {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/WHEEL +0 -0
  45. {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/entry_points.txt +0 -0
  46. {inspect_ai-0.3.65.dist-info → inspect_ai-0.3.67.dist-info}/top_level.txt +0 -0
@@ -35,6 +35,10 @@ def task_config(
35
35
  value = [str(v) for v in value]
36
36
  config_print.append(f"{name}: {','.join(value)}")
37
37
  elif name not in ["limit", "model"]:
38
+ if isinstance(value, list):
39
+ value = ",".join([str(v) for v in value])
40
+ if isinstance(value, str):
41
+ value = value.replace("[", "\\[")
38
42
  config_print.append(f"{name}: {value}")
39
43
  values = ", ".join(config_print)
40
44
  if values:
@@ -1,13 +1,21 @@
1
1
  import asyncio
2
2
  import contextlib
3
3
  from asyncio import CancelledError
4
- from typing import Any, AsyncIterator, ClassVar, Coroutine, Generic, Iterator, cast
4
+ from typing import (
5
+ Any,
6
+ AsyncIterator,
7
+ ClassVar,
8
+ Coroutine,
9
+ Generic,
10
+ Iterator,
11
+ cast,
12
+ )
5
13
 
6
14
  import rich
7
15
  from rich.console import Console
16
+ from rich.text import Text
8
17
  from textual.app import App, ComposeResult
9
18
  from textual.binding import Binding, BindingType
10
- from textual.content import Content
11
19
  from textual.css.query import NoMatches
12
20
  from textual.events import Print
13
21
  from textual.widget import Widget
@@ -308,9 +316,9 @@ class TaskScreenApp(App[TR]):
308
316
 
309
317
  def set_unread(unread: int | None) -> None:
310
318
  if unread is not None:
311
- console_tab.label = Content.from_rich_text(f"Console ({unread}")
319
+ console_tab.label = Text(f"Console ({unread}")
312
320
  else:
313
- console_tab.label = Content.from_rich_text("Console")
321
+ console_tab.label = Text("Console")
314
322
 
315
323
  self.watch(console_view, "unread", set_unread)
316
324
 
@@ -377,7 +385,7 @@ class TaskScreenApp(App[TR]):
377
385
  def set_title(self, title: str) -> None:
378
386
  tabs = self.app.query_one(TabbedContent)
379
387
  tab = tabs.get_tab(self.tab_id)
380
- tab.label = Content.from_rich_text(title)
388
+ tab.label = Text(title)
381
389
 
382
390
  def activate(self) -> None:
383
391
  # show the tab
@@ -26,8 +26,8 @@ class AppFooter(Widget):
26
26
  right: reactive[RenderableType] = reactive("")
27
27
 
28
28
  def compose(self) -> ComposeResult:
29
- yield Static(id="footer-left")
30
- yield Static(id="footer-right")
29
+ yield Static(id="footer-left", markup=False)
30
+ yield Static(id="footer-right", markup=False)
31
31
 
32
32
  def watch_left(self, new_left: RenderableType) -> None:
33
33
  footer_left = cast(Static, self.query_one("#footer-left"))
@@ -38,6 +38,6 @@ class SandboxView(Vertical):
38
38
  with Horizontal():
39
39
  yield Static("", classes="indent" if self.sandbox_name else "no_indent")
40
40
  with Vertical():
41
- yield Static(self.connection.command)
41
+ yield Static(self.connection.command, markup=False)
42
42
  if self.connection.ports:
43
43
  yield PortMappingsView(self.connection.ports)
@@ -233,20 +233,22 @@ class TaskMetrics(Widget):
233
233
  for metric in self.metrics:
234
234
  # Add the value static but keep it around
235
235
  # for future updates
236
- self.value_widgets[metric.name] = Static(self._metric_value(metric.value))
236
+ self.value_widgets[metric.name] = Static(
237
+ self._metric_value(metric.value), markup=False
238
+ )
237
239
 
238
- grid.mount(Static(metric.name))
240
+ grid.mount(Static(metric.name, markup=False))
239
241
  grid.mount(self.value_widgets[metric.name])
240
242
 
241
243
  def _title(self) -> Widget:
242
244
  if self.scorer is None:
243
245
  return Static("")
244
246
  elif self.reducer is None:
245
- return Static(self.scorer)
247
+ return Static(self.scorer, markup=False)
246
248
  else:
247
249
  return Horizontal(
248
- Static(self.scorer, classes="scorer"),
249
- Static(f"({self.reducer})", classes="reducer"),
250
+ Static(self.scorer, classes="scorer", markup=False),
251
+ Static(f"({self.reducer})", classes="reducer", markup=False),
250
252
  )
251
253
 
252
254
  def _metric_value(self, val: float) -> str:
@@ -128,8 +128,8 @@ class TasksView(Container):
128
128
  progress_view.update_count_width(self.sample_count_width)
129
129
 
130
130
  def compose(self) -> ComposeResult:
131
- yield Static(id="tasks-config")
132
- yield Static(id="tasks-targets")
131
+ yield Static(id="tasks-config", markup=False)
132
+ yield Static(id="tasks-targets", markup=False)
133
133
  yield ScrollableContainer(id="tasks-progress")
134
134
 
135
135
  def watch_config(self, new_config: RenderableType) -> None:
@@ -191,8 +191,8 @@ class TaskProgressView(Widget):
191
191
  self.model_name_width = model_name_width
192
192
 
193
193
  self.progress_bar = ProgressBar(total=task.profile.steps, show_eta=False)
194
- self.count_display = Static()
195
- self.metrics_display = Static(id="task-metrics")
194
+ self.count_display = Static(markup=False)
195
+ self.metrics_display = Static(id="task-metrics", markup=False)
196
196
  self.task_progress = TaskProgress(self.progress_bar)
197
197
 
198
198
  self.toggle = Toggle()
@@ -211,10 +211,12 @@ class TaskProgressView(Widget):
211
211
  yield (self.toggle if self.display_metrics else Static())
212
212
  yield TaskStatusIcon()
213
213
  yield Static(
214
- progress_description(self.t.profile, self.description_width, pad=True)
214
+ progress_description(self.t.profile, self.description_width, pad=True),
215
+ markup=False,
215
216
  )
216
217
  yield Static(
217
- progress_model_name(self.t.profile.model, self.model_name_width, pad=True)
218
+ progress_model_name(self.t.profile.model, self.model_name_width, pad=True),
219
+ markup=False,
218
220
  )
219
221
  yield self.progress_bar
220
222
  yield self.count_display
@@ -117,7 +117,7 @@ class TranscriptView(ScrollableContainer):
117
117
  )
118
118
  if isinstance(d.content, Markdown):
119
119
  set_transcript_markdown_options(d.content)
120
- widgets.append(Static(d.content))
120
+ widgets.append(Static(d.content, markup=False))
121
121
  widgets.append(Static(Text(" ")))
122
122
  return widgets
123
123
 
@@ -551,9 +551,11 @@ async def task_run_sample(
551
551
  # helper to handle exceptions (will throw if we've exceeded the limit)
552
552
  def handle_error(ex: BaseException) -> tuple[EvalError, BaseException | None]:
553
553
  err = sample_error(ex)
554
- py_logger.warning(
555
- f"Sample error (id: {sample.id}, epoch: {state.epoch}): {exception_message(ex)})"
556
- )
554
+ # if we aren't raising the error then print a warning
555
+ if err[1] is None:
556
+ py_logger.warning(
557
+ f"Sample error (id: {sample.id}, epoch: {state.epoch}): {exception_message(ex)})"
558
+ )
557
559
  transcript()._event(ErrorEvent(error=err[0]))
558
560
  return err
559
561
 
@@ -331,7 +331,15 @@ def resolve_epochs(epochs: int | Epochs | None) -> Epochs | None:
331
331
 
332
332
 
333
333
  def resolve_dataset(dataset: Dataset | Sequence[Sample] | None) -> Dataset:
334
- dataset = dataset or [Sample(input="prompt")]
334
+ # this is a convenience for tests that don't want to define a dummy sample
335
+ if dataset is None:
336
+ dataset = [Sample(input="prompt")]
337
+
338
+ # raise error if the dataset is empty
339
+ if len(dataset) == 0:
340
+ raise ValueError("The specified dataset is empty (has no samples)")
341
+
342
+ # resolve sequence to dataset if necessary
335
343
  return dataset if isinstance(dataset, Dataset) else MemoryDataset(list(dataset))
336
344
 
337
345
 
@@ -1,4 +1,5 @@
1
1
  import pprint
2
+ from string import Formatter
2
3
  from textwrap import indent
3
4
  from typing import Any
4
5
 
@@ -33,3 +34,60 @@ def format_progress_time(time: float, pad_hours: bool = True) -> str:
33
34
  hours, minutes = divmod(minutes, 60)
34
35
  hours_fmt = f"{hours:2.0f}" if pad_hours else f"{hours:.0f}"
35
36
  return f"{hours_fmt}:{minutes:02.0f}:{seconds:02.0f}"
37
+
38
+
39
+ def format_template(
40
+ template: str,
41
+ params: dict[str, Any],
42
+ skip_unknown: bool = True,
43
+ ) -> str:
44
+ """Format a template string, optionally preserving unknown placeholders.
45
+
46
+ Args:
47
+ template: A string containing {placeholders} to be formatted
48
+ params: Dictionary of parameters to substitute into the template
49
+ skip_unknown: If True, preserve unknown placeholders; if False, raise KeyError
50
+
51
+ Returns:
52
+ The formatted string with parameters substituted
53
+
54
+ Examples:
55
+ >>> format_template("Hello {name}!", {"name": "World"})
56
+ 'Hello World!'
57
+ >>> format_template("Hello {name}!", {}, skip_unknown=True)
58
+ 'Hello {name}!'
59
+ """
60
+
61
+ class SafeFormatter(Formatter):
62
+ def get_field(self, field_name: str, args: Any, kwargs: Any) -> Any:
63
+ try:
64
+ # Handle array indexing and nested attributes
65
+ first, rest = (
66
+ field_name.split(".", 1)
67
+ if "." in field_name
68
+ else (field_name, None)
69
+ )
70
+ first = first.split("[")[0] # Remove any array indexing for the check
71
+
72
+ if first not in params and skip_unknown:
73
+ return "{" + field_name + "}", field_name
74
+
75
+ obj = params.get(first)
76
+ if obj is None and skip_unknown:
77
+ return "{" + field_name + "}", field_name
78
+
79
+ return super().get_field(field_name, args, kwargs)
80
+ except (AttributeError, KeyError, IndexError) as e:
81
+ if skip_unknown:
82
+ return "{" + field_name + "}", field_name
83
+ raise KeyError(f"Failed to format field '{field_name}'") from e
84
+
85
+ def format_field(self, value: Any, format_spec: str) -> Any:
86
+ try:
87
+ return super().format_field(value, format_spec)
88
+ except (ValueError, TypeError):
89
+ if skip_unknown:
90
+ return "{" + str(value) + ":" + format_spec + "}"
91
+ raise
92
+
93
+ return SafeFormatter().format(template, **params)
@@ -16346,22 +16346,24 @@ ul.jsondiffpatch-textdiff {
16346
16346
  ._noTop_14odp_27 {
16347
16347
  margin-top: 0;
16348
16348
  }
16349
- ._body_5y0hl_1 {
16349
+ ._flatBody_gk2ju_1 {
16350
16350
  color: var(--bs-danger);
16351
16351
  display: grid;
16352
- grid-template-columns: 1fr;
16353
- align-content: align;
16354
- justify-items: center;
16352
+ grid-template-columns: max-content max-content;
16353
+ column-gap: 0.2em;
16354
+ margin-top: 0.4rem;
16355
16355
  }
16356
16356
 
16357
- ._iconSmall_5y0hl_9 {
16357
+ ._iconSmall_gk2ju_9 {
16358
16358
  font-size: var(--inspect-font-size-small);
16359
16359
  line-height: var(--inspect-font-size-small);
16360
16360
  height: var(--inspect-font-size-small);
16361
16361
  }
16362
16362
 
16363
- ._message_5y0hl_15 {
16364
- width: 300px;
16363
+ ._lineBase_gk2ju_15 {
16364
+ font-size: var(--inspect-font-size-base);
16365
+ line-height: var(--inspect-font-size-base);
16366
+ height: var(--inspect-font-size-base);
16365
16367
  }
16366
16368
  ._target_yamz4_1 {
16367
16369
  padding-left: 0;
@@ -19222,6 +19224,23 @@ span.ap-marker-container:hover span.ap-marker {
19222
19224
  .message-band-btn.warning {
19223
19225
  color: var(--bs-warning-text-emphasis);
19224
19226
  }
19227
+ ._body_5y0hl_1 {
19228
+ color: var(--bs-danger);
19229
+ display: grid;
19230
+ grid-template-columns: 1fr;
19231
+ align-content: align;
19232
+ justify-items: center;
19233
+ }
19234
+
19235
+ ._iconSmall_5y0hl_9 {
19236
+ font-size: var(--inspect-font-size-small);
19237
+ line-height: var(--inspect-font-size-small);
19238
+ height: var(--inspect-font-size-small);
19239
+ }
19240
+
19241
+ ._message_5y0hl_15 {
19242
+ width: 300px;
19243
+ }
19225
19244
  ._grid_1kcta_1 {
19226
19245
  display: grid;
19227
19246
  padding-top: 1em;
@@ -19585,16 +19604,17 @@ span.ap-marker-container:hover span.ap-marker {
19585
19604
  padding: 0 0.2em;
19586
19605
  justify-content: center;
19587
19606
  }
19588
- ._statusPanel_1fzh4_1 {
19607
+ ._statusPanel_66f9o_1 {
19589
19608
  padding: 1em;
19590
19609
  margin-top: 0.5em;
19591
19610
  text-transform: uppercase;
19592
19611
  font-size: var(--inspect-font-size-smaller);
19593
19612
  display: grid;
19594
19613
  grid-template-columns: auto auto;
19614
+ justify-content: end;
19595
19615
  }
19596
19616
 
19597
- ._statusIcon_1fzh4_10 {
19617
+ ._statusIcon_66f9o_11 {
19598
19618
  font-size: var(--inspect-font-size-large);
19599
19619
  margin-right: 0.3em;
19600
19620
  margin-top: -0.1em;