inspect-ai 0.3.70__py3-none-any.whl → 0.3.72__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. inspect_ai/_cli/eval.py +14 -8
  2. inspect_ai/_display/core/display.py +2 -0
  3. inspect_ai/_display/core/footer.py +13 -3
  4. inspect_ai/_display/plain/display.py +6 -2
  5. inspect_ai/_display/rich/display.py +19 -6
  6. inspect_ai/_display/textual/app.py +6 -1
  7. inspect_ai/_display/textual/display.py +4 -0
  8. inspect_ai/_display/textual/widgets/transcript.py +10 -6
  9. inspect_ai/_eval/task/run.py +5 -8
  10. inspect_ai/_util/content.py +20 -1
  11. inspect_ai/_util/transcript.py +10 -4
  12. inspect_ai/_util/working.py +4 -0
  13. inspect_ai/_view/www/App.css +6 -0
  14. inspect_ai/_view/www/dist/assets/index.css +115 -87
  15. inspect_ai/_view/www/dist/assets/index.js +5324 -2276
  16. inspect_ai/_view/www/eslint.config.mjs +24 -1
  17. inspect_ai/_view/www/log-schema.json +283 -20
  18. inspect_ai/_view/www/package.json +8 -3
  19. inspect_ai/_view/www/src/App.tsx +2 -2
  20. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +4 -3
  21. inspect_ai/_view/www/src/components/Card.tsx +9 -8
  22. inspect_ai/_view/www/src/components/DownloadButton.tsx +2 -1
  23. inspect_ai/_view/www/src/components/EmptyPanel.tsx +2 -2
  24. inspect_ai/_view/www/src/components/ErrorPanel.tsx +4 -3
  25. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +13 -5
  26. inspect_ai/_view/www/src/components/FindBand.tsx +3 -3
  27. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +3 -3
  28. inspect_ai/_view/www/src/components/LabeledValue.tsx +5 -4
  29. inspect_ai/_view/www/src/components/LargeModal.tsx +18 -13
  30. inspect_ai/_view/www/src/components/{LightboxCarousel.css → LightboxCarousel.module.css} +22 -18
  31. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +36 -27
  32. inspect_ai/_view/www/src/components/MessageBand.tsx +2 -1
  33. inspect_ai/_view/www/src/components/NavPills.tsx +9 -8
  34. inspect_ai/_view/www/src/components/ProgressBar.tsx +2 -1
  35. inspect_ai/_view/www/src/components/TabSet.tsx +21 -15
  36. inspect_ai/_view/www/src/index.tsx +2 -2
  37. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +11 -9
  38. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +3 -2
  39. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +1 -0
  40. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +16 -0
  41. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +3 -2
  42. inspect_ai/_view/www/src/plan/DetailStep.tsx +2 -1
  43. inspect_ai/_view/www/src/plan/PlanCard.tsx +2 -5
  44. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +6 -9
  45. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +2 -1
  46. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +3 -3
  47. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +2 -2
  48. inspect_ai/_view/www/src/samples/SampleDialog.tsx +3 -3
  49. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +2 -2
  50. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
  51. inspect_ai/_view/www/src/samples/SamplesTools.tsx +2 -1
  52. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +3 -19
  53. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +2 -1
  54. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +2 -1
  55. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +2 -1
  56. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +22 -7
  57. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +35 -6
  58. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -2
  59. inspect_ai/_view/www/src/samples/chat/messages.ts +15 -2
  60. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +13 -4
  61. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +2 -2
  62. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +18 -19
  63. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +1 -1
  64. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +4 -3
  65. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +2 -2
  66. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +2 -3
  67. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +3 -2
  68. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +2 -1
  69. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +2 -1
  70. inspect_ai/_view/www/src/samples/list/SampleList.tsx +57 -45
  71. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +2 -1
  72. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +2 -1
  73. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +2 -2
  74. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +4 -3
  75. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +2 -5
  76. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +2 -2
  77. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +2 -1
  78. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +2 -2
  79. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +2 -1
  80. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +2 -1
  81. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +2 -1
  82. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +2 -1
  83. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +4 -0
  84. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +12 -2
  85. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +1 -1
  86. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +25 -28
  87. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +2 -1
  88. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +5 -4
  89. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +2 -2
  90. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +8 -7
  91. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +2 -2
  92. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +3 -3
  93. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +18 -14
  94. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +5 -5
  95. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +34 -15
  96. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +2 -1
  97. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +2 -1
  98. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +3 -2
  99. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +2 -2
  100. inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.module.css +28 -0
  101. inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.tsx +115 -0
  102. inspect_ai/_view/www/src/samples/transcript/event/utils.ts +29 -0
  103. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +2 -1
  104. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +3 -3
  105. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +11 -8
  106. inspect_ai/_view/www/src/types/log.d.ts +129 -34
  107. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +6 -10
  108. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +4 -0
  109. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +32 -9
  110. inspect_ai/_view/www/src/usage/TokenTable.tsx +4 -6
  111. inspect_ai/_view/www/src/usage/UsageCard.tsx +2 -1
  112. inspect_ai/_view/www/src/utils/format.ts +1 -1
  113. inspect_ai/_view/www/src/utils/json.ts +24 -0
  114. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +6 -5
  115. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -2
  116. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +2 -1
  117. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +2 -1
  118. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +3 -3
  119. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +4 -3
  120. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +5 -4
  121. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +5 -8
  122. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +5 -4
  123. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +2 -1
  124. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +2 -1
  125. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -2
  126. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +2 -1
  127. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +2 -2
  128. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -2
  129. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +2 -5
  130. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +12 -11
  131. inspect_ai/_view/www/yarn.lock +241 -5
  132. inspect_ai/log/_condense.py +3 -0
  133. inspect_ai/log/_recorders/eval.py +6 -1
  134. inspect_ai/log/_transcript.py +58 -1
  135. inspect_ai/model/__init__.py +2 -0
  136. inspect_ai/model/_call_tools.py +7 -0
  137. inspect_ai/model/_chat_message.py +22 -7
  138. inspect_ai/model/_conversation.py +10 -8
  139. inspect_ai/model/_generate_config.py +25 -4
  140. inspect_ai/model/_model.py +133 -57
  141. inspect_ai/model/_model_output.py +3 -0
  142. inspect_ai/model/_openai.py +106 -40
  143. inspect_ai/model/_providers/anthropic.py +281 -153
  144. inspect_ai/model/_providers/google.py +27 -8
  145. inspect_ai/model/_providers/groq.py +9 -4
  146. inspect_ai/model/_providers/openai.py +57 -4
  147. inspect_ai/model/_providers/openai_o1.py +10 -0
  148. inspect_ai/model/_providers/providers.py +1 -1
  149. inspect_ai/model/_reasoning.py +15 -2
  150. inspect_ai/scorer/_model.py +23 -19
  151. inspect_ai/solver/_human_agent/agent.py +14 -10
  152. inspect_ai/solver/_human_agent/commands/__init__.py +7 -3
  153. inspect_ai/solver/_human_agent/commands/submit.py +76 -30
  154. inspect_ai/tool/__init__.py +2 -0
  155. inspect_ai/tool/_tool.py +3 -1
  156. inspect_ai/tool/_tools/_computer/_common.py +117 -58
  157. inspect_ai/tool/_tools/_computer/_computer.py +80 -57
  158. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +7 -1
  159. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +91 -0
  160. inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +8 -0
  161. inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +12 -0
  162. inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +78 -0
  163. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +20 -0
  164. inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +1 -1
  165. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +175 -113
  166. inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +76 -20
  167. inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +65 -0
  168. inspect_ai/tool/_tools/_computer/test_args.py +151 -0
  169. inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +8 -0
  170. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +24 -0
  171. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +25 -0
  172. inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +5 -6
  173. inspect_ai/tool/_tools/_web_browser/_resources/README.md +10 -11
  174. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +71 -0
  175. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +323 -0
  176. inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +5 -0
  177. inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +279 -0
  178. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +9 -0
  179. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +293 -0
  180. inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +94 -0
  181. inspect_ai/tool/_tools/_web_browser/_resources/constants.py +2 -0
  182. inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +2 -0
  183. inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +50 -0
  184. inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +31 -359
  185. inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +280 -0
  186. inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +65 -0
  187. inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +64 -0
  188. inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +146 -0
  189. inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +64 -0
  190. inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +180 -0
  191. inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +15 -9
  192. inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +15 -0
  193. inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +44 -0
  194. inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +39 -0
  195. inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +198 -48
  196. inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +26 -25
  197. inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +178 -39
  198. inspect_ai/tool/_tools/_web_browser/_web_browser.py +38 -19
  199. inspect_ai/util/__init__.py +2 -1
  200. inspect_ai/util/_display.py +12 -0
  201. inspect_ai/util/_sandbox/events.py +55 -21
  202. inspect_ai/util/_sandbox/self_check.py +131 -43
  203. inspect_ai/util/_subtask.py +11 -0
  204. {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/METADATA +1 -1
  205. {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/RECORD +209 -186
  206. {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/WHEEL +1 -1
  207. inspect_ai/_view/www/src/components/VirtualList.module.css +0 -19
  208. inspect_ai/_view/www/src/components/VirtualList.tsx +0 -292
  209. inspect_ai/tool/_tools/_computer/_computer_split.py +0 -198
  210. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_node.py +0 -312
  211. inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +0 -275
  212. inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.png +0 -0
  213. inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_node.py +0 -176
  214. inspect_ai/tool/_tools/_web_browser/_resources/test_dm_env_servicer.py +0 -135
  215. inspect_ai/tool/_tools/_web_browser/_resources/test_web_environment.py +0 -71
  216. inspect_ai/tool/_tools/_web_browser/_resources/web_environment.py +0 -184
  217. {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/LICENSE +0 -0
  218. {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/entry_points.txt +0 -0
  219. {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/top_level.txt +0 -0
inspect_ai/_cli/eval.py CHANGED
@@ -390,15 +390,19 @@ def eval_options(func: Callable[..., Any]) -> Callable[..., click.Context]:
390
390
  @click.option(
391
391
  "--reasoning-effort",
392
392
  type=click.Choice(["low", "medium", "high"]),
393
- help="Constrains effort on reasoning for reasoning models. Open AI o1 models only.",
393
+ help="Constrains effort on reasoning for reasoning models. Open AI o-series models only.",
394
394
  envvar="INSPECT_EVAL_REASONING_EFFORT",
395
395
  )
396
396
  @click.option(
397
- "--reasoning-history/--no-reasoning-history",
398
- type=bool,
399
- is_flag=True,
400
- default=True,
401
- help="Include reasoning in chat message history sent to generate.",
397
+ "--reasoning-tokens",
398
+ type=int,
399
+ help="Maximum number of tokens to use for reasoning. Anthropic Claude models only.",
400
+ envvar="INSPECT_EVAL_REASONING_TOKENS",
401
+ )
402
+ @click.option(
403
+ "--reasoning-history",
404
+ type=click.Choice(["none", "all", "last", "auto"]),
405
+ help='Include reasoning in chat message history sent to generate (defaults to "auto", which uses the recommended default for each provider)',
402
406
  envvar="INSPECT_EVAL_REASONING_HISTORY",
403
407
  )
404
408
  @click.option(
@@ -470,7 +474,8 @@ def eval_command(
470
474
  max_tool_output: int | None,
471
475
  cache_prompt: str | None,
472
476
  reasoning_effort: str | None,
473
- reasoning_history: bool | None,
477
+ reasoning_tokens: int | None,
478
+ reasoning_history: Literal["none", "all", "last", "auto"] | None,
474
479
  message_limit: int | None,
475
480
  token_limit: int | None,
476
481
  time_limit: int | None,
@@ -633,7 +638,8 @@ def eval_set_command(
633
638
  max_tool_output: int | None,
634
639
  cache_prompt: str | None,
635
640
  reasoning_effort: str | None,
636
- reasoning_history: bool | None,
641
+ reasoning_tokens: int | None,
642
+ reasoning_history: Literal["none", "all", "last", "auto"] | None,
637
643
  message_limit: int | None,
638
644
  token_limit: int | None,
639
645
  time_limit: int | None,
inspect_ai/_display/core/display.py CHANGED
@@ -143,3 +143,5 @@ class Display(Protocol):
143
143
 
144
144
  @contextlib.contextmanager
145
145
  def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]: ...
146
+
147
+ def display_counter(self, caption: str, value: str) -> None: ...
inspect_ai/_display/core/footer.py CHANGED
@@ -9,10 +9,12 @@ from .config import task_dict
9
9
 
10
10
 
11
11
  @throttle(1)
12
- def task_footer(style: str = "") -> tuple[RenderableType, RenderableType]:
12
+ def task_footer(
13
+ counters: dict[str, str], style: str = ""
14
+ ) -> tuple[RenderableType, RenderableType]:
13
15
  return (
14
16
  Text.from_markup(task_resources(), style=style),
15
- Text.from_markup(task_http_rate_limits(), style=style),
17
+ Text.from_markup(task_counters(counters), style=style),
16
18
  )
17
19
 
18
20
 
@@ -23,5 +25,13 @@ def task_resources() -> str:
23
25
  return task_dict(resources)
24
26
 
25
27
 
26
- def task_http_rate_limits() -> str:
28
+ def task_counters(counters: dict[str, str]) -> str:
29
+ return task_dict(counters | task_http_rate_limits())
30
+
31
+
32
+ def task_http_rate_limits() -> dict[str, str]:
33
+ return {"HTTP rate limits": f"{http_rate_limit_count():,}"}
34
+
35
+
36
+ def task_http_rate_limits_str() -> str:
27
37
  return f"HTTP rate limits: {http_rate_limit_count():,}"
inspect_ai/_display/plain/display.py CHANGED
@@ -22,7 +22,7 @@ from ..core.display import (
22
22
  TaskSpec,
23
23
  TaskWithResult,
24
24
  )
25
- from ..core.footer import task_http_rate_limits
25
+ from ..core.footer import task_http_rate_limits_str
26
26
  from ..core.panel import task_panel, task_targets
27
27
  from ..core.results import task_metric, tasks_results
28
28
 
@@ -89,6 +89,10 @@ class PlainDisplay(Display):
89
89
  show_model_names=self.multiple_model_names,
90
90
  )
91
91
 
92
+ def display_counter(self, caption: str, value: str) -> None:
93
+ # Not supported for plain display as counters are only shown for tasks.
94
+ pass
95
+
92
96
  def _print_results(self) -> None:
93
97
  """Print final results using rich panels"""
94
98
  panels = tasks_results(self.tasks)
@@ -178,7 +182,7 @@ class PlainTaskDisplay(TaskDisplay):
178
182
  status_parts.append(resources)
179
183
 
180
184
  # Add rate limits
181
- rate_limits = task_http_rate_limits()
185
+ rate_limits = task_http_rate_limits_str()
182
186
  if rate_limits:
183
187
  status_parts.append(rate_limits)
184
188
 
inspect_ai/_display/rich/display.py CHANGED
@@ -60,6 +60,7 @@ class RichDisplay(Display):
60
60
  self.parallel = False
61
61
  self.live: Live | None = None
62
62
  self.timer_handle: asyncio.TimerHandle | None = None
63
+ self.counters: dict[str, str] = {}
63
64
  rich_initialise()
64
65
 
65
66
  @override
@@ -153,13 +154,20 @@ class RichDisplay(Display):
153
154
  and self.live.is_started
154
155
  ):
155
156
  if self.parallel:
156
- r = tasks_live_status(self.total_tasks, self.tasks, self.progress_ui)
157
+ r = tasks_live_status(
158
+ self.total_tasks, self.tasks, self.progress_ui, self.counters
159
+ )
157
160
  else:
158
- r = task_live_status(self.tasks, self.progress_ui)
161
+ r = task_live_status(self.tasks, self.progress_ui, self.counters)
159
162
  self.live.update(r, refresh=True)
160
163
 
161
164
  self.timer_handle = asyncio.get_event_loop().call_later(1, self._update_display)
162
165
 
166
+ @override
167
+ def display_counter(self, caption: str, value: str) -> None:
168
+ self.counters[caption] = value
169
+ self._update_display()
170
+
163
171
 
164
172
  class RichTaskScreen(TaskScreen):
165
173
  def __init__(self, live: Live) -> None:
@@ -286,7 +294,9 @@ class RichTaskDisplay(TaskDisplay):
286
294
  self.p.complete()
287
295
 
288
296
 
289
- def task_live_status(tasks: list[TaskStatus], progress: RProgress) -> RenderableType:
297
+ def task_live_status(
298
+ tasks: list[TaskStatus], progress: RProgress, counters: dict[str, str]
299
+ ) -> RenderableType:
290
300
  theme = rich_theme()
291
301
 
292
302
  # the panel contents
@@ -300,13 +310,16 @@ def task_live_status(tasks: list[TaskStatus], progress: RProgress) -> Renderable
300
310
  show_model=len(tasks) == 1,
301
311
  body=Group("", progress),
302
312
  subtitle=subtitle,
303
- footer=task_footer(theme.light),
313
+ footer=task_footer(counters, theme.light),
304
314
  log_location=None,
305
315
  )
306
316
 
307
317
 
308
318
  def tasks_live_status(
309
- total_tasks: int, tasks: list[TaskStatus], progress: RProgress
319
+ total_tasks: int,
320
+ tasks: list[TaskStatus],
321
+ progress: RProgress,
322
+ counters: dict[str, str],
310
323
  ) -> RenderableType:
311
324
  # rendering context
312
325
  theme = rich_theme()
@@ -325,7 +338,7 @@ def tasks_live_status(
325
338
  footer_table = Table.grid(expand=True)
326
339
  footer_table.add_column()
327
340
  footer_table.add_column(justify="right")
328
- footer = task_footer(theme.light)
341
+ footer = task_footer(counters, theme.light)
329
342
  footer_table.add_row()
330
343
  footer_table.add_row(footer[0], footer[1])
331
344
 
inspect_ai/_display/textual/app.py CHANGED
@@ -89,6 +89,7 @@ class TaskScreenApp(App[TR]):
89
89
  self._total_tasks = 0
90
90
  self._parallel = False
91
91
  self._tasks: list[TaskWithResult] = []
92
+ self._counters: dict[str, str] = {}
92
93
 
93
94
  # all tasks processed by app
94
95
  self._app_tasks: list[TaskWithResult] = []
@@ -302,7 +303,7 @@ class TaskScreenApp(App[TR]):
302
303
  samples_view.set_samples(active_and_started_samples)
303
304
 
304
305
  def update_footer(self) -> None:
305
- left, right = task_footer()
306
+ left, right = task_footer(self._counters)
306
307
  footer = self.query_one(AppFooter)
307
308
  footer.left = left
308
309
  footer.right = right
@@ -377,6 +378,10 @@ class TaskScreenApp(App[TR]):
377
378
  except NoMatches:
378
379
  return None
379
380
 
381
+ def display_counter(self, caption: str, value: str) -> None:
382
+ self._counters[caption] = value
383
+ self.update_footer()
384
+
380
385
  class InputPanelHost(InputPanel.Host):
381
386
  def __init__(self, app: "TaskScreenApp[TR]", tab_id: str) -> None:
382
387
  self.app = app
inspect_ai/_display/textual/display.py CHANGED
@@ -72,3 +72,7 @@ class TextualDisplay(Display):
72
72
  def task(self, profile: TaskProfile) -> Iterator[TaskDisplay]:
73
73
  with self.app.task_display(profile) as task_display:
74
74
  yield task_display
75
+
76
+ @override
77
+ def display_counter(self, caption: str, value: str) -> None:
78
+ self.app.display_counter(caption, value)
inspect_ai/_display/textual/widgets/transcript.py CHANGED
@@ -9,7 +9,7 @@ from textual.containers import ScrollableContainer
9
9
  from textual.widget import Widget
10
10
  from textual.widgets import Static
11
11
 
12
- from inspect_ai._util.content import ContentText
12
+ from inspect_ai._util.content import ContentReasoning, ContentText
13
13
  from inspect_ai._util.rich import lines_display
14
14
  from inspect_ai._util.transcript import (
15
15
  set_transcript_markdown_options,
@@ -36,7 +36,6 @@ from inspect_ai.log._transcript import (
36
36
  )
37
37
  from inspect_ai.model._chat_message import (
38
38
  ChatMessage,
39
- ChatMessageAssistant,
40
39
  ChatMessageUser,
41
40
  )
42
41
  from inspect_ai.model._render import messages_preceding_assistant
@@ -333,11 +332,16 @@ def render_message(message: ChatMessage) -> list[RenderableType]:
333
332
  Text(),
334
333
  ]
335
334
 
336
- if isinstance(message, ChatMessageAssistant) and message.reasoning:
337
- content.extend(transcript_reasoning(message.reasoning))
338
-
339
- if message.text:
335
+ # deal with plain text or with content blocks
336
+ if isinstance(message.content, str):
340
337
  content.extend([transcript_markdown(message.text.strip(), escape=True)])
338
+ else:
339
+ for c in message.content:
340
+ if isinstance(c, ContentReasoning):
341
+ content.extend(transcript_reasoning(c))
342
+ elif isinstance(c, ContentText):
343
+ content.extend([transcript_markdown(c.text.strip(), escape=True)])
344
+
341
345
  return content
342
346
 
343
347
 
inspect_ai/_eval/task/run.py CHANGED
@@ -50,11 +50,7 @@ from inspect_ai.log import (
50
50
  from inspect_ai.log._condense import condense_sample
51
51
  from inspect_ai.log._file import eval_log_json_str
52
52
  from inspect_ai.log._log import EvalSampleLimit, EvalSampleReductions, eval_error
53
- from inspect_ai.log._samples import (
54
- active_sample,
55
- set_active_sample_message_limit,
56
- set_active_sample_token_limit,
57
- )
53
+ from inspect_ai.log._samples import active_sample
58
54
  from inspect_ai.log._transcript import (
59
55
  ErrorEvent,
60
56
  SampleInitEvent,
@@ -695,9 +691,10 @@ async def task_run_sample(
695
691
  assert time_limit
696
692
  timeout_cm = timeout(time_limit / 2)
697
693
 
698
- # turn off sample limits
699
- set_active_sample_token_limit(None)
700
- set_active_sample_message_limit(None)
694
+ # turn off message and token limits
695
+ state.message_limit = None
696
+ state.token_limit = None
697
+ set_sample_state(state)
701
698
 
702
699
  # scoring
703
700
  try:
inspect_ai/_util/content.py CHANGED
@@ -13,6 +13,25 @@ class ContentText(BaseModel):
13
13
  """Text content."""
14
14
 
15
15
 
16
+ class ContentReasoning(BaseModel):
17
+ """Reasoning content.
18
+
19
+ See the specification for [thinking blocks](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#understanding-thinking-blocks) for Claude models.
20
+ """
21
+
22
+ type: Literal["reasoning"] = Field(default="reasoning")
23
+ """Type."""
24
+
25
+ reasoning: str
26
+ """Reasoning content."""
27
+
28
+ signature: str | None = Field(default=None)
29
+ """Signature for reasoning content (used by some models to ensure that reasoning content is not modified for replay)"""
30
+
31
+ redacted: bool = Field(default=False)
32
+ """Indicates that the explicit content of this reasoning block has been redacted."""
33
+
34
+
16
35
  class ContentImage(BaseModel):
17
36
  """Image content."""
18
37
 
@@ -55,5 +74,5 @@ class ContentVideo(BaseModel):
55
74
  """Format of video data ('mp4', 'mpeg', or 'mov')"""
56
75
 
57
76
 
58
- Content = Union[ContentText, ContentImage, ContentAudio, ContentVideo]
77
+ Content = Union[ContentText, ContentReasoning, ContentImage, ContentAudio, ContentVideo]
59
78
  """Content sent to or received from a model."""
inspect_ai/_util/transcript.py CHANGED
@@ -10,6 +10,8 @@ from rich.panel import Panel
10
10
  from rich.rule import Rule
11
11
  from rich.text import Text
12
12
 
13
+ from inspect_ai._util.content import ContentReasoning
14
+
13
15
  from .format import format_function_call
14
16
 
15
17
 
@@ -111,12 +113,16 @@ def transcript_panel(
111
113
  )
112
114
 
113
115
 
114
- def transcript_reasoning(reasoning: str) -> list[RenderableType]:
116
+ def transcript_reasoning(reasoning: ContentReasoning) -> list[RenderableType]:
115
117
  content: list[RenderableType] = []
118
+ text = (
119
+ reasoning.reasoning
120
+ if not reasoning.redacted
121
+ else "Reasoning encrypted by model provider."
122
+ )
123
+
116
124
  content.append(
117
- transcript_markdown(
118
- f"**<think>** \n{reasoning} \n**</think>**\n\n", escape=True
119
- )
125
+ transcript_markdown(f"**<think>** \n{text} \n**</think>**\n\n", escape=True)
120
126
  )
121
127
  content.append(Text())
122
128
  return content
inspect_ai/_util/working.py CHANGED
@@ -12,6 +12,10 @@ def sample_waiting_time() -> float:
12
12
  return _sample_waiting_time.get()
13
13
 
14
14
 
15
+ def sample_working_time() -> float:
16
+ return time.monotonic() - _sample_start_time.get() - sample_waiting_time()
17
+
18
+
15
19
  def report_sample_waiting_time(waiting_time: float) -> None:
16
20
  _sample_waiting_time.set(_sample_waiting_time.get() + waiting_time)
17
21
  check_sample_working_limit()
inspect_ai/_view/www/App.css CHANGED
@@ -805,15 +805,21 @@ table.table.table-sm td {
805
805
  overflow: unset;
806
806
  }
807
807
 
808
+ .markdown-content pre[class*="language-"],
808
809
  pre[class*="language-"].tool-output,
809
810
  .tool-output {
810
811
  background-color: #f8f8f8;
811
812
  }
813
+
814
+ .vscode-dark .model-call pre[class*="language-"],
815
+ .vscode-dark .markdown-content pre[class*="language-"],
812
816
  .vscode-dark pre[class*="language-"].tool-output,
813
817
  .vscode-dark .tool-output {
814
818
  background-color: #333333;
815
819
  }
816
820
 
821
+ .model-call pre[class*="language-"],
822
+ .markdown-content pre[class*="language-"],
817
823
  pre[class*="language-"].tool-output {
818
824
  border: none !important;
819
825
  box-shadow: none !important;