inspect-ai 0.3.70__py3-none-any.whl → 0.3.72__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219) hide show
  1. inspect_ai/_cli/eval.py +14 -8
  2. inspect_ai/_display/core/display.py +2 -0
  3. inspect_ai/_display/core/footer.py +13 -3
  4. inspect_ai/_display/plain/display.py +6 -2
  5. inspect_ai/_display/rich/display.py +19 -6
  6. inspect_ai/_display/textual/app.py +6 -1
  7. inspect_ai/_display/textual/display.py +4 -0
  8. inspect_ai/_display/textual/widgets/transcript.py +10 -6
  9. inspect_ai/_eval/task/run.py +5 -8
  10. inspect_ai/_util/content.py +20 -1
  11. inspect_ai/_util/transcript.py +10 -4
  12. inspect_ai/_util/working.py +4 -0
  13. inspect_ai/_view/www/App.css +6 -0
  14. inspect_ai/_view/www/dist/assets/index.css +115 -87
  15. inspect_ai/_view/www/dist/assets/index.js +5324 -2276
  16. inspect_ai/_view/www/eslint.config.mjs +24 -1
  17. inspect_ai/_view/www/log-schema.json +283 -20
  18. inspect_ai/_view/www/package.json +8 -3
  19. inspect_ai/_view/www/src/App.tsx +2 -2
  20. inspect_ai/_view/www/src/components/AnsiDisplay.tsx +4 -3
  21. inspect_ai/_view/www/src/components/Card.tsx +9 -8
  22. inspect_ai/_view/www/src/components/DownloadButton.tsx +2 -1
  23. inspect_ai/_view/www/src/components/EmptyPanel.tsx +2 -2
  24. inspect_ai/_view/www/src/components/ErrorPanel.tsx +4 -3
  25. inspect_ai/_view/www/src/components/ExpandablePanel.tsx +13 -5
  26. inspect_ai/_view/www/src/components/FindBand.tsx +3 -3
  27. inspect_ai/_view/www/src/components/HumanBaselineView.tsx +3 -3
  28. inspect_ai/_view/www/src/components/LabeledValue.tsx +5 -4
  29. inspect_ai/_view/www/src/components/LargeModal.tsx +18 -13
  30. inspect_ai/_view/www/src/components/{LightboxCarousel.css → LightboxCarousel.module.css} +22 -18
  31. inspect_ai/_view/www/src/components/LightboxCarousel.tsx +36 -27
  32. inspect_ai/_view/www/src/components/MessageBand.tsx +2 -1
  33. inspect_ai/_view/www/src/components/NavPills.tsx +9 -8
  34. inspect_ai/_view/www/src/components/ProgressBar.tsx +2 -1
  35. inspect_ai/_view/www/src/components/TabSet.tsx +21 -15
  36. inspect_ai/_view/www/src/index.tsx +2 -2
  37. inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +11 -9
  38. inspect_ai/_view/www/src/metadata/MetaDataView.tsx +3 -2
  39. inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +1 -0
  40. inspect_ai/_view/www/src/metadata/RenderedContent.tsx +16 -0
  41. inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +3 -2
  42. inspect_ai/_view/www/src/plan/DetailStep.tsx +2 -1
  43. inspect_ai/_view/www/src/plan/PlanCard.tsx +2 -5
  44. inspect_ai/_view/www/src/plan/PlanDetailView.tsx +6 -9
  45. inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +2 -1
  46. inspect_ai/_view/www/src/plan/SolverDetailView.tsx +3 -3
  47. inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +2 -2
  48. inspect_ai/_view/www/src/samples/SampleDialog.tsx +3 -3
  49. inspect_ai/_view/www/src/samples/SampleDisplay.tsx +2 -2
  50. inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +2 -2
  51. inspect_ai/_view/www/src/samples/SamplesTools.tsx +2 -1
  52. inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +3 -19
  53. inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +2 -1
  54. inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +2 -1
  55. inspect_ai/_view/www/src/samples/chat/ChatView.tsx +2 -1
  56. inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +22 -7
  57. inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +35 -6
  58. inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -2
  59. inspect_ai/_view/www/src/samples/chat/messages.ts +15 -2
  60. inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +13 -4
  61. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +2 -2
  62. inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +18 -19
  63. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +1 -1
  64. inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +4 -3
  65. inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +2 -2
  66. inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +2 -3
  67. inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +3 -2
  68. inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +2 -1
  69. inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +2 -1
  70. inspect_ai/_view/www/src/samples/list/SampleList.tsx +57 -45
  71. inspect_ai/_view/www/src/samples/list/SampleRow.tsx +2 -1
  72. inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +2 -1
  73. inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +2 -2
  74. inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +4 -3
  75. inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +2 -5
  76. inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +2 -2
  77. inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +2 -1
  78. inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +2 -2
  79. inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +2 -1
  80. inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +2 -1
  81. inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +2 -1
  82. inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +2 -1
  83. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +4 -0
  84. inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +12 -2
  85. inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +1 -1
  86. inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +25 -28
  87. inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +2 -1
  88. inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +5 -4
  89. inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +2 -2
  90. inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +8 -7
  91. inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +2 -2
  92. inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +3 -3
  93. inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +18 -14
  94. inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +5 -5
  95. inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +34 -15
  96. inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +2 -1
  97. inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +2 -1
  98. inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +3 -2
  99. inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +2 -2
  100. inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.module.css +28 -0
  101. inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.tsx +115 -0
  102. inspect_ai/_view/www/src/samples/transcript/event/utils.ts +29 -0
  103. inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +2 -1
  104. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +3 -3
  105. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +11 -8
  106. inspect_ai/_view/www/src/types/log.d.ts +129 -34
  107. inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +6 -10
  108. inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +4 -0
  109. inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +32 -9
  110. inspect_ai/_view/www/src/usage/TokenTable.tsx +4 -6
  111. inspect_ai/_view/www/src/usage/UsageCard.tsx +2 -1
  112. inspect_ai/_view/www/src/utils/format.ts +1 -1
  113. inspect_ai/_view/www/src/utils/json.ts +24 -0
  114. inspect_ai/_view/www/src/workspace/WorkSpace.tsx +6 -5
  115. inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +9 -2
  116. inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +2 -1
  117. inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +2 -1
  118. inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +3 -3
  119. inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +4 -3
  120. inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +5 -4
  121. inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +5 -8
  122. inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +5 -4
  123. inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +2 -1
  124. inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +2 -1
  125. inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -2
  126. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +2 -1
  127. inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +2 -2
  128. inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -2
  129. inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +2 -5
  130. inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +12 -11
  131. inspect_ai/_view/www/yarn.lock +241 -5
  132. inspect_ai/log/_condense.py +3 -0
  133. inspect_ai/log/_recorders/eval.py +6 -1
  134. inspect_ai/log/_transcript.py +58 -1
  135. inspect_ai/model/__init__.py +2 -0
  136. inspect_ai/model/_call_tools.py +7 -0
  137. inspect_ai/model/_chat_message.py +22 -7
  138. inspect_ai/model/_conversation.py +10 -8
  139. inspect_ai/model/_generate_config.py +25 -4
  140. inspect_ai/model/_model.py +133 -57
  141. inspect_ai/model/_model_output.py +3 -0
  142. inspect_ai/model/_openai.py +106 -40
  143. inspect_ai/model/_providers/anthropic.py +281 -153
  144. inspect_ai/model/_providers/google.py +27 -8
  145. inspect_ai/model/_providers/groq.py +9 -4
  146. inspect_ai/model/_providers/openai.py +57 -4
  147. inspect_ai/model/_providers/openai_o1.py +10 -0
  148. inspect_ai/model/_providers/providers.py +1 -1
  149. inspect_ai/model/_reasoning.py +15 -2
  150. inspect_ai/scorer/_model.py +23 -19
  151. inspect_ai/solver/_human_agent/agent.py +14 -10
  152. inspect_ai/solver/_human_agent/commands/__init__.py +7 -3
  153. inspect_ai/solver/_human_agent/commands/submit.py +76 -30
  154. inspect_ai/tool/__init__.py +2 -0
  155. inspect_ai/tool/_tool.py +3 -1
  156. inspect_ai/tool/_tools/_computer/_common.py +117 -58
  157. inspect_ai/tool/_tools/_computer/_computer.py +80 -57
  158. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/Code/User/settings.json +7 -1
  159. inspect_ai/tool/_tools/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfwm4.xml +91 -0
  160. inspect_ai/tool/_tools/_computer/_resources/tool/.pylintrc +8 -0
  161. inspect_ai/tool/_tools/_computer/_resources/tool/.vscode/settings.json +12 -0
  162. inspect_ai/tool/_tools/_computer/_resources/tool/_args.py +78 -0
  163. inspect_ai/tool/_tools/_computer/_resources/tool/_constants.py +20 -0
  164. inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +1 -1
  165. inspect_ai/tool/_tools/_computer/_resources/tool/_x11_client.py +175 -113
  166. inspect_ai/tool/_tools/_computer/_resources/tool/computer_tool.py +76 -20
  167. inspect_ai/tool/_tools/_computer/_resources/tool/pyproject.toml +65 -0
  168. inspect_ai/tool/_tools/_computer/test_args.py +151 -0
  169. inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +8 -0
  170. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +24 -0
  171. inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +25 -0
  172. inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +5 -6
  173. inspect_ai/tool/_tools/_web_browser/_resources/README.md +10 -11
  174. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +71 -0
  175. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +323 -0
  176. inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +5 -0
  177. inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +279 -0
  178. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +9 -0
  179. inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +293 -0
  180. inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +94 -0
  181. inspect_ai/tool/_tools/_web_browser/_resources/constants.py +2 -0
  182. inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +2 -0
  183. inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +50 -0
  184. inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +31 -359
  185. inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +280 -0
  186. inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +65 -0
  187. inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +64 -0
  188. inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +146 -0
  189. inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +64 -0
  190. inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +180 -0
  191. inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +15 -9
  192. inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +15 -0
  193. inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +44 -0
  194. inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +39 -0
  195. inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +198 -48
  196. inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +26 -25
  197. inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +178 -39
  198. inspect_ai/tool/_tools/_web_browser/_web_browser.py +38 -19
  199. inspect_ai/util/__init__.py +2 -1
  200. inspect_ai/util/_display.py +12 -0
  201. inspect_ai/util/_sandbox/events.py +55 -21
  202. inspect_ai/util/_sandbox/self_check.py +131 -43
  203. inspect_ai/util/_subtask.py +11 -0
  204. {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/METADATA +1 -1
  205. {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/RECORD +209 -186
  206. {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/WHEEL +1 -1
  207. inspect_ai/_view/www/src/components/VirtualList.module.css +0 -19
  208. inspect_ai/_view/www/src/components/VirtualList.tsx +0 -292
  209. inspect_ai/tool/_tools/_computer/_computer_split.py +0 -198
  210. inspect_ai/tool/_tools/_web_browser/_resources/accessibility_node.py +0 -312
  211. inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +0 -275
  212. inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.png +0 -0
  213. inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_node.py +0 -176
  214. inspect_ai/tool/_tools/_web_browser/_resources/test_dm_env_servicer.py +0 -135
  215. inspect_ai/tool/_tools/_web_browser/_resources/test_web_environment.py +0 -71
  216. inspect_ai/tool/_tools/_web_browser/_resources/web_environment.py +0 -184
  217. {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/LICENSE +0 -0
  218. {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/entry_points.txt +0 -0
  219. {inspect_ai-0.3.70.dist-info → inspect_ai-0.3.72.dist-info}/top_level.txt +0 -0
@@ -103,7 +103,8 @@ export type InternalTools = boolean | null;
103
103
  export type MaxToolOutput = number | null;
104
104
  export type CachePrompt = "auto" | boolean | null;
105
105
  export type ReasoningEffort = ("low" | "medium" | "high") | null;
106
- export type ReasoningHistory = boolean | null;
106
+ export type ReasoningTokens = number | null;
107
+ export type ReasoningHistory = ("none" | "all" | "last" | "auto") | null;
107
108
  export type TotalSamples = number;
108
109
  export type CompletedSamples = number;
109
110
  export type Name5 = string;
@@ -122,6 +123,7 @@ export type OutputTokens = number;
122
123
  export type TotalTokens = number;
123
124
  export type InputTokensCacheWrite = number | null;
124
125
  export type InputTokensCacheRead = number | null;
126
+ export type ReasoningTokens1 = number | null;
125
127
  export type Message = string;
126
128
  export type Traceback = string;
127
129
  export type TracebackAnsi = string;
@@ -139,47 +141,74 @@ export type Input =
139
141
  export type Role = "system";
140
142
  export type Content =
141
143
  | string
142
- | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
144
+ | (
145
+ | ContentText
146
+ | ContentReasoning
147
+ | ContentImage
148
+ | ContentAudio
149
+ | ContentVideo
150
+ )[];
143
151
  export type Type1 = "text";
144
152
  export type Text = string;
145
- export type Type2 = "image";
153
+ export type Type2 = "reasoning";
154
+ export type Reasoning = string;
155
+ export type Signature = string | null;
156
+ export type Redacted = boolean;
157
+ export type Type3 = "image";
146
158
  export type Image = string;
147
159
  export type Detail = "auto" | "low" | "high";
148
- export type Type3 = "audio";
160
+ export type Type4 = "audio";
149
161
  export type Audio = string;
150
162
  export type Format = "wav" | "mp3";
151
- export type Type4 = "video";
163
+ export type Type5 = "video";
152
164
  export type Video = string;
153
165
  export type Format1 = "mp4" | "mpeg" | "mov";
154
166
  export type Source = ("input" | "generate") | null;
155
167
  export type Role1 = "user";
156
168
  export type Content1 =
157
169
  | string
158
- | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
170
+ | (
171
+ | ContentText
172
+ | ContentReasoning
173
+ | ContentImage
174
+ | ContentAudio
175
+ | ContentVideo
176
+ )[];
159
177
  export type Source1 = ("input" | "generate") | null;
160
178
  export type ToolCallId = string[] | null;
161
179
  export type Role2 = "assistant";
162
180
  export type Content2 =
163
181
  | string
164
- | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
182
+ | (
183
+ | ContentText
184
+ | ContentReasoning
185
+ | ContentImage
186
+ | ContentAudio
187
+ | ContentVideo
188
+ )[];
165
189
  export type Source2 = ("input" | "generate") | null;
166
190
  export type ToolCalls = ToolCall[] | null;
167
191
  export type Id1 = string;
168
192
  export type Function = string;
169
- export type Type5 = "function";
193
+ export type Type6 = "function";
170
194
  export type ParseError = string | null;
171
195
  export type Title = string | null;
172
196
  export type Format2 = "text" | "markdown";
173
197
  export type Content3 = string;
174
- export type Reasoning = string | null;
175
198
  export type Role3 = "tool";
176
199
  export type Content4 =
177
200
  | string
178
- | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
201
+ | (
202
+ | ContentText
203
+ | ContentReasoning
204
+ | ContentImage
205
+ | ContentAudio
206
+ | ContentVideo
207
+ )[];
179
208
  export type Source3 = ("input" | "generate") | null;
180
209
  export type ToolCallId1 = string | null;
181
210
  export type Function1 = string | null;
182
- export type Type6 =
211
+ export type Type7 =
183
212
  | "parsing"
184
213
  | "timeout"
185
214
  | "unicode_decode"
@@ -235,6 +264,7 @@ export type Answer = string | null;
235
264
  export type Explanation = string | null;
236
265
  export type Metadata6 = {} | null;
237
266
  export type Timestamp = string;
267
+ export type WorkingStart = number;
238
268
  export type Pending = boolean | null;
239
269
  export type Event = "sample_init";
240
270
  export type Input1 =
@@ -255,9 +285,10 @@ export type Files1 = {
255
285
  export type Setup1 = string | null;
256
286
  export type JsonValue = unknown;
257
287
  export type Timestamp1 = string;
288
+ export type WorkingStart1 = number;
258
289
  export type Pending1 = boolean | null;
259
290
  export type Event1 = "sample_limit";
260
- export type Type7 =
291
+ export type Type8 =
261
292
  | "message"
262
293
  | "time"
263
294
  | "working"
@@ -267,6 +298,7 @@ export type Type7 =
267
298
  export type Message2 = string;
268
299
  export type Limit1 = number | null;
269
300
  export type Timestamp2 = string;
301
+ export type WorkingStart2 = number;
270
302
  export type Pending2 = boolean | null;
271
303
  export type Event2 = "sandbox";
272
304
  export type Action = "exec" | "read_file" | "write_file";
@@ -278,7 +310,9 @@ export type File = string | null;
278
310
  export type Input2 = string | null;
279
311
  export type Result = number | null;
280
312
  export type Output = string | null;
313
+ export type Completed = string | null;
281
314
  export type Timestamp3 = string;
315
+ export type WorkingStart3 = number;
282
316
  export type Pending3 = boolean | null;
283
317
  export type Event3 = "state";
284
318
  export type Op = "remove" | "add" | "replace" | "move" | "test" | "copy";
@@ -286,10 +320,12 @@ export type Path = string;
286
320
  export type From = string | null;
287
321
  export type Changes = JsonChange[];
288
322
  export type Timestamp4 = string;
323
+ export type WorkingStart4 = number;
289
324
  export type Pending4 = boolean | null;
290
325
  export type Event4 = "store";
291
326
  export type Changes1 = JsonChange[];
292
327
  export type Timestamp5 = string;
328
+ export type WorkingStart5 = number;
293
329
  export type Pending5 = boolean | null;
294
330
  export type Event5 = "model";
295
331
  export type Model2 = string;
@@ -301,8 +337,8 @@ export type Input3 = (
301
337
  )[];
302
338
  export type Name7 = string;
303
339
  export type Description = string;
304
- export type Type8 = "object";
305
- export type Type9 =
340
+ export type Type9 = "object";
341
+ export type Type10 =
306
342
  | ("string" | "integer" | "number" | "boolean" | "array" | "object" | "null")
307
343
  | null;
308
344
  export type Description1 = string | null;
@@ -321,10 +357,13 @@ export type Name8 = string;
321
357
  export type Error1 = string | null;
322
358
  export type Cache = ("read" | "write") | null;
323
359
  export type Time1 = number | null;
360
+ export type Completed1 = string | null;
361
+ export type WorkingTime = number | null;
324
362
  export type Timestamp6 = string;
363
+ export type WorkingStart6 = number;
325
364
  export type Pending6 = boolean | null;
326
365
  export type Event6 = "tool";
327
- export type Type10 = "function";
366
+ export type Type11 = "function";
328
367
  export type Id3 = string;
329
368
  export type Function2 = string;
330
369
  export type Result1 =
@@ -332,12 +371,20 @@ export type Result1 =
332
371
  | number
333
372
  | boolean
334
373
  | ContentText
374
+ | ContentReasoning
335
375
  | ContentImage
336
376
  | ContentAudio
337
377
  | ContentVideo
338
- | (ContentText | ContentImage | ContentAudio | ContentVideo)[];
378
+ | (
379
+ | ContentText
380
+ | ContentReasoning
381
+ | ContentImage
382
+ | ContentAudio
383
+ | ContentVideo
384
+ )[];
339
385
  export type Truncated = [unknown, unknown] | null;
340
386
  export type Timestamp7 = string;
387
+ export type WorkingStart7 = number;
341
388
  export type Pending7 = boolean | null;
342
389
  export type Event7 = "approval";
343
390
  export type Message3 = string;
@@ -350,19 +397,23 @@ export type Decision =
350
397
  | "terminate";
351
398
  export type Explanation1 = string | null;
352
399
  export type Timestamp8 = string;
400
+ export type WorkingStart8 = number;
353
401
  export type Pending8 = boolean | null;
354
402
  export type Event8 = "input";
355
403
  export type Input4 = string;
356
404
  export type InputAnsi = string;
357
405
  export type Timestamp9 = string;
406
+ export type WorkingStart9 = number;
358
407
  export type Pending9 = boolean | null;
359
408
  export type Event9 = "score";
360
409
  export type Target2 = string | string[] | null;
361
410
  export type Intermediate = boolean;
362
411
  export type Timestamp10 = string;
412
+ export type WorkingStart10 = number;
363
413
  export type Pending10 = boolean | null;
364
414
  export type Event10 = "error";
365
415
  export type Timestamp11 = string;
416
+ export type WorkingStart11 = number;
366
417
  export type Pending11 = boolean | null;
367
418
  export type Event11 = "logger";
368
419
  export type Name9 = string | null;
@@ -381,20 +432,23 @@ export type Filename = string;
381
432
  export type Module = string;
382
433
  export type Lineno = number;
383
434
  export type Timestamp12 = string;
435
+ export type WorkingStart12 = number;
384
436
  export type Pending12 = boolean | null;
385
437
  export type Event12 = "info";
386
438
  export type Source4 = string | null;
387
439
  export type Timestamp13 = string;
440
+ export type WorkingStart13 = number;
388
441
  export type Pending13 = boolean | null;
389
442
  export type Event13 = "step";
390
443
  export type Action1 = "begin" | "end";
391
- export type Type11 = string | null;
444
+ export type Type12 = string | null;
392
445
  export type Name10 = string;
393
446
  export type Timestamp14 = string;
447
+ export type WorkingStart14 = number;
394
448
  export type Pending14 = boolean | null;
395
449
  export type Event14 = "subtask";
396
450
  export type Name11 = string;
397
- export type Type12 = string | null;
451
+ export type Type13 = string | null;
398
452
  export type Events2 = (
399
453
  | SampleInitEvent
400
454
  | SampleLimitEvent
@@ -412,6 +466,8 @@ export type Events2 = (
412
466
  | StepEvent
413
467
  | SubtaskEvent
414
468
  )[];
469
+ export type Completed2 = string | null;
470
+ export type WorkingTime1 = number | null;
415
471
  export type Events1 = (
416
472
  | SampleInitEvent
417
473
  | SampleLimitEvent
@@ -429,6 +485,8 @@ export type Events1 = (
429
485
  | StepEvent
430
486
  | SubtaskEvent
431
487
  )[];
488
+ export type Completed3 = string | null;
489
+ export type WorkingTime2 = number | null;
432
490
  export type Events = (
433
491
  | SampleInitEvent
434
492
  | SampleLimitEvent
@@ -447,8 +505,9 @@ export type Events = (
447
505
  | SubtaskEvent
448
506
  )[];
449
507
  export type TotalTime = number | null;
450
- export type WorkingTime = number | null;
451
- export type Type13 =
508
+ export type WorkingTime3 = number | null;
509
+ export type Uuid = string | null;
510
+ export type Type14 =
452
511
  | "context"
453
512
  | "time"
454
513
  | "working"
@@ -643,6 +702,7 @@ export interface GenerateConfig {
643
702
  max_tool_output: MaxToolOutput;
644
703
  cache_prompt: CachePrompt;
645
704
  reasoning_effort: ReasoningEffort;
705
+ reasoning_tokens: ReasoningTokens;
646
706
  reasoning_history: ReasoningHistory;
647
707
  }
648
708
  /**
@@ -699,6 +759,7 @@ export interface ModelUsage1 {
699
759
  total_tokens: TotalTokens;
700
760
  input_tokens_cache_write: InputTokensCacheWrite;
701
761
  input_tokens_cache_read: InputTokensCacheRead;
762
+ reasoning_tokens: ReasoningTokens1;
702
763
  }
703
764
  /**
704
765
  * Eval error details.
@@ -728,7 +789,8 @@ export interface EvalSample {
728
789
  events: Events;
729
790
  model_usage: ModelUsage2;
730
791
  total_time: TotalTime;
731
- working_time: WorkingTime;
792
+ working_time: WorkingTime3;
793
+ uuid: Uuid;
732
794
  error: EvalError | null;
733
795
  attachments: Attachments;
734
796
  limit: EvalSampleLimit | null;
@@ -748,11 +810,22 @@ export interface ContentText {
748
810
  type: Type1;
749
811
  text: Text;
750
812
  }
813
+ /**
814
+ * Reasoning content.
815
+ *
816
+ * See the specification for [thinking blocks](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#understanding-thinking-blocks) for Claude models.
817
+ */
818
+ export interface ContentReasoning {
819
+ type: Type2;
820
+ reasoning: Reasoning;
821
+ signature: Signature;
822
+ redacted: Redacted;
823
+ }
751
824
  /**
752
825
  * Image content.
753
826
  */
754
827
  export interface ContentImage {
755
- type: Type2;
828
+ type: Type3;
756
829
  image: Image;
757
830
  detail: Detail;
758
831
  }
@@ -760,7 +833,7 @@ export interface ContentImage {
760
833
  * Audio content.
761
834
  */
762
835
  export interface ContentAudio {
763
- type: Type3;
836
+ type: Type4;
764
837
  audio: Audio;
765
838
  format: Format;
766
839
  }
@@ -768,7 +841,7 @@ export interface ContentAudio {
768
841
  * Video content.
769
842
  */
770
843
  export interface ContentVideo {
771
- type: Type4;
844
+ type: Type5;
772
845
  video: Video;
773
846
  format: Format1;
774
847
  }
@@ -789,13 +862,12 @@ export interface ChatMessageAssistant {
789
862
  content: Content2;
790
863
  source: Source2;
791
864
  tool_calls: ToolCalls;
792
- reasoning: Reasoning;
793
865
  }
794
866
  export interface ToolCall {
795
867
  id: Id1;
796
868
  function: Function;
797
869
  arguments: Arguments;
798
- type: Type5;
870
+ type: Type6;
799
871
  parse_error: ParseError;
800
872
  view: ToolCallContent | null;
801
873
  }
@@ -820,7 +892,7 @@ export interface ChatMessageTool {
820
892
  error: ToolCallError | null;
821
893
  }
822
894
  export interface ToolCallError {
823
- type: Type6;
895
+ type: Type7;
824
896
  message: Message1;
825
897
  }
826
898
  /**
@@ -881,6 +953,7 @@ export interface Store {}
881
953
  */
882
954
  export interface SampleInitEvent {
883
955
  timestamp: Timestamp;
956
+ working_start: WorkingStart;
884
957
  pending: Pending;
885
958
  event: Event;
886
959
  sample: Sample;
@@ -904,9 +977,10 @@ export interface Sample {
904
977
  */
905
978
  export interface SampleLimitEvent {
906
979
  timestamp: Timestamp1;
980
+ working_start: WorkingStart1;
907
981
  pending: Pending1;
908
982
  event: Event1;
909
- type: Type7;
983
+ type: Type8;
910
984
  message: Message2;
911
985
  limit: Limit1;
912
986
  }
@@ -915,6 +989,7 @@ export interface SampleLimitEvent {
915
989
  */
916
990
  export interface SandboxEvent {
917
991
  timestamp: Timestamp2;
992
+ working_start: WorkingStart2;
918
993
  pending: Pending2;
919
994
  event: Event2;
920
995
  action: Action;
@@ -924,12 +999,14 @@ export interface SandboxEvent {
924
999
  input: Input2;
925
1000
  result: Result;
926
1001
  output: Output;
1002
+ completed: Completed;
927
1003
  }
928
1004
  /**
929
1005
  * Change to the current `TaskState`
930
1006
  */
931
1007
  export interface StateEvent {
932
1008
  timestamp: Timestamp3;
1009
+ working_start: WorkingStart3;
933
1010
  pending: Pending3;
934
1011
  event: Event3;
935
1012
  changes: Changes;
@@ -953,6 +1030,7 @@ export interface JsonChange {
953
1030
  */
954
1031
  export interface StoreEvent {
955
1032
  timestamp: Timestamp4;
1033
+ working_start: WorkingStart4;
956
1034
  pending: Pending4;
957
1035
  event: Event4;
958
1036
  changes: Changes1;
@@ -962,6 +1040,7 @@ export interface StoreEvent {
962
1040
  */
963
1041
  export interface ModelEvent {
964
1042
  timestamp: Timestamp5;
1043
+ working_start: WorkingStart5;
965
1044
  pending: Pending5;
966
1045
  event: Event5;
967
1046
  model: Model2;
@@ -973,6 +1052,8 @@ export interface ModelEvent {
973
1052
  error: Error1;
974
1053
  cache: Cache;
975
1054
  call: ModelCall | null;
1055
+ completed: Completed1;
1056
+ working_time: WorkingTime;
976
1057
  }
977
1058
  /**
978
1059
  * Specification of a tool (JSON Schema compatible)
@@ -1009,7 +1090,7 @@ export interface ToolInfo {
1009
1090
  * Description of tool parameters object in JSON Schema format.
1010
1091
  */
1011
1092
  export interface ToolParams {
1012
- type: Type8;
1093
+ type: Type9;
1013
1094
  properties: Properties;
1014
1095
  required: Required1;
1015
1096
  additionalProperties: Additionalproperties1;
@@ -1021,7 +1102,7 @@ export interface Properties {
1021
1102
  * Description of tool parameter in JSON Schema format.
1022
1103
  */
1023
1104
  export interface ToolParam {
1024
- type: Type9;
1105
+ type: Type10;
1025
1106
  description: Description1;
1026
1107
  default: Default;
1027
1108
  enum: Enum;
@@ -1063,6 +1144,7 @@ export interface GenerateConfig1 {
1063
1144
  max_tool_output: MaxToolOutput;
1064
1145
  cache_prompt: CachePrompt;
1065
1146
  reasoning_effort: ReasoningEffort;
1147
+ reasoning_tokens: ReasoningTokens;
1066
1148
  reasoning_history: ReasoningHistory;
1067
1149
  }
1068
1150
  /**
@@ -1084,9 +1166,10 @@ export interface Response {
1084
1166
  */
1085
1167
  export interface ToolEvent {
1086
1168
  timestamp: Timestamp6;
1169
+ working_start: WorkingStart6;
1087
1170
  pending: Pending6;
1088
1171
  event: Event6;
1089
- type: Type10;
1172
+ type: Type11;
1090
1173
  id: Id3;
1091
1174
  function: Function2;
1092
1175
  arguments: Arguments1;
@@ -1095,6 +1178,8 @@ export interface ToolEvent {
1095
1178
  truncated: Truncated;
1096
1179
  error: ToolCallError | null;
1097
1180
  events: Events1;
1181
+ completed: Completed3;
1182
+ working_time: WorkingTime2;
1098
1183
  }
1099
1184
  export interface Arguments1 {
1100
1185
  [k: string]: JsonValue;
@@ -1104,6 +1189,7 @@ export interface Arguments1 {
1104
1189
  */
1105
1190
  export interface ApprovalEvent {
1106
1191
  timestamp: Timestamp7;
1192
+ working_start: WorkingStart7;
1107
1193
  pending: Pending7;
1108
1194
  event: Event7;
1109
1195
  message: Message3;
@@ -1129,6 +1215,7 @@ export interface ToolCallView {
1129
1215
  */
1130
1216
  export interface InputEvent {
1131
1217
  timestamp: Timestamp8;
1218
+ working_start: WorkingStart8;
1132
1219
  pending: Pending8;
1133
1220
  event: Event8;
1134
1221
  input: Input4;
@@ -1142,6 +1229,7 @@ export interface InputEvent {
1142
1229
  */
1143
1230
  export interface ScoreEvent {
1144
1231
  timestamp: Timestamp9;
1232
+ working_start: WorkingStart9;
1145
1233
  pending: Pending9;
1146
1234
  event: Event9;
1147
1235
  score: Score;
@@ -1153,6 +1241,7 @@ export interface ScoreEvent {
1153
1241
  */
1154
1242
  export interface ErrorEvent {
1155
1243
  timestamp: Timestamp10;
1244
+ working_start: WorkingStart10;
1156
1245
  pending: Pending10;
1157
1246
  event: Event10;
1158
1247
  error: EvalError;
@@ -1162,6 +1251,7 @@ export interface ErrorEvent {
1162
1251
  */
1163
1252
  export interface LoggerEvent {
1164
1253
  timestamp: Timestamp11;
1254
+ working_start: WorkingStart11;
1165
1255
  pending: Pending11;
1166
1256
  event: Event11;
1167
1257
  message: LoggingMessage;
@@ -1183,6 +1273,7 @@ export interface LoggingMessage {
1183
1273
  */
1184
1274
  export interface InfoEvent {
1185
1275
  timestamp: Timestamp12;
1276
+ working_start: WorkingStart12;
1186
1277
  pending: Pending12;
1187
1278
  event: Event12;
1188
1279
  source: Source4;
@@ -1193,10 +1284,11 @@ export interface InfoEvent {
1193
1284
  */
1194
1285
  export interface StepEvent {
1195
1286
  timestamp: Timestamp13;
1287
+ working_start: WorkingStart13;
1196
1288
  pending: Pending13;
1197
1289
  event: Event13;
1198
1290
  action: Action1;
1199
- type: Type11;
1291
+ type: Type12;
1200
1292
  name: Name10;
1201
1293
  }
1202
1294
  /**
@@ -1204,13 +1296,16 @@ export interface StepEvent {
1204
1296
  */
1205
1297
  export interface SubtaskEvent {
1206
1298
  timestamp: Timestamp14;
1299
+ working_start: WorkingStart14;
1207
1300
  pending: Pending14;
1208
1301
  event: Event14;
1209
1302
  name: Name11;
1210
- type: Type12;
1303
+ type: Type13;
1211
1304
  input: Input5;
1212
1305
  result: Result2;
1213
1306
  events: Events2;
1307
+ completed: Completed2;
1308
+ working_time: WorkingTime1;
1214
1309
  }
1215
1310
  export interface Input5 {}
1216
1311
  export interface Result2 {
@@ -1226,7 +1321,7 @@ export interface Attachments {
1226
1321
  * Limit encountered by sample.
1227
1322
  */
1228
1323
  export interface EvalSampleLimit {
1229
- type: Type13;
1324
+ type: Type14;
1230
1325
  limit: Limit2;
1231
1326
  }
1232
1327
  /**
@@ -1,11 +1,13 @@
1
+ import { FC } from "react";
2
+ import { ModelUsage, ModelUsage2 } from "../types/log";
1
3
  import { TokenHeader, TokenRow, TokenTable } from "./TokenTable";
2
4
 
3
- interface ModelTokenTable {
4
- model_usage: any;
5
+ interface ModelTokenTableProps {
6
+ model_usage: ModelUsage | ModelUsage2;
5
7
  className?: string | string[];
6
8
  }
7
9
 
8
- export const ModelTokenTable: React.FC<ModelTokenTable> = ({
10
+ export const ModelTokenTable: FC<ModelTokenTableProps> = ({
9
11
  model_usage,
10
12
  className,
11
13
  }) => {
@@ -14,13 +16,7 @@ export const ModelTokenTable: React.FC<ModelTokenTable> = ({
14
16
  <TokenHeader />
15
17
  <tbody>
16
18
  {Object.keys(model_usage).map((key) => {
17
- return (
18
- <TokenRow
19
- key={key}
20
- model={`${key}-token-row`}
21
- usage={model_usage[key]}
22
- />
23
- );
19
+ return <TokenRow key={key} model={key} usage={model_usage[key]} />;
24
20
  })}
25
21
  </tbody>
26
22
  </TokenTable>
@@ -22,3 +22,7 @@
22
22
  height: 1px;
23
23
  background-color: var(--bs-light-border-subtle);
24
24
  }
25
+
26
+ .padded {
27
+ margin-bottom: 1em;
28
+ }
@@ -1,5 +1,5 @@
1
1
  import clsx from "clsx";
2
- import { Fragment } from "react";
2
+ import { FC, Fragment } from "react";
3
3
  import { ModelUsage1 } from "../types/log";
4
4
  import { formatNumber } from "../utils/format";
5
5
  import styles from "./ModelUsagePanel.module.css";
@@ -13,23 +13,40 @@ interface ModelUsageRow {
13
13
  value?: number;
14
14
  secondary?: boolean;
15
15
  bordered?: boolean;
16
+ padded?: boolean;
16
17
  }
17
18
 
18
19
  /**
19
20
  * Renders the ModelUsagePanel component.
20
21
  */
21
- export const ModelUsagePanel: React.FC<ModelUsageProps> = ({ usage }) => {
22
+ export const ModelUsagePanel: FC<ModelUsageProps> = ({ usage }) => {
22
23
  if (!usage) {
23
24
  return null;
24
25
  }
25
26
 
26
- const rows: ModelUsageRow[] = [
27
- {
28
- label: "input",
29
- value: usage.input_tokens,
27
+ const rows: ModelUsageRow[] = [];
28
+
29
+ if (usage.reasoning_tokens) {
30
+ rows.push({
31
+ label: "Reasoning",
32
+ value: usage.reasoning_tokens,
33
+ secondary: false,
34
+ bordered: true,
35
+ });
36
+
37
+ rows.push({
38
+ label: "---",
39
+ value: undefined,
30
40
  secondary: false,
31
- },
32
- ];
41
+ padded: true,
42
+ });
43
+ }
44
+
45
+ rows.push({
46
+ label: "input",
47
+ value: usage.input_tokens,
48
+ secondary: false,
49
+ });
33
50
 
34
51
  if (usage.input_tokens_cache_read) {
35
52
  rows.push({
@@ -71,7 +88,13 @@ export const ModelUsagePanel: React.FC<ModelUsageProps> = ({ usage }) => {
71
88
  {rows.map((row, idx) => {
72
89
  if (row.label === "---") {
73
90
  return (
74
- <div key={`$usage-sep-${idx}`} className={styles.separator}></div>
91
+ <div
92
+ key={`$usage-sep-${idx}`}
93
+ className={clsx(
94
+ styles.separator,
95
+ row.padded ? styles.padded : undefined,
96
+ )}
97
+ ></div>
75
98
  );
76
99
  } else {
77
100
  return (
@@ -1,17 +1,15 @@
1
1
  import clsx from "clsx";
2
+ import { FC, ReactNode } from "react";
2
3
  import { ModelUsage1 } from "../types/log";
3
4
  import { ModelUsagePanel } from "./ModelUsagePanel";
4
5
  import styles from "./TokenTable.module.css";
5
6
 
6
7
  interface TokenTableProps {
7
8
  className?: string | string[];
8
- children?: React.ReactNode;
9
+ children?: ReactNode;
9
10
  }
10
11
 
11
- export const TokenTable: React.FC<TokenTableProps> = ({
12
- className,
13
- children,
14
- }) => {
12
+ export const TokenTable: FC<TokenTableProps> = ({ className, children }) => {
15
13
  return (
16
14
  <table
17
15
  className={clsx(
@@ -77,7 +75,7 @@ interface TokenRowProps {
77
75
  usage: ModelUsage1;
78
76
  }
79
77
 
80
- export const TokenRow: React.FC<TokenRowProps> = ({ model, usage }) => {
78
+ export const TokenRow: FC<TokenRowProps> = ({ model, usage }) => {
81
79
  return (
82
80
  <tr>
83
81
  <td>