inspect-ai 0.3.102__py3-none-any.whl → 0.3.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. inspect_ai/_cli/common.py +2 -1
  2. inspect_ai/_cli/eval.py +2 -1
  3. inspect_ai/_display/core/active.py +3 -0
  4. inspect_ai/_display/core/config.py +1 -0
  5. inspect_ai/_display/core/panel.py +21 -13
  6. inspect_ai/_display/core/results.py +3 -7
  7. inspect_ai/_display/core/rich.py +3 -5
  8. inspect_ai/_display/log/__init__.py +0 -0
  9. inspect_ai/_display/log/display.py +173 -0
  10. inspect_ai/_display/plain/display.py +2 -2
  11. inspect_ai/_display/rich/display.py +2 -4
  12. inspect_ai/_display/textual/app.py +1 -6
  13. inspect_ai/_display/textual/widgets/task_detail.py +3 -14
  14. inspect_ai/_display/textual/widgets/tasks.py +1 -1
  15. inspect_ai/_eval/eval.py +14 -2
  16. inspect_ai/_eval/evalset.py +3 -2
  17. inspect_ai/_eval/registry.py +6 -1
  18. inspect_ai/_eval/run.py +7 -1
  19. inspect_ai/_eval/task/constants.py +1 -0
  20. inspect_ai/_eval/task/log.py +5 -1
  21. inspect_ai/_eval/task/run.py +1 -1
  22. inspect_ai/_util/citation.py +88 -0
  23. inspect_ai/_util/content.py +24 -2
  24. inspect_ai/_util/json.py +17 -2
  25. inspect_ai/_util/registry.py +19 -4
  26. inspect_ai/_view/schema.py +0 -6
  27. inspect_ai/_view/www/dist/assets/index.css +82 -24
  28. inspect_ai/_view/www/dist/assets/index.js +10124 -9808
  29. inspect_ai/_view/www/log-schema.json +418 -1
  30. inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
  31. inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
  32. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
  33. inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
  34. inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
  35. inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
  36. inspect_ai/_view/www/package.json +2 -2
  37. inspect_ai/_view/www/src/@types/log.d.ts +140 -39
  38. inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
  39. inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
  40. inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
  41. inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
  42. inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
  43. inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
  44. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
  45. inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
  46. inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
  47. inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
  48. inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
  49. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
  50. inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
  51. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
  52. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
  53. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
  54. inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
  55. inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
  56. inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
  57. inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
  58. inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
  59. inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
  60. inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
  61. inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
  62. inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
  63. inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
  64. inspect_ai/_view/www/src/tests/README.md +2 -2
  65. inspect_ai/_view/www/src/utils/git.ts +3 -1
  66. inspect_ai/_view/www/src/utils/html.ts +6 -0
  67. inspect_ai/agent/_handoff.py +3 -3
  68. inspect_ai/log/_condense.py +5 -0
  69. inspect_ai/log/_file.py +4 -1
  70. inspect_ai/log/_log.py +9 -4
  71. inspect_ai/log/_recorders/eval.py +4 -3
  72. inspect_ai/log/_recorders/json.py +5 -2
  73. inspect_ai/log/_recorders/recorder.py +1 -0
  74. inspect_ai/log/_util.py +2 -0
  75. inspect_ai/model/__init__.py +14 -0
  76. inspect_ai/model/_call_tools.py +13 -4
  77. inspect_ai/model/_chat_message.py +3 -0
  78. inspect_ai/model/_openai_responses.py +80 -34
  79. inspect_ai/model/_providers/_anthropic_citations.py +158 -0
  80. inspect_ai/model/_providers/_google_citations.py +100 -0
  81. inspect_ai/model/_providers/anthropic.py +196 -34
  82. inspect_ai/model/_providers/google.py +94 -22
  83. inspect_ai/model/_providers/mistral.py +20 -7
  84. inspect_ai/model/_providers/openai.py +11 -10
  85. inspect_ai/model/_providers/openai_compatible.py +3 -2
  86. inspect_ai/model/_providers/openai_responses.py +2 -5
  87. inspect_ai/model/_providers/perplexity.py +123 -0
  88. inspect_ai/model/_providers/providers.py +13 -2
  89. inspect_ai/model/_providers/vertex.py +3 -0
  90. inspect_ai/model/_trim.py +5 -0
  91. inspect_ai/tool/__init__.py +14 -0
  92. inspect_ai/tool/_mcp/_mcp.py +5 -2
  93. inspect_ai/tool/_mcp/sampling.py +19 -3
  94. inspect_ai/tool/_mcp/server.py +1 -1
  95. inspect_ai/tool/_tool.py +10 -1
  96. inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
  97. inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
  98. inspect_ai/tool/_tools/_web_search/_google.py +22 -25
  99. inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
  100. inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
  101. inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
  102. inspect_ai/util/_display.py +11 -2
  103. inspect_ai/util/_sandbox/docker/compose.py +2 -2
  104. inspect_ai/util/_span.py +12 -1
  105. {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/METADATA +2 -2
  106. {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/RECORD +112 -88
  107. /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
  108. /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
  109. {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/WHEEL +0 -0
  110. {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/entry_points.txt +0 -0
  111. {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/licenses/LICENSE +0 -0
  112. {inspect_ai-0.3.102.dist-info → inspect_ai-0.3.104.dist-info}/top_level.txt +0 -0
@@ -194,24 +194,55 @@ export type Content =
194
194
  | ContentImage
195
195
  | ContentAudio
196
196
  | ContentVideo
197
+ | ContentData
197
198
  )[];
198
199
  export type Type3 = "text";
199
200
  export type Text = string;
200
201
  export type Refusal = boolean | null;
201
- export type Type4 = "reasoning";
202
+ export type Citations =
203
+ | (ContentCitation | DocumentCitation | UrlCitation)[]
204
+ | null;
205
+ export type CitedText = string | [number, number] | null;
206
+ export type Title = string | null;
207
+ export type Internal = {
208
+ [k: string]: JsonValue;
209
+ } | null;
210
+ export type JsonValue = unknown;
211
+ export type Type4 = "content";
212
+ export type CitedText1 = string | [number, number] | null;
213
+ export type Title1 = string | null;
214
+ export type Internal1 = {
215
+ [k: string]: JsonValue;
216
+ } | null;
217
+ export type Type5 = "document";
218
+ export type Type6 = "block" | "page" | "char";
219
+ export type StartIndex = number;
220
+ export type EndIndex = number;
221
+ export type CitedText2 = string | [number, number] | null;
222
+ export type Title2 = string | null;
223
+ export type Internal2 = {
224
+ [k: string]: JsonValue;
225
+ } | null;
226
+ export type Type7 = "url";
227
+ export type Url = string;
228
+ export type Type8 = "reasoning";
202
229
  export type Reasoning = string;
203
230
  export type Signature = string | null;
204
231
  export type Redacted = boolean;
205
- export type Type5 = "image";
232
+ export type Type9 = "image";
206
233
  export type Image = string;
207
234
  export type Detail = "auto" | "low" | "high";
208
- export type Type6 = "audio";
235
+ export type Type10 = "audio";
209
236
  export type Audio = string;
210
237
  export type Format1 = "wav" | "mp3";
211
- export type Type7 = "video";
238
+ export type Type11 = "video";
212
239
  export type Video = string;
213
240
  export type Format2 = "mp4" | "mpeg" | "mov";
241
+ export type Type12 = "data";
214
242
  export type Source = ("input" | "generate") | null;
243
+ export type Metadata5 = {
244
+ [k: string]: unknown;
245
+ } | null;
215
246
  export type Role = "system";
216
247
  export type Id2 = string | null;
217
248
  export type Content1 =
@@ -222,8 +253,12 @@ export type Content1 =
222
253
  | ContentImage
223
254
  | ContentAudio
224
255
  | ContentVideo
256
+ | ContentData
225
257
  )[];
226
258
  export type Source1 = ("input" | "generate") | null;
259
+ export type Metadata6 = {
260
+ [k: string]: unknown;
261
+ } | null;
227
262
  export type Role1 = "user";
228
263
  export type ToolCallId = string[] | null;
229
264
  export type Id3 = string | null;
@@ -235,17 +270,21 @@ export type Content2 =
235
270
  | ContentImage
236
271
  | ContentAudio
237
272
  | ContentVideo
273
+ | ContentData
238
274
  )[];
239
275
  export type Source2 = ("input" | "generate") | null;
276
+ export type Metadata7 = {
277
+ [k: string]: unknown;
278
+ } | null;
240
279
  export type Role2 = "assistant";
241
280
  export type ToolCalls = ToolCall[] | null;
242
281
  export type Id4 = string;
243
282
  export type Function = string;
244
283
  export type ParseError = string | null;
245
- export type Title = string | null;
284
+ export type Title3 = string | null;
246
285
  export type Format3 = "text" | "markdown";
247
286
  export type Content3 = string;
248
- export type Type8 = string | null;
287
+ export type Type13 = string | null;
249
288
  export type Model2 = string | null;
250
289
  export type Id5 = string | null;
251
290
  export type Content4 =
@@ -256,12 +295,16 @@ export type Content4 =
256
295
  | ContentImage
257
296
  | ContentAudio
258
297
  | ContentVideo
298
+ | ContentData
259
299
  )[];
260
300
  export type Source3 = ("input" | "generate") | null;
301
+ export type Metadata8 = {
302
+ [k: string]: unknown;
303
+ } | null;
261
304
  export type Role3 = "tool";
262
305
  export type ToolCallId1 = string | null;
263
306
  export type Function1 = string | null;
264
- export type Type9 =
307
+ export type Type14 =
265
308
  | "parsing"
266
309
  | "timeout"
267
310
  | "unicode_decode"
@@ -301,7 +344,7 @@ export type Bytes1 = number[] | null;
301
344
  export type Content5 = Logprob[];
302
345
  export type Choices1 = ChatCompletionChoice[];
303
346
  export type Time = number | null;
304
- export type Metadata5 = {
347
+ export type Metadata9 = {
305
348
  [k: string]: unknown;
306
349
  } | null;
307
350
  export type Error = string | null;
@@ -318,7 +361,7 @@ export type Value1 =
318
361
  };
319
362
  export type Answer = string | null;
320
363
  export type Explanation = string | null;
321
- export type Metadata6 = {
364
+ export type Metadata10 = {
322
365
  [k: string]: unknown;
323
366
  } | null;
324
367
  export type SpanId = string | null;
@@ -337,20 +380,19 @@ export type Input1 =
337
380
  export type Choices2 = string[] | null;
338
381
  export type Target1 = string | string[];
339
382
  export type Id6 = number | string | null;
340
- export type Metadata8 = {
383
+ export type Metadata12 = {
341
384
  [k: string]: unknown;
342
385
  } | null;
343
386
  export type Files1 = {
344
387
  [k: string]: string;
345
388
  } | null;
346
389
  export type Setup1 = string | null;
347
- export type JsonValue = unknown;
348
390
  export type SpanId1 = string | null;
349
391
  export type Timestamp1 = string;
350
392
  export type WorkingStart1 = number;
351
393
  export type Pending1 = boolean | null;
352
394
  export type Event1 = "sample_limit";
353
- export type Type10 =
395
+ export type Type15 =
354
396
  | "message"
355
397
  | "time"
356
398
  | "working"
@@ -404,7 +446,7 @@ export type Input3 = (
404
446
  )[];
405
447
  export type Name8 = string;
406
448
  export type Description2 = string;
407
- export type Type11 = "object";
449
+ export type Type16 = "object";
408
450
  export type Required1 = string[];
409
451
  export type Additionalproperties1 = boolean;
410
452
  export type Options3 = {
@@ -424,7 +466,7 @@ export type Timestamp6 = string;
424
466
  export type WorkingStart6 = number;
425
467
  export type Pending6 = boolean | null;
426
468
  export type Event6 = "tool";
427
- export type Type12 = "function";
469
+ export type Type17 = "function";
428
470
  export type Id7 = string;
429
471
  export type Function2 = string;
430
472
  export type Result1 =
@@ -436,12 +478,14 @@ export type Result1 =
436
478
  | ContentImage
437
479
  | ContentAudio
438
480
  | ContentVideo
481
+ | ContentData
439
482
  | (
440
483
  | ContentText
441
484
  | ContentReasoning
442
485
  | ContentImage
443
486
  | ContentAudio
444
487
  | ContentVideo
488
+ | ContentData
445
489
  )[];
446
490
  export type Truncated = [unknown, unknown] | null;
447
491
  export type SpanId7 = string | null;
@@ -510,7 +554,7 @@ export type Pending13 = boolean | null;
510
554
  export type Event13 = "span_begin";
511
555
  export type Id8 = string;
512
556
  export type ParentId = string | null;
513
- export type Type13 = string | null;
557
+ export type Type18 = string | null;
514
558
  export type Name11 = string;
515
559
  export type SpanId14 = string | null;
516
560
  export type Timestamp14 = string;
@@ -524,7 +568,7 @@ export type WorkingStart15 = number;
524
568
  export type Pending15 = boolean | null;
525
569
  export type Event15 = "step";
526
570
  export type Action1 = "begin" | "end";
527
- export type Type14 = string | null;
571
+ export type Type19 = string | null;
528
572
  export type Name12 = string;
529
573
  export type SpanId16 = string | null;
530
574
  export type Timestamp16 = string;
@@ -532,7 +576,7 @@ export type WorkingStart16 = number;
532
576
  export type Pending16 = boolean | null;
533
577
  export type Event16 = "subtask";
534
578
  export type Name13 = string;
535
- export type Type15 = string | null;
579
+ export type Type20 = string | null;
536
580
  export type Events2 = (
537
581
  | SampleInitEvent
538
582
  | SampleLimitEvent
@@ -600,7 +644,7 @@ export type TotalTime = number | null;
600
644
  export type WorkingTime3 = number | null;
601
645
  export type Uuid = string | null;
602
646
  export type ErrorRetries = EvalError[] | null;
603
- export type Type16 =
647
+ export type Type21 =
604
648
  | "context"
605
649
  | "time"
606
650
  | "working"
@@ -622,7 +666,7 @@ export type Value2 =
622
666
  };
623
667
  export type Answer1 = string | null;
624
668
  export type Explanation2 = string | null;
625
- export type Metadata9 = {
669
+ export type Metadata13 = {
626
670
  [k: string]: unknown;
627
671
  } | null;
628
672
  export type SampleId1 = string | number | null;
@@ -658,6 +702,7 @@ export interface EvalSpec {
658
702
  task_registry_name: TaskRegistryName;
659
703
  task_attribs: TaskAttribs;
660
704
  task_args: TaskArgs;
705
+ task_args_passed: TaskArgsPassed;
661
706
  solver: Solver;
662
707
  solver_args: SolverArgs;
663
708
  tags: Tags;
@@ -681,6 +726,9 @@ export interface TaskAttribs {
681
726
  export interface TaskArgs {
682
727
  [k: string]: unknown;
683
728
  }
729
+ export interface TaskArgsPassed {
730
+ [k: string]: unknown;
731
+ }
684
732
  /**
685
733
  * Dataset used for evaluation.
686
734
  */
@@ -984,7 +1032,7 @@ export interface EvalSample {
984
1032
  messages: Messages;
985
1033
  output: ModelOutput;
986
1034
  scores: Scores1;
987
- metadata: Metadata7;
1035
+ metadata: Metadata11;
988
1036
  store: Store;
989
1037
  events: Events;
990
1038
  model_usage: ModelUsage2;
@@ -1003,6 +1051,7 @@ export interface ChatMessageSystem {
1003
1051
  id: Id1;
1004
1052
  content: Content;
1005
1053
  source: Source;
1054
+ metadata: Metadata5;
1006
1055
  internal: unknown;
1007
1056
  role: Role;
1008
1057
  }
@@ -1014,6 +1063,44 @@ export interface ContentText {
1014
1063
  type: Type3;
1015
1064
  text: Text;
1016
1065
  refusal: Refusal;
1066
+ citations: Citations;
1067
+ }
1068
+ /**
1069
+ * A generic content citation.
1070
+ */
1071
+ export interface ContentCitation {
1072
+ cited_text: CitedText;
1073
+ title: Title;
1074
+ internal: Internal;
1075
+ type: Type4;
1076
+ }
1077
+ /**
1078
+ * A citation that refers to a page range in a document.
1079
+ */
1080
+ export interface DocumentCitation {
1081
+ cited_text: CitedText1;
1082
+ title: Title1;
1083
+ internal: Internal1;
1084
+ type: Type5;
1085
+ range: DocumentRange | null;
1086
+ }
1087
+ /**
1088
+ * A range specifying a section of a document.
1089
+ */
1090
+ export interface DocumentRange {
1091
+ type: Type6;
1092
+ start_index: StartIndex;
1093
+ end_index: EndIndex;
1094
+ }
1095
+ /**
1096
+ * A citation that refers to a URL.
1097
+ */
1098
+ export interface UrlCitation {
1099
+ cited_text: CitedText2;
1100
+ title: Title2;
1101
+ internal: Internal2;
1102
+ type: Type7;
1103
+ url: Url;
1017
1104
  }
1018
1105
  /**
1019
1106
  * Reasoning content.
@@ -1022,7 +1109,7 @@ export interface ContentText {
1022
1109
  */
1023
1110
  export interface ContentReasoning {
1024
1111
  internal: unknown;
1025
- type: Type4;
1112
+ type: Type8;
1026
1113
  reasoning: Reasoning;
1027
1114
  signature: Signature;
1028
1115
  redacted: Redacted;
@@ -1032,7 +1119,7 @@ export interface ContentReasoning {
1032
1119
  */
1033
1120
  export interface ContentImage {
1034
1121
  internal: unknown;
1035
- type: Type5;
1122
+ type: Type9;
1036
1123
  image: Image;
1037
1124
  detail: Detail;
1038
1125
  }
@@ -1041,7 +1128,7 @@ export interface ContentImage {
1041
1128
  */
1042
1129
  export interface ContentAudio {
1043
1130
  internal: unknown;
1044
- type: Type6;
1131
+ type: Type10;
1045
1132
  audio: Audio;
1046
1133
  format: Format1;
1047
1134
  }
@@ -1050,10 +1137,21 @@ export interface ContentAudio {
1050
1137
  */
1051
1138
  export interface ContentVideo {
1052
1139
  internal: unknown;
1053
- type: Type7;
1140
+ type: Type11;
1054
1141
  video: Video;
1055
1142
  format: Format2;
1056
1143
  }
1144
+ /**
1145
+ * Model internal.
1146
+ */
1147
+ export interface ContentData {
1148
+ internal: unknown;
1149
+ type: Type12;
1150
+ data: Data;
1151
+ }
1152
+ export interface Data {
1153
+ [k: string]: JsonValue;
1154
+ }
1057
1155
  /**
1058
1156
  * User chat message.
1059
1157
  */
@@ -1061,6 +1159,7 @@ export interface ChatMessageUser {
1061
1159
  id: Id2;
1062
1160
  content: Content1;
1063
1161
  source: Source1;
1162
+ metadata: Metadata6;
1064
1163
  internal: unknown;
1065
1164
  role: Role1;
1066
1165
  tool_call_id: ToolCallId;
@@ -1072,6 +1171,7 @@ export interface ChatMessageAssistant {
1072
1171
  id: Id3;
1073
1172
  content: Content2;
1074
1173
  source: Source2;
1174
+ metadata: Metadata7;
1075
1175
  internal: unknown;
1076
1176
  role: Role2;
1077
1177
  tool_calls: ToolCalls;
@@ -1084,7 +1184,7 @@ export interface ToolCall {
1084
1184
  internal: unknown;
1085
1185
  parse_error: ParseError;
1086
1186
  view: ToolCallContent | null;
1087
- type: Type8;
1187
+ type: Type13;
1088
1188
  }
1089
1189
  export interface Arguments {
1090
1190
  [k: string]: unknown;
@@ -1093,7 +1193,7 @@ export interface Arguments {
1093
1193
  * Content to include in tool call view.
1094
1194
  */
1095
1195
  export interface ToolCallContent {
1096
- title: Title;
1196
+ title: Title3;
1097
1197
  format: Format3;
1098
1198
  content: Content3;
1099
1199
  }
@@ -1104,6 +1204,7 @@ export interface ChatMessageTool {
1104
1204
  id: Id5;
1105
1205
  content: Content4;
1106
1206
  source: Source3;
1207
+ metadata: Metadata8;
1107
1208
  internal: unknown;
1108
1209
  role: Role3;
1109
1210
  tool_call_id: ToolCallId1;
@@ -1111,7 +1212,7 @@ export interface ChatMessageTool {
1111
1212
  error: ToolCallError | null;
1112
1213
  }
1113
1214
  export interface ToolCallError {
1114
- type: Type9;
1215
+ type: Type14;
1115
1216
  message: Message1;
1116
1217
  }
1117
1218
  /**
@@ -1122,7 +1223,7 @@ export interface ModelOutput {
1122
1223
  choices: Choices1;
1123
1224
  usage: ModelUsage1 | null;
1124
1225
  time: Time;
1125
- metadata: Metadata5;
1226
+ metadata: Metadata9;
1126
1227
  error: Error;
1127
1228
  }
1128
1229
  /**
@@ -1163,9 +1264,9 @@ export interface Score {
1163
1264
  value: Value1;
1164
1265
  answer: Answer;
1165
1266
  explanation: Explanation;
1166
- metadata: Metadata6;
1267
+ metadata: Metadata10;
1167
1268
  }
1168
- export interface Metadata7 {
1269
+ export interface Metadata11 {
1169
1270
  [k: string]: unknown;
1170
1271
  }
1171
1272
  export interface Store {
@@ -1191,7 +1292,7 @@ export interface Sample {
1191
1292
  choices: Choices2;
1192
1293
  target: Target1;
1193
1294
  id: Id6;
1194
- metadata: Metadata8;
1295
+ metadata: Metadata12;
1195
1296
  sandbox: SandboxEnvironmentSpec | null;
1196
1297
  files: Files1;
1197
1298
  setup: Setup1;
@@ -1205,7 +1306,7 @@ export interface SampleLimitEvent {
1205
1306
  working_start: WorkingStart1;
1206
1307
  pending: Pending1;
1207
1308
  event: Event1;
1208
- type: Type10;
1309
+ type: Type15;
1209
1310
  message: Message2;
1210
1311
  limit: Limit1;
1211
1312
  }
@@ -1322,7 +1423,7 @@ export interface ToolInfo {
1322
1423
  * Description of tool parameters object in JSON Schema format.
1323
1424
  */
1324
1425
  export interface ToolParams {
1325
- type: Type11;
1426
+ type: Type16;
1326
1427
  properties: Properties1;
1327
1428
  required: Required1;
1328
1429
  additionalProperties: Additionalproperties1;
@@ -1356,7 +1457,7 @@ export interface ToolEvent {
1356
1457
  working_start: WorkingStart6;
1357
1458
  pending: Pending6;
1358
1459
  event: Event6;
1359
- type: Type12;
1460
+ type: Type17;
1360
1461
  id: Id7;
1361
1462
  function: Function2;
1362
1463
  arguments: Arguments1;
@@ -1486,7 +1587,7 @@ export interface SpanBeginEvent {
1486
1587
  event: Event13;
1487
1588
  id: Id8;
1488
1589
  parent_id: ParentId;
1489
- type: Type13;
1590
+ type: Type18;
1490
1591
  name: Name11;
1491
1592
  }
1492
1593
  /**
@@ -1510,7 +1611,7 @@ export interface StepEvent {
1510
1611
  pending: Pending15;
1511
1612
  event: Event15;
1512
1613
  action: Action1;
1513
- type: Type14;
1614
+ type: Type19;
1514
1615
  name: Name12;
1515
1616
  }
1516
1617
  /**
@@ -1523,7 +1624,7 @@ export interface SubtaskEvent {
1523
1624
  pending: Pending16;
1524
1625
  event: Event16;
1525
1626
  name: Name13;
1526
- type: Type15;
1627
+ type: Type20;
1527
1628
  input: Input5;
1528
1629
  result: Result2;
1529
1630
  events: Events2;
@@ -1546,7 +1647,7 @@ export interface Attachments {
1546
1647
  * Limit encountered by sample.
1547
1648
  */
1548
1649
  export interface EvalSampleLimit {
1549
- type: Type16;
1650
+ type: Type21;
1550
1651
  limit: Limit2;
1551
1652
  }
1552
1653
  /**
@@ -1564,6 +1665,6 @@ export interface EvalSampleScore {
1564
1665
  value: Value2;
1565
1666
  answer: Answer1;
1566
1667
  explanation: Explanation2;
1567
- metadata: Metadata9;
1668
+ metadata: Metadata13;
1568
1669
  sample_id: SampleId1;
1569
1670
  }
@@ -204,6 +204,19 @@ export const RecordTree: FC<RecordTreeProps> = ({
204
204
  );
205
205
  };
206
206
 
207
+ if (!scrollRef) {
208
+ // No virtualization - render directly
209
+ return (
210
+ <div
211
+ id={id}
212
+ className={clsx(className, "samples-list")}
213
+ style={{ width: "100%" }}
214
+ tabIndex={0}
215
+ >
216
+ {items.map((_, index) => renderRow(index))}
217
+ </div>
218
+ );
219
+ }
207
220
  return (
208
221
  <Virtuoso
209
222
  ref={listHandle}
@@ -13,7 +13,7 @@ import { Navbar } from "./navbar/Navbar";
13
13
 
14
14
  import { useEvalSpec, useRefreshLog } from "../../state/hooks";
15
15
  import { useStore } from "../../state/store";
16
- import { useLogNavigation } from "../routing/navigationHooks";
16
+ import { useLogNavigation } from "../routing/logNavigation";
17
17
  import styles from "./LogView.module.css";
18
18
  import { useInfoTabConfig } from "./tabs/InfoTab";
19
19
  import { useJsonTabConfig } from "./tabs/JsonTab";
@@ -0,0 +1,31 @@
1
+ import { useCallback } from "react";
2
+ import { useNavigate, useParams } from "react-router-dom";
3
+ import { useStore } from "../../state/store";
4
+ import { logUrl, logUrlRaw } from "./url";
5
+
6
+ export const useLogNavigation = () => {
7
+ const navigate = useNavigate();
8
+ const { logPath } = useParams<{ logPath: string }>();
9
+ const logs = useStore((state) => state.logs.logs);
10
+ const loadedLog = useStore((state) => state.log.loadedLog);
11
+
12
+ const selectTab = useCallback(
13
+ (tabId: string) => {
14
+ // Only update URL if we have a loaded log
15
+ if (loadedLog && logPath) {
16
+ // We already have the logPath from params, just navigate to the tab
17
+ const url = logUrlRaw(logPath, tabId);
18
+ navigate(url);
19
+ } else if (loadedLog) {
20
+ // Fallback to constructing the path if needed
21
+ const url = logUrl(loadedLog, logs.log_dir, tabId);
22
+ navigate(url);
23
+ }
24
+ },
25
+ [loadedLog, logPath, logs.log_dir, navigate],
26
+ );
27
+
28
+ return {
29
+ selectTab,
30
+ };
31
+ };