inspect-ai 0.3.103__py3-none-any.whl → 0.3.105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +2 -1
- inspect_ai/_cli/eval.py +2 -2
- inspect_ai/_display/core/active.py +3 -0
- inspect_ai/_display/core/config.py +1 -0
- inspect_ai/_display/core/panel.py +21 -13
- inspect_ai/_display/core/results.py +3 -7
- inspect_ai/_display/core/rich.py +3 -5
- inspect_ai/_display/log/__init__.py +0 -0
- inspect_ai/_display/log/display.py +173 -0
- inspect_ai/_display/plain/display.py +2 -2
- inspect_ai/_display/rich/display.py +2 -4
- inspect_ai/_display/textual/app.py +1 -6
- inspect_ai/_display/textual/widgets/task_detail.py +3 -14
- inspect_ai/_display/textual/widgets/tasks.py +1 -1
- inspect_ai/_eval/eval.py +1 -1
- inspect_ai/_eval/evalset.py +3 -3
- inspect_ai/_eval/registry.py +6 -1
- inspect_ai/_eval/run.py +5 -1
- inspect_ai/_eval/task/constants.py +1 -0
- inspect_ai/_eval/task/log.py +2 -0
- inspect_ai/_eval/task/run.py +65 -39
- inspect_ai/_util/citation.py +88 -0
- inspect_ai/_util/content.py +24 -2
- inspect_ai/_util/json.py +17 -2
- inspect_ai/_util/registry.py +19 -4
- inspect_ai/_view/schema.py +0 -6
- inspect_ai/_view/server.py +17 -0
- inspect_ai/_view/www/dist/assets/index.css +93 -31
- inspect_ai/_view/www/dist/assets/index.js +10639 -10011
- inspect_ai/_view/www/log-schema.json +418 -1
- inspect_ai/_view/www/node_modules/flatted/python/flatted.py +149 -0
- inspect_ai/_view/www/node_modules/katex/src/fonts/generate_fonts.py +58 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_tfms.py +114 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/extract_ttfs.py +122 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/format_json.py +28 -0
- inspect_ai/_view/www/node_modules/katex/src/metrics/parse_tfm.py +211 -0
- inspect_ai/_view/www/package.json +2 -2
- inspect_ai/_view/www/src/@types/log.d.ts +140 -39
- inspect_ai/_view/www/src/app/content/RecordTree.tsx +13 -0
- inspect_ai/_view/www/src/app/log-view/LogView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/logNavigation.ts +31 -0
- inspect_ai/_view/www/src/app/routing/{navigationHooks.ts → sampleNavigation.ts} +39 -86
- inspect_ai/_view/www/src/app/samples/SampleDialog.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/SampleDisplay.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.module.css +4 -0
- inspect_ai/_view/www/src/app/samples/chat/ChatMessage.tsx +17 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.module.css +16 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageCitations.tsx +63 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +6 -0
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.tsx +174 -25
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.tsx +21 -3
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.module.css +7 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/ContentDataView.tsx +111 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.module.css +10 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearch.tsx +14 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.module.css +19 -0
- inspect_ai/_view/www/src/app/samples/chat/content-data/WebSearchResults.tsx +49 -0
- inspect_ai/_view/www/src/app/samples/chat/messages.ts +7 -1
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.tsx +12 -2
- inspect_ai/_view/www/src/app/samples/chat/types.ts +4 -0
- inspect_ai/_view/www/src/app/samples/list/SampleList.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/sample-tools/filters.ts +26 -0
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/SampleFilter.tsx +14 -3
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/completions.ts +359 -7
- inspect_ai/_view/www/src/app/samples/sample-tools/sample-filter/language.ts +6 -0
- inspect_ai/_view/www/src/app/samples/sampleLimit.ts +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/ModelEventView.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/SampleLimitEventView.tsx +4 -4
- inspect_ai/_view/www/src/app/samples/transcript/outline/OutlineRow.tsx +1 -1
- inspect_ai/_view/www/src/app/samples/transcript/outline/TranscriptOutline.tsx +1 -1
- inspect_ai/_view/www/src/client/api/api-browser.ts +25 -0
- inspect_ai/_view/www/src/client/api/api-http.ts +3 -0
- inspect_ai/_view/www/src/client/api/api-vscode.ts +6 -0
- inspect_ai/_view/www/src/client/api/client-api.ts +3 -0
- inspect_ai/_view/www/src/client/api/jsonrpc.ts +1 -0
- inspect_ai/_view/www/src/client/api/types.ts +3 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +15 -2
- inspect_ai/_view/www/src/state/samplePolling.ts +17 -1
- inspect_ai/_view/www/src/tests/README.md +2 -2
- inspect_ai/_view/www/src/utils/git.ts +3 -1
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/agent/_handoff.py +8 -5
- inspect_ai/agent/_react.py +5 -5
- inspect_ai/dataset/_dataset.py +1 -1
- inspect_ai/log/_condense.py +5 -0
- inspect_ai/log/_file.py +4 -1
- inspect_ai/log/_log.py +9 -4
- inspect_ai/log/_recorders/json.py +4 -2
- inspect_ai/log/_samples.py +5 -0
- inspect_ai/log/_util.py +2 -0
- inspect_ai/model/__init__.py +14 -0
- inspect_ai/model/_call_tools.py +17 -8
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_openai_responses.py +80 -34
- inspect_ai/model/_providers/_anthropic_citations.py +158 -0
- inspect_ai/model/_providers/_google_citations.py +100 -0
- inspect_ai/model/_providers/anthropic.py +219 -36
- inspect_ai/model/_providers/google.py +98 -22
- inspect_ai/model/_providers/mistral.py +20 -7
- inspect_ai/model/_providers/openai.py +11 -10
- inspect_ai/model/_providers/openai_compatible.py +3 -2
- inspect_ai/model/_providers/openai_responses.py +2 -5
- inspect_ai/model/_providers/perplexity.py +123 -0
- inspect_ai/model/_providers/providers.py +13 -2
- inspect_ai/model/_providers/vertex.py +3 -0
- inspect_ai/model/_trim.py +5 -0
- inspect_ai/tool/__init__.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +5 -2
- inspect_ai/tool/_mcp/sampling.py +19 -3
- inspect_ai/tool/_mcp/server.py +1 -1
- inspect_ai/tool/_tool.py +10 -1
- inspect_ai/tool/_tools/_web_search/_base_http_provider.py +104 -0
- inspect_ai/tool/_tools/_web_search/_exa.py +78 -0
- inspect_ai/tool/_tools/_web_search/_google.py +22 -25
- inspect_ai/tool/_tools/_web_search/_tavily.py +47 -65
- inspect_ai/tool/_tools/_web_search/_web_search.py +83 -36
- inspect_ai/tool/_tools/_web_search/_web_search_provider.py +7 -0
- inspect_ai/util/__init__.py +8 -0
- inspect_ai/util/_background.py +64 -0
- inspect_ai/util/_display.py +11 -2
- inspect_ai/util/_limit.py +72 -5
- inspect_ai/util/_sandbox/__init__.py +2 -0
- inspect_ai/util/_sandbox/docker/compose.py +2 -2
- inspect_ai/util/_sandbox/service.py +28 -7
- inspect_ai/util/_span.py +12 -1
- inspect_ai/util/_subprocess.py +51 -38
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/METADATA +2 -2
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/RECORD +134 -109
- /inspect_ai/model/{_openai_computer_use.py → _providers/_openai_computer_use.py} +0 -0
- /inspect_ai/model/{_openai_web_search.py → _providers/_openai_web_search.py} +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.103.dist-info → inspect_ai-0.3.105.dist-info}/top_level.txt +0 -0
@@ -194,24 +194,55 @@ export type Content =
|
|
194
194
|
| ContentImage
|
195
195
|
| ContentAudio
|
196
196
|
| ContentVideo
|
197
|
+
| ContentData
|
197
198
|
)[];
|
198
199
|
export type Type3 = "text";
|
199
200
|
export type Text = string;
|
200
201
|
export type Refusal = boolean | null;
|
201
|
-
export type
|
202
|
+
export type Citations =
|
203
|
+
| (ContentCitation | DocumentCitation | UrlCitation)[]
|
204
|
+
| null;
|
205
|
+
export type CitedText = string | [number, number] | null;
|
206
|
+
export type Title = string | null;
|
207
|
+
export type Internal = {
|
208
|
+
[k: string]: JsonValue;
|
209
|
+
} | null;
|
210
|
+
export type JsonValue = unknown;
|
211
|
+
export type Type4 = "content";
|
212
|
+
export type CitedText1 = string | [number, number] | null;
|
213
|
+
export type Title1 = string | null;
|
214
|
+
export type Internal1 = {
|
215
|
+
[k: string]: JsonValue;
|
216
|
+
} | null;
|
217
|
+
export type Type5 = "document";
|
218
|
+
export type Type6 = "block" | "page" | "char";
|
219
|
+
export type StartIndex = number;
|
220
|
+
export type EndIndex = number;
|
221
|
+
export type CitedText2 = string | [number, number] | null;
|
222
|
+
export type Title2 = string | null;
|
223
|
+
export type Internal2 = {
|
224
|
+
[k: string]: JsonValue;
|
225
|
+
} | null;
|
226
|
+
export type Type7 = "url";
|
227
|
+
export type Url = string;
|
228
|
+
export type Type8 = "reasoning";
|
202
229
|
export type Reasoning = string;
|
203
230
|
export type Signature = string | null;
|
204
231
|
export type Redacted = boolean;
|
205
|
-
export type
|
232
|
+
export type Type9 = "image";
|
206
233
|
export type Image = string;
|
207
234
|
export type Detail = "auto" | "low" | "high";
|
208
|
-
export type
|
235
|
+
export type Type10 = "audio";
|
209
236
|
export type Audio = string;
|
210
237
|
export type Format1 = "wav" | "mp3";
|
211
|
-
export type
|
238
|
+
export type Type11 = "video";
|
212
239
|
export type Video = string;
|
213
240
|
export type Format2 = "mp4" | "mpeg" | "mov";
|
241
|
+
export type Type12 = "data";
|
214
242
|
export type Source = ("input" | "generate") | null;
|
243
|
+
export type Metadata5 = {
|
244
|
+
[k: string]: unknown;
|
245
|
+
} | null;
|
215
246
|
export type Role = "system";
|
216
247
|
export type Id2 = string | null;
|
217
248
|
export type Content1 =
|
@@ -222,8 +253,12 @@ export type Content1 =
|
|
222
253
|
| ContentImage
|
223
254
|
| ContentAudio
|
224
255
|
| ContentVideo
|
256
|
+
| ContentData
|
225
257
|
)[];
|
226
258
|
export type Source1 = ("input" | "generate") | null;
|
259
|
+
export type Metadata6 = {
|
260
|
+
[k: string]: unknown;
|
261
|
+
} | null;
|
227
262
|
export type Role1 = "user";
|
228
263
|
export type ToolCallId = string[] | null;
|
229
264
|
export type Id3 = string | null;
|
@@ -235,17 +270,21 @@ export type Content2 =
|
|
235
270
|
| ContentImage
|
236
271
|
| ContentAudio
|
237
272
|
| ContentVideo
|
273
|
+
| ContentData
|
238
274
|
)[];
|
239
275
|
export type Source2 = ("input" | "generate") | null;
|
276
|
+
export type Metadata7 = {
|
277
|
+
[k: string]: unknown;
|
278
|
+
} | null;
|
240
279
|
export type Role2 = "assistant";
|
241
280
|
export type ToolCalls = ToolCall[] | null;
|
242
281
|
export type Id4 = string;
|
243
282
|
export type Function = string;
|
244
283
|
export type ParseError = string | null;
|
245
|
-
export type
|
284
|
+
export type Title3 = string | null;
|
246
285
|
export type Format3 = "text" | "markdown";
|
247
286
|
export type Content3 = string;
|
248
|
-
export type
|
287
|
+
export type Type13 = string | null;
|
249
288
|
export type Model2 = string | null;
|
250
289
|
export type Id5 = string | null;
|
251
290
|
export type Content4 =
|
@@ -256,12 +295,16 @@ export type Content4 =
|
|
256
295
|
| ContentImage
|
257
296
|
| ContentAudio
|
258
297
|
| ContentVideo
|
298
|
+
| ContentData
|
259
299
|
)[];
|
260
300
|
export type Source3 = ("input" | "generate") | null;
|
301
|
+
export type Metadata8 = {
|
302
|
+
[k: string]: unknown;
|
303
|
+
} | null;
|
261
304
|
export type Role3 = "tool";
|
262
305
|
export type ToolCallId1 = string | null;
|
263
306
|
export type Function1 = string | null;
|
264
|
-
export type
|
307
|
+
export type Type14 =
|
265
308
|
| "parsing"
|
266
309
|
| "timeout"
|
267
310
|
| "unicode_decode"
|
@@ -301,7 +344,7 @@ export type Bytes1 = number[] | null;
|
|
301
344
|
export type Content5 = Logprob[];
|
302
345
|
export type Choices1 = ChatCompletionChoice[];
|
303
346
|
export type Time = number | null;
|
304
|
-
export type
|
347
|
+
export type Metadata9 = {
|
305
348
|
[k: string]: unknown;
|
306
349
|
} | null;
|
307
350
|
export type Error = string | null;
|
@@ -318,7 +361,7 @@ export type Value1 =
|
|
318
361
|
};
|
319
362
|
export type Answer = string | null;
|
320
363
|
export type Explanation = string | null;
|
321
|
-
export type
|
364
|
+
export type Metadata10 = {
|
322
365
|
[k: string]: unknown;
|
323
366
|
} | null;
|
324
367
|
export type SpanId = string | null;
|
@@ -337,20 +380,19 @@ export type Input1 =
|
|
337
380
|
export type Choices2 = string[] | null;
|
338
381
|
export type Target1 = string | string[];
|
339
382
|
export type Id6 = number | string | null;
|
340
|
-
export type
|
383
|
+
export type Metadata12 = {
|
341
384
|
[k: string]: unknown;
|
342
385
|
} | null;
|
343
386
|
export type Files1 = {
|
344
387
|
[k: string]: string;
|
345
388
|
} | null;
|
346
389
|
export type Setup1 = string | null;
|
347
|
-
export type JsonValue = unknown;
|
348
390
|
export type SpanId1 = string | null;
|
349
391
|
export type Timestamp1 = string;
|
350
392
|
export type WorkingStart1 = number;
|
351
393
|
export type Pending1 = boolean | null;
|
352
394
|
export type Event1 = "sample_limit";
|
353
|
-
export type
|
395
|
+
export type Type15 =
|
354
396
|
| "message"
|
355
397
|
| "time"
|
356
398
|
| "working"
|
@@ -404,7 +446,7 @@ export type Input3 = (
|
|
404
446
|
)[];
|
405
447
|
export type Name8 = string;
|
406
448
|
export type Description2 = string;
|
407
|
-
export type
|
449
|
+
export type Type16 = "object";
|
408
450
|
export type Required1 = string[];
|
409
451
|
export type Additionalproperties1 = boolean;
|
410
452
|
export type Options3 = {
|
@@ -424,7 +466,7 @@ export type Timestamp6 = string;
|
|
424
466
|
export type WorkingStart6 = number;
|
425
467
|
export type Pending6 = boolean | null;
|
426
468
|
export type Event6 = "tool";
|
427
|
-
export type
|
469
|
+
export type Type17 = "function";
|
428
470
|
export type Id7 = string;
|
429
471
|
export type Function2 = string;
|
430
472
|
export type Result1 =
|
@@ -436,12 +478,14 @@ export type Result1 =
|
|
436
478
|
| ContentImage
|
437
479
|
| ContentAudio
|
438
480
|
| ContentVideo
|
481
|
+
| ContentData
|
439
482
|
| (
|
440
483
|
| ContentText
|
441
484
|
| ContentReasoning
|
442
485
|
| ContentImage
|
443
486
|
| ContentAudio
|
444
487
|
| ContentVideo
|
488
|
+
| ContentData
|
445
489
|
)[];
|
446
490
|
export type Truncated = [unknown, unknown] | null;
|
447
491
|
export type SpanId7 = string | null;
|
@@ -510,7 +554,7 @@ export type Pending13 = boolean | null;
|
|
510
554
|
export type Event13 = "span_begin";
|
511
555
|
export type Id8 = string;
|
512
556
|
export type ParentId = string | null;
|
513
|
-
export type
|
557
|
+
export type Type18 = string | null;
|
514
558
|
export type Name11 = string;
|
515
559
|
export type SpanId14 = string | null;
|
516
560
|
export type Timestamp14 = string;
|
@@ -524,7 +568,7 @@ export type WorkingStart15 = number;
|
|
524
568
|
export type Pending15 = boolean | null;
|
525
569
|
export type Event15 = "step";
|
526
570
|
export type Action1 = "begin" | "end";
|
527
|
-
export type
|
571
|
+
export type Type19 = string | null;
|
528
572
|
export type Name12 = string;
|
529
573
|
export type SpanId16 = string | null;
|
530
574
|
export type Timestamp16 = string;
|
@@ -532,7 +576,7 @@ export type WorkingStart16 = number;
|
|
532
576
|
export type Pending16 = boolean | null;
|
533
577
|
export type Event16 = "subtask";
|
534
578
|
export type Name13 = string;
|
535
|
-
export type
|
579
|
+
export type Type20 = string | null;
|
536
580
|
export type Events2 = (
|
537
581
|
| SampleInitEvent
|
538
582
|
| SampleLimitEvent
|
@@ -600,7 +644,7 @@ export type TotalTime = number | null;
|
|
600
644
|
export type WorkingTime3 = number | null;
|
601
645
|
export type Uuid = string | null;
|
602
646
|
export type ErrorRetries = EvalError[] | null;
|
603
|
-
export type
|
647
|
+
export type Type21 =
|
604
648
|
| "context"
|
605
649
|
| "time"
|
606
650
|
| "working"
|
@@ -622,7 +666,7 @@ export type Value2 =
|
|
622
666
|
};
|
623
667
|
export type Answer1 = string | null;
|
624
668
|
export type Explanation2 = string | null;
|
625
|
-
export type
|
669
|
+
export type Metadata13 = {
|
626
670
|
[k: string]: unknown;
|
627
671
|
} | null;
|
628
672
|
export type SampleId1 = string | number | null;
|
@@ -658,6 +702,7 @@ export interface EvalSpec {
|
|
658
702
|
task_registry_name: TaskRegistryName;
|
659
703
|
task_attribs: TaskAttribs;
|
660
704
|
task_args: TaskArgs;
|
705
|
+
task_args_passed: TaskArgsPassed;
|
661
706
|
solver: Solver;
|
662
707
|
solver_args: SolverArgs;
|
663
708
|
tags: Tags;
|
@@ -681,6 +726,9 @@ export interface TaskAttribs {
|
|
681
726
|
export interface TaskArgs {
|
682
727
|
[k: string]: unknown;
|
683
728
|
}
|
729
|
+
export interface TaskArgsPassed {
|
730
|
+
[k: string]: unknown;
|
731
|
+
}
|
684
732
|
/**
|
685
733
|
* Dataset used for evaluation.
|
686
734
|
*/
|
@@ -984,7 +1032,7 @@ export interface EvalSample {
|
|
984
1032
|
messages: Messages;
|
985
1033
|
output: ModelOutput;
|
986
1034
|
scores: Scores1;
|
987
|
-
metadata:
|
1035
|
+
metadata: Metadata11;
|
988
1036
|
store: Store;
|
989
1037
|
events: Events;
|
990
1038
|
model_usage: ModelUsage2;
|
@@ -1003,6 +1051,7 @@ export interface ChatMessageSystem {
|
|
1003
1051
|
id: Id1;
|
1004
1052
|
content: Content;
|
1005
1053
|
source: Source;
|
1054
|
+
metadata: Metadata5;
|
1006
1055
|
internal: unknown;
|
1007
1056
|
role: Role;
|
1008
1057
|
}
|
@@ -1014,6 +1063,44 @@ export interface ContentText {
|
|
1014
1063
|
type: Type3;
|
1015
1064
|
text: Text;
|
1016
1065
|
refusal: Refusal;
|
1066
|
+
citations: Citations;
|
1067
|
+
}
|
1068
|
+
/**
|
1069
|
+
* A generic content citation.
|
1070
|
+
*/
|
1071
|
+
export interface ContentCitation {
|
1072
|
+
cited_text: CitedText;
|
1073
|
+
title: Title;
|
1074
|
+
internal: Internal;
|
1075
|
+
type: Type4;
|
1076
|
+
}
|
1077
|
+
/**
|
1078
|
+
* A citation that refers to a page range in a document.
|
1079
|
+
*/
|
1080
|
+
export interface DocumentCitation {
|
1081
|
+
cited_text: CitedText1;
|
1082
|
+
title: Title1;
|
1083
|
+
internal: Internal1;
|
1084
|
+
type: Type5;
|
1085
|
+
range: DocumentRange | null;
|
1086
|
+
}
|
1087
|
+
/**
|
1088
|
+
* A range specifying a section of a document.
|
1089
|
+
*/
|
1090
|
+
export interface DocumentRange {
|
1091
|
+
type: Type6;
|
1092
|
+
start_index: StartIndex;
|
1093
|
+
end_index: EndIndex;
|
1094
|
+
}
|
1095
|
+
/**
|
1096
|
+
* A citation that refers to a URL.
|
1097
|
+
*/
|
1098
|
+
export interface UrlCitation {
|
1099
|
+
cited_text: CitedText2;
|
1100
|
+
title: Title2;
|
1101
|
+
internal: Internal2;
|
1102
|
+
type: Type7;
|
1103
|
+
url: Url;
|
1017
1104
|
}
|
1018
1105
|
/**
|
1019
1106
|
* Reasoning content.
|
@@ -1022,7 +1109,7 @@ export interface ContentText {
|
|
1022
1109
|
*/
|
1023
1110
|
export interface ContentReasoning {
|
1024
1111
|
internal: unknown;
|
1025
|
-
type:
|
1112
|
+
type: Type8;
|
1026
1113
|
reasoning: Reasoning;
|
1027
1114
|
signature: Signature;
|
1028
1115
|
redacted: Redacted;
|
@@ -1032,7 +1119,7 @@ export interface ContentReasoning {
|
|
1032
1119
|
*/
|
1033
1120
|
export interface ContentImage {
|
1034
1121
|
internal: unknown;
|
1035
|
-
type:
|
1122
|
+
type: Type9;
|
1036
1123
|
image: Image;
|
1037
1124
|
detail: Detail;
|
1038
1125
|
}
|
@@ -1041,7 +1128,7 @@ export interface ContentImage {
|
|
1041
1128
|
*/
|
1042
1129
|
export interface ContentAudio {
|
1043
1130
|
internal: unknown;
|
1044
|
-
type:
|
1131
|
+
type: Type10;
|
1045
1132
|
audio: Audio;
|
1046
1133
|
format: Format1;
|
1047
1134
|
}
|
@@ -1050,10 +1137,21 @@ export interface ContentAudio {
|
|
1050
1137
|
*/
|
1051
1138
|
export interface ContentVideo {
|
1052
1139
|
internal: unknown;
|
1053
|
-
type:
|
1140
|
+
type: Type11;
|
1054
1141
|
video: Video;
|
1055
1142
|
format: Format2;
|
1056
1143
|
}
|
1144
|
+
/**
|
1145
|
+
* Model internal.
|
1146
|
+
*/
|
1147
|
+
export interface ContentData {
|
1148
|
+
internal: unknown;
|
1149
|
+
type: Type12;
|
1150
|
+
data: Data;
|
1151
|
+
}
|
1152
|
+
export interface Data {
|
1153
|
+
[k: string]: JsonValue;
|
1154
|
+
}
|
1057
1155
|
/**
|
1058
1156
|
* User chat message.
|
1059
1157
|
*/
|
@@ -1061,6 +1159,7 @@ export interface ChatMessageUser {
|
|
1061
1159
|
id: Id2;
|
1062
1160
|
content: Content1;
|
1063
1161
|
source: Source1;
|
1162
|
+
metadata: Metadata6;
|
1064
1163
|
internal: unknown;
|
1065
1164
|
role: Role1;
|
1066
1165
|
tool_call_id: ToolCallId;
|
@@ -1072,6 +1171,7 @@ export interface ChatMessageAssistant {
|
|
1072
1171
|
id: Id3;
|
1073
1172
|
content: Content2;
|
1074
1173
|
source: Source2;
|
1174
|
+
metadata: Metadata7;
|
1075
1175
|
internal: unknown;
|
1076
1176
|
role: Role2;
|
1077
1177
|
tool_calls: ToolCalls;
|
@@ -1084,7 +1184,7 @@ export interface ToolCall {
|
|
1084
1184
|
internal: unknown;
|
1085
1185
|
parse_error: ParseError;
|
1086
1186
|
view: ToolCallContent | null;
|
1087
|
-
type:
|
1187
|
+
type: Type13;
|
1088
1188
|
}
|
1089
1189
|
export interface Arguments {
|
1090
1190
|
[k: string]: unknown;
|
@@ -1093,7 +1193,7 @@ export interface Arguments {
|
|
1093
1193
|
* Content to include in tool call view.
|
1094
1194
|
*/
|
1095
1195
|
export interface ToolCallContent {
|
1096
|
-
title:
|
1196
|
+
title: Title3;
|
1097
1197
|
format: Format3;
|
1098
1198
|
content: Content3;
|
1099
1199
|
}
|
@@ -1104,6 +1204,7 @@ export interface ChatMessageTool {
|
|
1104
1204
|
id: Id5;
|
1105
1205
|
content: Content4;
|
1106
1206
|
source: Source3;
|
1207
|
+
metadata: Metadata8;
|
1107
1208
|
internal: unknown;
|
1108
1209
|
role: Role3;
|
1109
1210
|
tool_call_id: ToolCallId1;
|
@@ -1111,7 +1212,7 @@ export interface ChatMessageTool {
|
|
1111
1212
|
error: ToolCallError | null;
|
1112
1213
|
}
|
1113
1214
|
export interface ToolCallError {
|
1114
|
-
type:
|
1215
|
+
type: Type14;
|
1115
1216
|
message: Message1;
|
1116
1217
|
}
|
1117
1218
|
/**
|
@@ -1122,7 +1223,7 @@ export interface ModelOutput {
|
|
1122
1223
|
choices: Choices1;
|
1123
1224
|
usage: ModelUsage1 | null;
|
1124
1225
|
time: Time;
|
1125
|
-
metadata:
|
1226
|
+
metadata: Metadata9;
|
1126
1227
|
error: Error;
|
1127
1228
|
}
|
1128
1229
|
/**
|
@@ -1163,9 +1264,9 @@ export interface Score {
|
|
1163
1264
|
value: Value1;
|
1164
1265
|
answer: Answer;
|
1165
1266
|
explanation: Explanation;
|
1166
|
-
metadata:
|
1267
|
+
metadata: Metadata10;
|
1167
1268
|
}
|
1168
|
-
export interface
|
1269
|
+
export interface Metadata11 {
|
1169
1270
|
[k: string]: unknown;
|
1170
1271
|
}
|
1171
1272
|
export interface Store {
|
@@ -1191,7 +1292,7 @@ export interface Sample {
|
|
1191
1292
|
choices: Choices2;
|
1192
1293
|
target: Target1;
|
1193
1294
|
id: Id6;
|
1194
|
-
metadata:
|
1295
|
+
metadata: Metadata12;
|
1195
1296
|
sandbox: SandboxEnvironmentSpec | null;
|
1196
1297
|
files: Files1;
|
1197
1298
|
setup: Setup1;
|
@@ -1205,7 +1306,7 @@ export interface SampleLimitEvent {
|
|
1205
1306
|
working_start: WorkingStart1;
|
1206
1307
|
pending: Pending1;
|
1207
1308
|
event: Event1;
|
1208
|
-
type:
|
1309
|
+
type: Type15;
|
1209
1310
|
message: Message2;
|
1210
1311
|
limit: Limit1;
|
1211
1312
|
}
|
@@ -1322,7 +1423,7 @@ export interface ToolInfo {
|
|
1322
1423
|
* Description of tool parameters object in JSON Schema format.
|
1323
1424
|
*/
|
1324
1425
|
export interface ToolParams {
|
1325
|
-
type:
|
1426
|
+
type: Type16;
|
1326
1427
|
properties: Properties1;
|
1327
1428
|
required: Required1;
|
1328
1429
|
additionalProperties: Additionalproperties1;
|
@@ -1356,7 +1457,7 @@ export interface ToolEvent {
|
|
1356
1457
|
working_start: WorkingStart6;
|
1357
1458
|
pending: Pending6;
|
1358
1459
|
event: Event6;
|
1359
|
-
type:
|
1460
|
+
type: Type17;
|
1360
1461
|
id: Id7;
|
1361
1462
|
function: Function2;
|
1362
1463
|
arguments: Arguments1;
|
@@ -1486,7 +1587,7 @@ export interface SpanBeginEvent {
|
|
1486
1587
|
event: Event13;
|
1487
1588
|
id: Id8;
|
1488
1589
|
parent_id: ParentId;
|
1489
|
-
type:
|
1590
|
+
type: Type18;
|
1490
1591
|
name: Name11;
|
1491
1592
|
}
|
1492
1593
|
/**
|
@@ -1510,7 +1611,7 @@ export interface StepEvent {
|
|
1510
1611
|
pending: Pending15;
|
1511
1612
|
event: Event15;
|
1512
1613
|
action: Action1;
|
1513
|
-
type:
|
1614
|
+
type: Type19;
|
1514
1615
|
name: Name12;
|
1515
1616
|
}
|
1516
1617
|
/**
|
@@ -1523,7 +1624,7 @@ export interface SubtaskEvent {
|
|
1523
1624
|
pending: Pending16;
|
1524
1625
|
event: Event16;
|
1525
1626
|
name: Name13;
|
1526
|
-
type:
|
1627
|
+
type: Type20;
|
1527
1628
|
input: Input5;
|
1528
1629
|
result: Result2;
|
1529
1630
|
events: Events2;
|
@@ -1546,7 +1647,7 @@ export interface Attachments {
|
|
1546
1647
|
* Limit encountered by sample.
|
1547
1648
|
*/
|
1548
1649
|
export interface EvalSampleLimit {
|
1549
|
-
type:
|
1650
|
+
type: Type21;
|
1550
1651
|
limit: Limit2;
|
1551
1652
|
}
|
1552
1653
|
/**
|
@@ -1564,6 +1665,6 @@ export interface EvalSampleScore {
|
|
1564
1665
|
value: Value2;
|
1565
1666
|
answer: Answer1;
|
1566
1667
|
explanation: Explanation2;
|
1567
|
-
metadata:
|
1668
|
+
metadata: Metadata13;
|
1568
1669
|
sample_id: SampleId1;
|
1569
1670
|
}
|
@@ -204,6 +204,19 @@ export const RecordTree: FC<RecordTreeProps> = ({
|
|
204
204
|
);
|
205
205
|
};
|
206
206
|
|
207
|
+
if (!scrollRef) {
|
208
|
+
// No virtualization - render directly
|
209
|
+
return (
|
210
|
+
<div
|
211
|
+
id={id}
|
212
|
+
className={clsx(className, "samples-list")}
|
213
|
+
style={{ width: "100%" }}
|
214
|
+
tabIndex={0}
|
215
|
+
>
|
216
|
+
{items.map((_, index) => renderRow(index))}
|
217
|
+
</div>
|
218
|
+
);
|
219
|
+
}
|
207
220
|
return (
|
208
221
|
<Virtuoso
|
209
222
|
ref={listHandle}
|
@@ -13,7 +13,7 @@ import { Navbar } from "./navbar/Navbar";
|
|
13
13
|
|
14
14
|
import { useEvalSpec, useRefreshLog } from "../../state/hooks";
|
15
15
|
import { useStore } from "../../state/store";
|
16
|
-
import { useLogNavigation } from "../routing/
|
16
|
+
import { useLogNavigation } from "../routing/logNavigation";
|
17
17
|
import styles from "./LogView.module.css";
|
18
18
|
import { useInfoTabConfig } from "./tabs/InfoTab";
|
19
19
|
import { useJsonTabConfig } from "./tabs/JsonTab";
|
@@ -0,0 +1,31 @@
|
|
1
|
+
import { useCallback } from "react";
|
2
|
+
import { useNavigate, useParams } from "react-router-dom";
|
3
|
+
import { useStore } from "../../state/store";
|
4
|
+
import { logUrl, logUrlRaw } from "./url";
|
5
|
+
|
6
|
+
export const useLogNavigation = () => {
|
7
|
+
const navigate = useNavigate();
|
8
|
+
const { logPath } = useParams<{ logPath: string }>();
|
9
|
+
const logs = useStore((state) => state.logs.logs);
|
10
|
+
const loadedLog = useStore((state) => state.log.loadedLog);
|
11
|
+
|
12
|
+
const selectTab = useCallback(
|
13
|
+
(tabId: string) => {
|
14
|
+
// Only update URL if we have a loaded log
|
15
|
+
if (loadedLog && logPath) {
|
16
|
+
// We already have the logPath from params, just navigate to the tab
|
17
|
+
const url = logUrlRaw(logPath, tabId);
|
18
|
+
navigate(url);
|
19
|
+
} else if (loadedLog) {
|
20
|
+
// Fallback to constructing the path if needed
|
21
|
+
const url = logUrl(loadedLog, logs.log_dir, tabId);
|
22
|
+
navigate(url);
|
23
|
+
}
|
24
|
+
},
|
25
|
+
[loadedLog, logPath, logs.log_dir, navigate],
|
26
|
+
);
|
27
|
+
|
28
|
+
return {
|
29
|
+
selectTab,
|
30
|
+
};
|
31
|
+
};
|