inspect-ai 0.3.69__py3-none-any.whl → 0.3.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/eval.py +27 -9
- inspect_ai/_display/core/display.py +2 -0
- inspect_ai/_display/core/footer.py +13 -3
- inspect_ai/_display/plain/display.py +6 -2
- inspect_ai/_display/rich/display.py +19 -6
- inspect_ai/_display/textual/app.py +9 -3
- inspect_ai/_display/textual/display.py +4 -0
- inspect_ai/_display/textual/widgets/samples.py +4 -10
- inspect_ai/_display/textual/widgets/transcript.py +35 -18
- inspect_ai/_eval/eval.py +14 -2
- inspect_ai/_eval/evalset.py +6 -1
- inspect_ai/_eval/run.py +6 -0
- inspect_ai/_eval/task/run.py +49 -23
- inspect_ai/_eval/task/task.py +26 -3
- inspect_ai/_util/content.py +20 -1
- inspect_ai/_util/interrupt.py +6 -0
- inspect_ai/_util/logger.py +19 -0
- inspect_ai/_util/rich.py +7 -8
- inspect_ai/_util/text.py +13 -0
- inspect_ai/_util/transcript.py +20 -6
- inspect_ai/_util/working.py +50 -0
- inspect_ai/_view/www/App.css +6 -0
- inspect_ai/_view/www/dist/assets/index.css +171 -99
- inspect_ai/_view/www/dist/assets/index.js +5972 -2770
- inspect_ai/_view/www/eslint.config.mjs +24 -1
- inspect_ai/_view/www/log-schema.json +619 -21
- inspect_ai/_view/www/package.json +8 -3
- inspect_ai/_view/www/src/App.tsx +2 -2
- inspect_ai/_view/www/src/appearance/icons.ts +3 -1
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +4 -3
- inspect_ai/_view/www/src/components/Card.tsx +9 -8
- inspect_ai/_view/www/src/components/DownloadButton.tsx +2 -1
- inspect_ai/_view/www/src/components/EmptyPanel.tsx +2 -2
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +4 -3
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +13 -5
- inspect_ai/_view/www/src/components/FindBand.tsx +3 -3
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +3 -3
- inspect_ai/_view/www/src/components/LabeledValue.tsx +5 -4
- inspect_ai/_view/www/src/components/LargeModal.tsx +18 -13
- inspect_ai/_view/www/src/components/{LightboxCarousel.css → LightboxCarousel.module.css} +22 -18
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +36 -27
- inspect_ai/_view/www/src/components/MessageBand.tsx +2 -1
- inspect_ai/_view/www/src/components/NavPills.tsx +9 -8
- inspect_ai/_view/www/src/components/ProgressBar.tsx +2 -1
- inspect_ai/_view/www/src/components/TabSet.tsx +21 -15
- inspect_ai/_view/www/src/index.tsx +2 -2
- inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +11 -9
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +3 -2
- inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +1 -0
- inspect_ai/_view/www/src/metadata/RenderedContent.tsx +16 -1
- inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +3 -2
- inspect_ai/_view/www/src/plan/DetailStep.tsx +2 -1
- inspect_ai/_view/www/src/plan/PlanCard.tsx +2 -5
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +6 -9
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +2 -1
- inspect_ai/_view/www/src/plan/SolverDetailView.tsx +3 -3
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +2 -2
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +3 -3
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +9 -1
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +30 -3
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +4 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +25 -4
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +3 -19
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatView.tsx +2 -1
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +22 -7
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +35 -6
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +2 -2
- inspect_ai/_view/www/src/samples/chat/messages.ts +15 -2
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +13 -4
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +2 -2
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +18 -19
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +1 -1
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +4 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +2 -2
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +2 -3
- inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +3 -2
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +57 -45
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +2 -1
- inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +2 -1
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +2 -2
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +4 -3
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +2 -5
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +2 -2
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +2 -1
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +4 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +12 -2
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +1 -1
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +25 -28
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +9 -4
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.module.css +32 -0
- inspect_ai/_view/www/src/samples/transcript/SandboxEventView.tsx +153 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +12 -5
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +18 -14
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +5 -5
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +53 -16
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +6 -3
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +3 -2
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +2 -2
- inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.module.css +28 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventTimingPanel.tsx +115 -0
- inspect_ai/_view/www/src/samples/transcript/event/utils.ts +29 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +2 -1
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +3 -3
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +11 -8
- inspect_ai/_view/www/src/samples/transcript/types.ts +3 -1
- inspect_ai/_view/www/src/types/log.d.ts +312 -137
- inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +6 -10
- inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +4 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +32 -9
- inspect_ai/_view/www/src/usage/TokenTable.tsx +4 -6
- inspect_ai/_view/www/src/usage/UsageCard.tsx +2 -1
- inspect_ai/_view/www/src/utils/format.ts +8 -5
- inspect_ai/_view/www/src/utils/json.ts +24 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +6 -5
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +18 -8
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +2 -1
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +2 -1
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +3 -3
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +4 -3
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +5 -4
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +5 -8
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +5 -4
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +2 -2
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +2 -1
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +2 -2
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +2 -2
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +2 -5
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +12 -11
- inspect_ai/_view/www/yarn.lock +241 -5
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_condense.py +4 -0
- inspect_ai/log/_log.py +72 -12
- inspect_ai/log/_recorders/eval.py +6 -1
- inspect_ai/log/_samples.py +5 -1
- inspect_ai/log/_transcript.py +89 -2
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_call_tools.py +8 -1
- inspect_ai/model/_chat_message.py +22 -7
- inspect_ai/model/_conversation.py +11 -9
- inspect_ai/model/_generate_config.py +25 -4
- inspect_ai/model/_model.py +164 -72
- inspect_ai/model/_model_call.py +10 -3
- inspect_ai/model/_model_output.py +3 -0
- inspect_ai/model/_openai.py +106 -40
- inspect_ai/model/_providers/anthropic.py +145 -26
- inspect_ai/model/_providers/bedrock.py +7 -0
- inspect_ai/model/_providers/cloudflare.py +20 -7
- inspect_ai/model/_providers/google.py +29 -8
- inspect_ai/model/_providers/groq.py +66 -27
- inspect_ai/model/_providers/hf.py +6 -0
- inspect_ai/model/_providers/mistral.py +78 -51
- inspect_ai/model/_providers/openai.py +66 -4
- inspect_ai/model/_providers/openai_o1.py +10 -0
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/util/tracker.py +92 -0
- inspect_ai/model/_providers/vllm.py +13 -5
- inspect_ai/model/_reasoning.py +15 -2
- inspect_ai/scorer/_model.py +23 -19
- inspect_ai/solver/_basic_agent.py +1 -3
- inspect_ai/solver/_bridge/patch.py +0 -2
- inspect_ai/solver/_human_agent/agent.py +14 -10
- inspect_ai/solver/_human_agent/commands/__init__.py +7 -3
- inspect_ai/solver/_human_agent/commands/submit.py +76 -30
- inspect_ai/solver/_limit.py +4 -4
- inspect_ai/solver/_plan.py +0 -3
- inspect_ai/solver/_task_state.py +7 -0
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/_tool.py +3 -1
- inspect_ai/tool/_tools/_computer/_resources/tool/_run.py +1 -1
- inspect_ai/tool/_tools/_web_browser/_resources/.pylintrc +8 -0
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/launch.json +24 -0
- inspect_ai/tool/_tools/_web_browser/_resources/.vscode/settings.json +25 -0
- inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +5 -6
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +10 -11
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree.py +71 -0
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_tree_node.py +323 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/__init__.py +5 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/a11y.py +279 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom.py +9 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/dom_snapshot.py +293 -0
- inspect_ai/tool/_tools/_web_browser/_resources/cdp/page.py +94 -0
- inspect_ai/tool/_tools/_web_browser/_resources/constants.py +2 -0
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.svg +2 -0
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_browser.py +50 -0
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +31 -359
- inspect_ai/tool/_tools/_web_browser/_resources/playwright_page_crawler.py +280 -0
- inspect_ai/tool/_tools/_web_browser/_resources/pyproject.toml +65 -0
- inspect_ai/tool/_tools/_web_browser/_resources/rectangle.py +64 -0
- inspect_ai/tool/_tools/_web_browser/_resources/rpc_client_helpers.py +146 -0
- inspect_ai/tool/_tools/_web_browser/_resources/scale_factor.py +64 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_tree_node.py +180 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +15 -9
- inspect_ai/tool/_tools/_web_browser/_resources/test_rectangle.py +15 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_client.py +44 -0
- inspect_ai/tool/_tools/_web_browser/_resources/web_browser_rpc_types.py +39 -0
- inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +198 -48
- inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +26 -25
- inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +178 -39
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +38 -19
- inspect_ai/tool/_tools/_web_search.py +3 -3
- inspect_ai/util/__init__.py +2 -1
- inspect_ai/util/_concurrency.py +14 -8
- inspect_ai/util/_display.py +12 -0
- inspect_ai/util/_sandbox/context.py +15 -0
- inspect_ai/util/_sandbox/docker/docker.py +7 -5
- inspect_ai/util/_sandbox/environment.py +32 -1
- inspect_ai/util/_sandbox/events.py +183 -0
- inspect_ai/util/_sandbox/local.py +3 -3
- inspect_ai/util/_sandbox/self_check.py +131 -43
- inspect_ai/util/_subtask.py +11 -0
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/METADATA +3 -3
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/RECORD +233 -211
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/src/components/VirtualList.module.css +0 -19
- inspect_ai/_view/www/src/components/VirtualList.tsx +0 -292
- inspect_ai/tool/_tools/_web_browser/_resources/accessibility_node.py +0 -312
- inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +0 -275
- inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.png +0 -0
- inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_node.py +0 -176
- inspect_ai/tool/_tools/_web_browser/_resources/test_dm_env_servicer.py +0 -135
- inspect_ai/tool/_tools/_web_browser/_resources/test_web_environment.py +0 -71
- inspect_ai/tool/_tools/_web_browser/_resources/web_environment.py +0 -184
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.69.dist-info → inspect_ai-0.3.71.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,10 @@
|
|
8
8
|
"title": "Timestamp",
|
9
9
|
"type": "string"
|
10
10
|
},
|
11
|
+
"working_start": {
|
12
|
+
"title": "Working Start",
|
13
|
+
"type": "number"
|
14
|
+
},
|
11
15
|
"pending": {
|
12
16
|
"anyOf": [
|
13
17
|
{
|
@@ -85,6 +89,7 @@
|
|
85
89
|
},
|
86
90
|
"required": [
|
87
91
|
"timestamp",
|
92
|
+
"working_start",
|
88
93
|
"pending",
|
89
94
|
"event",
|
90
95
|
"message",
|
@@ -216,6 +221,9 @@
|
|
216
221
|
{
|
217
222
|
"$ref": "#/$defs/ContentText"
|
218
223
|
},
|
224
|
+
{
|
225
|
+
"$ref": "#/$defs/ContentReasoning"
|
226
|
+
},
|
219
227
|
{
|
220
228
|
"$ref": "#/$defs/ContentImage"
|
221
229
|
},
|
@@ -262,26 +270,13 @@
|
|
262
270
|
],
|
263
271
|
"default": null,
|
264
272
|
"title": "Tool Calls"
|
265
|
-
},
|
266
|
-
"reasoning": {
|
267
|
-
"anyOf": [
|
268
|
-
{
|
269
|
-
"type": "string"
|
270
|
-
},
|
271
|
-
{
|
272
|
-
"type": "null"
|
273
|
-
}
|
274
|
-
],
|
275
|
-
"default": null,
|
276
|
-
"title": "Reasoning"
|
277
273
|
}
|
278
274
|
},
|
279
275
|
"required": [
|
280
276
|
"role",
|
281
277
|
"content",
|
282
278
|
"source",
|
283
|
-
"tool_calls"
|
284
|
-
"reasoning"
|
279
|
+
"tool_calls"
|
285
280
|
],
|
286
281
|
"title": "ChatMessageAssistant",
|
287
282
|
"type": "object",
|
@@ -307,6 +302,9 @@
|
|
307
302
|
{
|
308
303
|
"$ref": "#/$defs/ContentText"
|
309
304
|
},
|
305
|
+
{
|
306
|
+
"$ref": "#/$defs/ContentReasoning"
|
307
|
+
},
|
310
308
|
{
|
311
309
|
"$ref": "#/$defs/ContentImage"
|
312
310
|
},
|
@@ -369,6 +367,9 @@
|
|
369
367
|
{
|
370
368
|
"$ref": "#/$defs/ContentText"
|
371
369
|
},
|
370
|
+
{
|
371
|
+
"$ref": "#/$defs/ContentReasoning"
|
372
|
+
},
|
372
373
|
{
|
373
374
|
"$ref": "#/$defs/ContentImage"
|
374
375
|
},
|
@@ -469,6 +470,9 @@
|
|
469
470
|
{
|
470
471
|
"$ref": "#/$defs/ContentText"
|
471
472
|
},
|
473
|
+
{
|
474
|
+
"$ref": "#/$defs/ContentReasoning"
|
475
|
+
},
|
472
476
|
{
|
473
477
|
"$ref": "#/$defs/ContentImage"
|
474
478
|
},
|
@@ -591,6 +595,47 @@
|
|
591
595
|
"type": "object",
|
592
596
|
"additionalProperties": false
|
593
597
|
},
|
598
|
+
"ContentReasoning": {
|
599
|
+
"description": "Reasoning content.\n\nSee the specification for [thinking blocks](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#understanding-thinking-blocks) for Claude models.",
|
600
|
+
"properties": {
|
601
|
+
"type": {
|
602
|
+
"const": "reasoning",
|
603
|
+
"default": "reasoning",
|
604
|
+
"title": "Type",
|
605
|
+
"type": "string"
|
606
|
+
},
|
607
|
+
"reasoning": {
|
608
|
+
"title": "Reasoning",
|
609
|
+
"type": "string"
|
610
|
+
},
|
611
|
+
"signature": {
|
612
|
+
"anyOf": [
|
613
|
+
{
|
614
|
+
"type": "string"
|
615
|
+
},
|
616
|
+
{
|
617
|
+
"type": "null"
|
618
|
+
}
|
619
|
+
],
|
620
|
+
"default": null,
|
621
|
+
"title": "Signature"
|
622
|
+
},
|
623
|
+
"redacted": {
|
624
|
+
"default": false,
|
625
|
+
"title": "Redacted",
|
626
|
+
"type": "boolean"
|
627
|
+
}
|
628
|
+
},
|
629
|
+
"required": [
|
630
|
+
"type",
|
631
|
+
"reasoning",
|
632
|
+
"signature",
|
633
|
+
"redacted"
|
634
|
+
],
|
635
|
+
"title": "ContentReasoning",
|
636
|
+
"type": "object",
|
637
|
+
"additionalProperties": false
|
638
|
+
},
|
594
639
|
"ContentText": {
|
595
640
|
"description": "Text content.",
|
596
641
|
"properties": {
|
@@ -653,6 +698,10 @@
|
|
653
698
|
"title": "Timestamp",
|
654
699
|
"type": "string"
|
655
700
|
},
|
701
|
+
"working_start": {
|
702
|
+
"title": "Working Start",
|
703
|
+
"type": "number"
|
704
|
+
},
|
656
705
|
"pending": {
|
657
706
|
"anyOf": [
|
658
707
|
{
|
@@ -677,6 +726,7 @@
|
|
677
726
|
},
|
678
727
|
"required": [
|
679
728
|
"timestamp",
|
729
|
+
"working_start",
|
680
730
|
"pending",
|
681
731
|
"event",
|
682
732
|
"error"
|
@@ -830,6 +880,18 @@
|
|
830
880
|
"default": null,
|
831
881
|
"title": "Time Limit"
|
832
882
|
},
|
883
|
+
"working_limit": {
|
884
|
+
"anyOf": [
|
885
|
+
{
|
886
|
+
"type": "integer"
|
887
|
+
},
|
888
|
+
{
|
889
|
+
"type": "null"
|
890
|
+
}
|
891
|
+
],
|
892
|
+
"default": null,
|
893
|
+
"title": "Working Limit"
|
894
|
+
},
|
833
895
|
"max_samples": {
|
834
896
|
"anyOf": [
|
835
897
|
{
|
@@ -951,6 +1013,7 @@
|
|
951
1013
|
"message_limit",
|
952
1014
|
"token_limit",
|
953
1015
|
"time_limit",
|
1016
|
+
"working_limit",
|
954
1017
|
"max_samples",
|
955
1018
|
"max_tasks",
|
956
1019
|
"max_subprocesses",
|
@@ -1118,6 +1181,33 @@
|
|
1118
1181
|
"type": "object",
|
1119
1182
|
"additionalProperties": false
|
1120
1183
|
},
|
1184
|
+
"EvalMetricDefinition": {
|
1185
|
+
"properties": {
|
1186
|
+
"name": {
|
1187
|
+
"title": "Name",
|
1188
|
+
"type": "string"
|
1189
|
+
},
|
1190
|
+
"options": {
|
1191
|
+
"anyOf": [
|
1192
|
+
{
|
1193
|
+
"type": "object"
|
1194
|
+
},
|
1195
|
+
{
|
1196
|
+
"type": "null"
|
1197
|
+
}
|
1198
|
+
],
|
1199
|
+
"default": null,
|
1200
|
+
"title": "Options"
|
1201
|
+
}
|
1202
|
+
},
|
1203
|
+
"required": [
|
1204
|
+
"name",
|
1205
|
+
"options"
|
1206
|
+
],
|
1207
|
+
"title": "EvalMetricDefinition",
|
1208
|
+
"type": "object",
|
1209
|
+
"additionalProperties": false
|
1210
|
+
},
|
1121
1211
|
"EvalPlan": {
|
1122
1212
|
"description": "Plan (solvers) used in evaluation.",
|
1123
1213
|
"properties": {
|
@@ -1170,6 +1260,7 @@
|
|
1170
1260
|
"max_tool_output": null,
|
1171
1261
|
"cache_prompt": null,
|
1172
1262
|
"reasoning_effort": null,
|
1263
|
+
"reasoning_tokens": null,
|
1173
1264
|
"reasoning_history": null
|
1174
1265
|
}
|
1175
1266
|
}
|
@@ -1441,6 +1532,9 @@
|
|
1441
1532
|
{
|
1442
1533
|
"$ref": "#/$defs/SampleLimitEvent"
|
1443
1534
|
},
|
1535
|
+
{
|
1536
|
+
"$ref": "#/$defs/SandboxEvent"
|
1537
|
+
},
|
1444
1538
|
{
|
1445
1539
|
"$ref": "#/$defs/StateEvent"
|
1446
1540
|
},
|
@@ -1489,6 +1583,42 @@
|
|
1489
1583
|
"title": "Model Usage",
|
1490
1584
|
"type": "object"
|
1491
1585
|
},
|
1586
|
+
"total_time": {
|
1587
|
+
"anyOf": [
|
1588
|
+
{
|
1589
|
+
"type": "number"
|
1590
|
+
},
|
1591
|
+
{
|
1592
|
+
"type": "null"
|
1593
|
+
}
|
1594
|
+
],
|
1595
|
+
"default": null,
|
1596
|
+
"title": "Total Time"
|
1597
|
+
},
|
1598
|
+
"working_time": {
|
1599
|
+
"anyOf": [
|
1600
|
+
{
|
1601
|
+
"type": "number"
|
1602
|
+
},
|
1603
|
+
{
|
1604
|
+
"type": "null"
|
1605
|
+
}
|
1606
|
+
],
|
1607
|
+
"default": null,
|
1608
|
+
"title": "Working Time"
|
1609
|
+
},
|
1610
|
+
"uuid": {
|
1611
|
+
"anyOf": [
|
1612
|
+
{
|
1613
|
+
"type": "string"
|
1614
|
+
},
|
1615
|
+
{
|
1616
|
+
"type": "null"
|
1617
|
+
}
|
1618
|
+
],
|
1619
|
+
"default": null,
|
1620
|
+
"title": "Uuid"
|
1621
|
+
},
|
1492
1622
|
"error": {
|
1493
1623
|
"anyOf": [
|
1494
1624
|
{
|
@@ -1535,6 +1665,9 @@
|
|
1535
1665
|
"store",
|
1536
1666
|
"events",
|
1537
1667
|
"model_usage",
|
1668
|
+
"total_time",
|
1669
|
+
"working_time",
|
1670
|
+
"uuid",
|
1538
1671
|
"error",
|
1539
1672
|
"attachments",
|
1540
1673
|
"limit"
|
@@ -1550,6 +1683,7 @@
|
|
1550
1683
|
"enum": [
|
1551
1684
|
"context",
|
1552
1685
|
"time",
|
1686
|
+
"working",
|
1553
1687
|
"message",
|
1554
1688
|
"token",
|
1555
1689
|
"operator",
|
@@ -1790,6 +1924,84 @@
|
|
1790
1924
|
"type": "object",
|
1791
1925
|
"additionalProperties": false
|
1792
1926
|
},
|
1927
|
+
"EvalScorer": {
|
1928
|
+
"properties": {
|
1929
|
+
"name": {
|
1930
|
+
"title": "Name",
|
1931
|
+
"type": "string"
|
1932
|
+
},
|
1933
|
+
"options": {
|
1934
|
+
"anyOf": [
|
1935
|
+
{
|
1936
|
+
"type": "object"
|
1937
|
+
},
|
1938
|
+
{
|
1939
|
+
"type": "null"
|
1940
|
+
}
|
1941
|
+
],
|
1942
|
+
"default": null,
|
1943
|
+
"title": "Options"
|
1944
|
+
},
|
1945
|
+
"metrics": {
|
1946
|
+
"anyOf": [
|
1947
|
+
{
|
1948
|
+
"items": {
|
1949
|
+
"anyOf": [
|
1950
|
+
{
|
1951
|
+
"$ref": "#/$defs/EvalMetricDefinition"
|
1952
|
+
},
|
1953
|
+
{
|
1954
|
+
"additionalProperties": {
|
1955
|
+
"items": {
|
1956
|
+
"$ref": "#/$defs/EvalMetricDefinition"
|
1957
|
+
},
|
1958
|
+
"type": "array"
|
1959
|
+
},
|
1960
|
+
"type": "object"
|
1961
|
+
}
|
1962
|
+
]
|
1963
|
+
},
|
1964
|
+
"type": "array"
|
1965
|
+
},
|
1966
|
+
{
|
1967
|
+
"additionalProperties": {
|
1968
|
+
"items": {
|
1969
|
+
"$ref": "#/$defs/EvalMetricDefinition"
|
1970
|
+
},
|
1971
|
+
"type": "array"
|
1972
|
+
},
|
1973
|
+
"type": "object"
|
1974
|
+
},
|
1975
|
+
{
|
1976
|
+
"type": "null"
|
1977
|
+
}
|
1978
|
+
],
|
1979
|
+
"default": null,
|
1980
|
+
"title": "Metrics"
|
1981
|
+
},
|
1982
|
+
"metadata": {
|
1983
|
+
"anyOf": [
|
1984
|
+
{
|
1985
|
+
"type": "object"
|
1986
|
+
},
|
1987
|
+
{
|
1988
|
+
"type": "null"
|
1989
|
+
}
|
1990
|
+
],
|
1991
|
+
"default": null,
|
1992
|
+
"title": "Metadata"
|
1993
|
+
}
|
1994
|
+
},
|
1995
|
+
"required": [
|
1996
|
+
"name",
|
1997
|
+
"options",
|
1998
|
+
"metrics",
|
1999
|
+
"metadata"
|
2000
|
+
],
|
2001
|
+
"title": "EvalScorer",
|
2002
|
+
"type": "object",
|
2003
|
+
"additionalProperties": false
|
2004
|
+
},
|
1793
2005
|
"EvalSpec": {
|
1794
2006
|
"description": "Eval target and configuration.",
|
1795
2007
|
"properties": {
|
@@ -1939,6 +2151,45 @@
|
|
1939
2151
|
],
|
1940
2152
|
"default": null,
|
1941
2153
|
"title": "Metadata"
|
2154
|
+
},
|
2155
|
+
"scorers": {
|
2156
|
+
"anyOf": [
|
2157
|
+
{
|
2158
|
+
"items": {
|
2159
|
+
"$ref": "#/$defs/EvalScorer"
|
2160
|
+
},
|
2161
|
+
"type": "array"
|
2162
|
+
},
|
2163
|
+
{
|
2164
|
+
"type": "null"
|
2165
|
+
}
|
2166
|
+
],
|
2167
|
+
"default": null,
|
2168
|
+
"title": "Scorers"
|
2169
|
+
},
|
2170
|
+
"metrics": {
|
2171
|
+
"anyOf": [
|
2172
|
+
{
|
2173
|
+
"items": {
|
2174
|
+
"$ref": "#/$defs/EvalMetricDefinition"
|
2175
|
+
},
|
2176
|
+
"type": "array"
|
2177
|
+
},
|
2178
|
+
{
|
2179
|
+
"additionalProperties": {
|
2180
|
+
"items": {
|
2181
|
+
"$ref": "#/$defs/EvalMetricDefinition"
|
2182
|
+
},
|
2183
|
+
"type": "array"
|
2184
|
+
},
|
2185
|
+
"type": "object"
|
2186
|
+
},
|
2187
|
+
{
|
2188
|
+
"type": "null"
|
2189
|
+
}
|
2190
|
+
],
|
2191
|
+
"default": null,
|
2192
|
+
"title": "Metrics"
|
1942
2193
|
}
|
1943
2194
|
},
|
1944
2195
|
"required": [
|
@@ -1961,7 +2212,9 @@
|
|
1961
2212
|
"config",
|
1962
2213
|
"revision",
|
1963
2214
|
"packages",
|
1964
|
-
"metadata"
|
2215
|
+
"metadata",
|
2216
|
+
"scorers",
|
2217
|
+
"metrics"
|
1965
2218
|
],
|
1966
2219
|
"title": "EvalSpec",
|
1967
2220
|
"type": "object",
|
@@ -2277,10 +2530,28 @@
|
|
2277
2530
|
"default": null,
|
2278
2531
|
"title": "Reasoning Effort"
|
2279
2532
|
},
|
2533
|
+
"reasoning_tokens": {
|
2534
|
+
"anyOf": [
|
2535
|
+
{
|
2536
|
+
"type": "integer"
|
2537
|
+
},
|
2538
|
+
{
|
2539
|
+
"type": "null"
|
2540
|
+
}
|
2541
|
+
],
|
2542
|
+
"default": null,
|
2543
|
+
"title": "Reasoning Tokens"
|
2544
|
+
},
|
2280
2545
|
"reasoning_history": {
|
2281
2546
|
"anyOf": [
|
2282
2547
|
{
|
2283
|
-
"
|
2548
|
+
"enum": [
|
2549
|
+
"none",
|
2550
|
+
"all",
|
2551
|
+
"last",
|
2552
|
+
"auto"
|
2553
|
+
],
|
2554
|
+
"type": "string"
|
2284
2555
|
},
|
2285
2556
|
{
|
2286
2557
|
"type": "null"
|
@@ -2315,6 +2586,7 @@
|
|
2315
2586
|
"max_tool_output",
|
2316
2587
|
"cache_prompt",
|
2317
2588
|
"reasoning_effort",
|
2589
|
+
"reasoning_tokens",
|
2318
2590
|
"reasoning_history"
|
2319
2591
|
],
|
2320
2592
|
"additionalProperties": false
|
@@ -2327,6 +2599,10 @@
|
|
2327
2599
|
"title": "Timestamp",
|
2328
2600
|
"type": "string"
|
2329
2601
|
},
|
2602
|
+
"working_start": {
|
2603
|
+
"title": "Working Start",
|
2604
|
+
"type": "number"
|
2605
|
+
},
|
2330
2606
|
"pending": {
|
2331
2607
|
"anyOf": [
|
2332
2608
|
{
|
@@ -2363,6 +2639,7 @@
|
|
2363
2639
|
},
|
2364
2640
|
"required": [
|
2365
2641
|
"timestamp",
|
2642
|
+
"working_start",
|
2366
2643
|
"pending",
|
2367
2644
|
"event",
|
2368
2645
|
"source",
|
@@ -2380,6 +2657,10 @@
|
|
2380
2657
|
"title": "Timestamp",
|
2381
2658
|
"type": "string"
|
2382
2659
|
},
|
2660
|
+
"working_start": {
|
2661
|
+
"title": "Working Start",
|
2662
|
+
"type": "number"
|
2663
|
+
},
|
2383
2664
|
"pending": {
|
2384
2665
|
"anyOf": [
|
2385
2666
|
{
|
@@ -2409,6 +2690,7 @@
|
|
2409
2690
|
},
|
2410
2691
|
"required": [
|
2411
2692
|
"timestamp",
|
2693
|
+
"working_start",
|
2412
2694
|
"pending",
|
2413
2695
|
"event",
|
2414
2696
|
"input",
|
@@ -2478,6 +2760,10 @@
|
|
2478
2760
|
"title": "Timestamp",
|
2479
2761
|
"type": "string"
|
2480
2762
|
},
|
2763
|
+
"working_start": {
|
2764
|
+
"title": "Working Start",
|
2765
|
+
"type": "number"
|
2766
|
+
},
|
2481
2767
|
"pending": {
|
2482
2768
|
"anyOf": [
|
2483
2769
|
{
|
@@ -2502,6 +2788,7 @@
|
|
2502
2788
|
},
|
2503
2789
|
"required": [
|
2504
2790
|
"timestamp",
|
2791
|
+
"working_start",
|
2505
2792
|
"pending",
|
2506
2793
|
"event",
|
2507
2794
|
"message"
|
@@ -2662,11 +2949,24 @@
|
|
2662
2949
|
},
|
2663
2950
|
"title": "Response",
|
2664
2951
|
"type": "object"
|
2952
|
+
},
|
2953
|
+
"time": {
|
2954
|
+
"anyOf": [
|
2955
|
+
{
|
2956
|
+
"type": "number"
|
2957
|
+
},
|
2958
|
+
{
|
2959
|
+
"type": "null"
|
2960
|
+
}
|
2961
|
+
],
|
2962
|
+
"default": null,
|
2963
|
+
"title": "Time"
|
2665
2964
|
}
|
2666
2965
|
},
|
2667
2966
|
"required": [
|
2668
2967
|
"request",
|
2669
|
-
"response"
|
2968
|
+
"response",
|
2969
|
+
"time"
|
2670
2970
|
],
|
2671
2971
|
"title": "ModelCall",
|
2672
2972
|
"type": "object",
|
@@ -2680,6 +2980,10 @@
|
|
2680
2980
|
"title": "Timestamp",
|
2681
2981
|
"type": "string"
|
2682
2982
|
},
|
2983
|
+
"working_start": {
|
2984
|
+
"title": "Working Start",
|
2985
|
+
"type": "number"
|
2986
|
+
},
|
2683
2987
|
"pending": {
|
2684
2988
|
"anyOf": [
|
2685
2989
|
{
|
@@ -2789,10 +3093,36 @@
|
|
2789
3093
|
}
|
2790
3094
|
],
|
2791
3095
|
"default": null
|
3096
|
+
},
|
3097
|
+
"completed": {
|
3098
|
+
"anyOf": [
|
3099
|
+
{
|
3100
|
+
"format": "date-time",
|
3101
|
+
"type": "string"
|
3102
|
+
},
|
3103
|
+
{
|
3104
|
+
"type": "null"
|
3105
|
+
}
|
3106
|
+
],
|
3107
|
+
"default": null,
|
3108
|
+
"title": "Completed"
|
3109
|
+
},
|
3110
|
+
"working_time": {
|
3111
|
+
"anyOf": [
|
3112
|
+
{
|
3113
|
+
"type": "number"
|
3114
|
+
},
|
3115
|
+
{
|
3116
|
+
"type": "null"
|
3117
|
+
}
|
3118
|
+
],
|
3119
|
+
"default": null,
|
3120
|
+
"title": "Working Time"
|
2792
3121
|
}
|
2793
3122
|
},
|
2794
3123
|
"required": [
|
2795
3124
|
"timestamp",
|
3125
|
+
"working_start",
|
2796
3126
|
"pending",
|
2797
3127
|
"event",
|
2798
3128
|
"model",
|
@@ -2803,7 +3133,9 @@
|
|
2803
3133
|
"output",
|
2804
3134
|
"error",
|
2805
3135
|
"cache",
|
2806
|
-
"call"
|
3136
|
+
"call",
|
3137
|
+
"completed",
|
3138
|
+
"working_time"
|
2807
3139
|
],
|
2808
3140
|
"title": "ModelEvent",
|
2809
3141
|
"type": "object",
|
@@ -2925,6 +3257,18 @@
|
|
2925
3257
|
],
|
2926
3258
|
"default": null,
|
2927
3259
|
"title": "Input Tokens Cache Read"
|
3260
|
+
},
|
3261
|
+
"reasoning_tokens": {
|
3262
|
+
"anyOf": [
|
3263
|
+
{
|
3264
|
+
"type": "integer"
|
3265
|
+
},
|
3266
|
+
{
|
3267
|
+
"type": "null"
|
3268
|
+
}
|
3269
|
+
],
|
3270
|
+
"default": null,
|
3271
|
+
"title": "Reasoning Tokens"
|
2928
3272
|
}
|
2929
3273
|
},
|
2930
3274
|
"title": "ModelUsage",
|
@@ -2934,7 +3278,8 @@
|
|
2934
3278
|
"output_tokens",
|
2935
3279
|
"total_tokens",
|
2936
3280
|
"input_tokens_cache_write",
|
2937
|
-
"input_tokens_cache_read"
|
3281
|
+
"input_tokens_cache_read",
|
3282
|
+
"reasoning_tokens"
|
2938
3283
|
],
|
2939
3284
|
"additionalProperties": false
|
2940
3285
|
},
|
@@ -3085,6 +3430,10 @@
|
|
3085
3430
|
"title": "Timestamp",
|
3086
3431
|
"type": "string"
|
3087
3432
|
},
|
3433
|
+
"working_start": {
|
3434
|
+
"title": "Working Start",
|
3435
|
+
"type": "number"
|
3436
|
+
},
|
3088
3437
|
"pending": {
|
3089
3438
|
"anyOf": [
|
3090
3439
|
{
|
@@ -3112,6 +3461,7 @@
|
|
3112
3461
|
},
|
3113
3462
|
"required": [
|
3114
3463
|
"timestamp",
|
3464
|
+
"working_start",
|
3115
3465
|
"pending",
|
3116
3466
|
"event",
|
3117
3467
|
"sample",
|
@@ -3129,6 +3479,10 @@
|
|
3129
3479
|
"title": "Timestamp",
|
3130
3480
|
"type": "string"
|
3131
3481
|
},
|
3482
|
+
"working_start": {
|
3483
|
+
"title": "Working Start",
|
3484
|
+
"type": "number"
|
3485
|
+
},
|
3132
3486
|
"pending": {
|
3133
3487
|
"anyOf": [
|
3134
3488
|
{
|
@@ -3151,6 +3505,7 @@
|
|
3151
3505
|
"enum": [
|
3152
3506
|
"message",
|
3153
3507
|
"time",
|
3508
|
+
"working",
|
3154
3509
|
"token",
|
3155
3510
|
"operator",
|
3156
3511
|
"custom"
|
@@ -3177,6 +3532,7 @@
|
|
3177
3532
|
},
|
3178
3533
|
"required": [
|
3179
3534
|
"timestamp",
|
3535
|
+
"working_start",
|
3180
3536
|
"pending",
|
3181
3537
|
"event",
|
3182
3538
|
"type",
|
@@ -3213,6 +3569,152 @@
|
|
3213
3569
|
],
|
3214
3570
|
"type": "array"
|
3215
3571
|
},
|
3572
|
+
"SandboxEvent": {
|
3573
|
+
"description": "Sandbox execution or I/O",
|
3574
|
+
"properties": {
|
3575
|
+
"timestamp": {
|
3576
|
+
"format": "date-time",
|
3577
|
+
"title": "Timestamp",
|
3578
|
+
"type": "string"
|
3579
|
+
},
|
3580
|
+
"working_start": {
|
3581
|
+
"title": "Working Start",
|
3582
|
+
"type": "number"
|
3583
|
+
},
|
3584
|
+
"pending": {
|
3585
|
+
"anyOf": [
|
3586
|
+
{
|
3587
|
+
"type": "boolean"
|
3588
|
+
},
|
3589
|
+
{
|
3590
|
+
"type": "null"
|
3591
|
+
}
|
3592
|
+
],
|
3593
|
+
"default": null,
|
3594
|
+
"title": "Pending"
|
3595
|
+
},
|
3596
|
+
"event": {
|
3597
|
+
"const": "sandbox",
|
3598
|
+
"default": "sandbox",
|
3599
|
+
"title": "Event",
|
3600
|
+
"type": "string"
|
3601
|
+
},
|
3602
|
+
"action": {
|
3603
|
+
"enum": [
|
3604
|
+
"exec",
|
3605
|
+
"read_file",
|
3606
|
+
"write_file"
|
3607
|
+
],
|
3608
|
+
"title": "Action",
|
3609
|
+
"type": "string"
|
3610
|
+
},
|
3611
|
+
"cmd": {
|
3612
|
+
"anyOf": [
|
3613
|
+
{
|
3614
|
+
"type": "string"
|
3615
|
+
},
|
3616
|
+
{
|
3617
|
+
"type": "null"
|
3618
|
+
}
|
3619
|
+
],
|
3620
|
+
"default": null,
|
3621
|
+
"title": "Cmd"
|
3622
|
+
},
|
3623
|
+
"options": {
|
3624
|
+
"anyOf": [
|
3625
|
+
{
|
3626
|
+
"additionalProperties": {
|
3627
|
+
"$ref": "#/$defs/JsonValue"
|
3628
|
+
},
|
3629
|
+
"type": "object"
|
3630
|
+
},
|
3631
|
+
{
|
3632
|
+
"type": "null"
|
3633
|
+
}
|
3634
|
+
],
|
3635
|
+
"default": null,
|
3636
|
+
"title": "Options"
|
3637
|
+
},
|
3638
|
+
"file": {
|
3639
|
+
"anyOf": [
|
3640
|
+
{
|
3641
|
+
"type": "string"
|
3642
|
+
},
|
3643
|
+
{
|
3644
|
+
"type": "null"
|
3645
|
+
}
|
3646
|
+
],
|
3647
|
+
"default": null,
|
3648
|
+
"title": "File"
|
3649
|
+
},
|
3650
|
+
"input": {
|
3651
|
+
"anyOf": [
|
3652
|
+
{
|
3653
|
+
"type": "string"
|
3654
|
+
},
|
3655
|
+
{
|
3656
|
+
"type": "null"
|
3657
|
+
}
|
3658
|
+
],
|
3659
|
+
"default": null,
|
3660
|
+
"title": "Input"
|
3661
|
+
},
|
3662
|
+
"result": {
|
3663
|
+
"anyOf": [
|
3664
|
+
{
|
3665
|
+
"type": "integer"
|
3666
|
+
},
|
3667
|
+
{
|
3668
|
+
"type": "null"
|
3669
|
+
}
|
3670
|
+
],
|
3671
|
+
"default": null,
|
3672
|
+
"title": "Result"
|
3673
|
+
},
|
3674
|
+
"output": {
|
3675
|
+
"anyOf": [
|
3676
|
+
{
|
3677
|
+
"type": "string"
|
3678
|
+
},
|
3679
|
+
{
|
3680
|
+
"type": "null"
|
3681
|
+
}
|
3682
|
+
],
|
3683
|
+
"default": null,
|
3684
|
+
"title": "Output"
|
3685
|
+
},
|
3686
|
+
"completed": {
|
3687
|
+
"anyOf": [
|
3688
|
+
{
|
3689
|
+
"format": "date-time",
|
3690
|
+
"type": "string"
|
3691
|
+
},
|
3692
|
+
{
|
3693
|
+
"type": "null"
|
3694
|
+
}
|
3695
|
+
],
|
3696
|
+
"default": null,
|
3697
|
+
"title": "Completed"
|
3698
|
+
}
|
3699
|
+
},
|
3700
|
+
"required": [
|
3701
|
+
"timestamp",
|
3702
|
+
"working_start",
|
3703
|
+
"pending",
|
3704
|
+
"event",
|
3705
|
+
"action",
|
3706
|
+
"cmd",
|
3707
|
+
"options",
|
3708
|
+
"file",
|
3709
|
+
"input",
|
3710
|
+
"result",
|
3711
|
+
"output",
|
3712
|
+
"completed"
|
3713
|
+
],
|
3714
|
+
"title": "SandboxEvent",
|
3715
|
+
"type": "object",
|
3716
|
+
"additionalProperties": false
|
3717
|
+
},
|
3216
3718
|
"Score": {
|
3217
3719
|
"description": "Score generated by a scorer.",
|
3218
3720
|
"properties": {
|
@@ -3329,6 +3831,10 @@
|
|
3329
3831
|
"title": "Timestamp",
|
3330
3832
|
"type": "string"
|
3331
3833
|
},
|
3834
|
+
"working_start": {
|
3835
|
+
"title": "Working Start",
|
3836
|
+
"type": "number"
|
3837
|
+
},
|
3332
3838
|
"pending": {
|
3333
3839
|
"anyOf": [
|
3334
3840
|
{
|
@@ -3376,6 +3882,7 @@
|
|
3376
3882
|
},
|
3377
3883
|
"required": [
|
3378
3884
|
"timestamp",
|
3885
|
+
"working_start",
|
3379
3886
|
"pending",
|
3380
3887
|
"event",
|
3381
3888
|
"score",
|
@@ -3394,6 +3901,10 @@
|
|
3394
3901
|
"title": "Timestamp",
|
3395
3902
|
"type": "string"
|
3396
3903
|
},
|
3904
|
+
"working_start": {
|
3905
|
+
"title": "Working Start",
|
3906
|
+
"type": "number"
|
3907
|
+
},
|
3397
3908
|
"pending": {
|
3398
3909
|
"anyOf": [
|
3399
3910
|
{
|
@@ -3422,6 +3933,7 @@
|
|
3422
3933
|
},
|
3423
3934
|
"required": [
|
3424
3935
|
"timestamp",
|
3936
|
+
"working_start",
|
3425
3937
|
"pending",
|
3426
3938
|
"event",
|
3427
3939
|
"changes"
|
@@ -3438,6 +3950,10 @@
|
|
3438
3950
|
"title": "Timestamp",
|
3439
3951
|
"type": "string"
|
3440
3952
|
},
|
3953
|
+
"working_start": {
|
3954
|
+
"title": "Working Start",
|
3955
|
+
"type": "number"
|
3956
|
+
},
|
3441
3957
|
"pending": {
|
3442
3958
|
"anyOf": [
|
3443
3959
|
{
|
@@ -3483,6 +3999,7 @@
|
|
3483
3999
|
},
|
3484
4000
|
"required": [
|
3485
4001
|
"timestamp",
|
4002
|
+
"working_start",
|
3486
4003
|
"pending",
|
3487
4004
|
"event",
|
3488
4005
|
"action",
|
@@ -3501,6 +4018,10 @@
|
|
3501
4018
|
"title": "Timestamp",
|
3502
4019
|
"type": "string"
|
3503
4020
|
},
|
4021
|
+
"working_start": {
|
4022
|
+
"title": "Working Start",
|
4023
|
+
"type": "number"
|
4024
|
+
},
|
3504
4025
|
"pending": {
|
3505
4026
|
"anyOf": [
|
3506
4027
|
{
|
@@ -3529,6 +4050,7 @@
|
|
3529
4050
|
},
|
3530
4051
|
"required": [
|
3531
4052
|
"timestamp",
|
4053
|
+
"working_start",
|
3532
4054
|
"pending",
|
3533
4055
|
"event",
|
3534
4056
|
"changes"
|
@@ -3545,6 +4067,10 @@
|
|
3545
4067
|
"title": "Timestamp",
|
3546
4068
|
"type": "string"
|
3547
4069
|
},
|
4070
|
+
"working_start": {
|
4071
|
+
"title": "Working Start",
|
4072
|
+
"type": "number"
|
4073
|
+
},
|
3548
4074
|
"pending": {
|
3549
4075
|
"anyOf": [
|
3550
4076
|
{
|
@@ -3596,6 +4122,9 @@
|
|
3596
4122
|
{
|
3597
4123
|
"$ref": "#/$defs/SampleLimitEvent"
|
3598
4124
|
},
|
4125
|
+
{
|
4126
|
+
"$ref": "#/$defs/SandboxEvent"
|
4127
|
+
},
|
3599
4128
|
{
|
3600
4129
|
"$ref": "#/$defs/StateEvent"
|
3601
4130
|
},
|
@@ -3636,17 +4165,45 @@
|
|
3636
4165
|
},
|
3637
4166
|
"title": "Events",
|
3638
4167
|
"type": "array"
|
4168
|
+
},
|
4169
|
+
"completed": {
|
4170
|
+
"anyOf": [
|
4171
|
+
{
|
4172
|
+
"format": "date-time",
|
4173
|
+
"type": "string"
|
4174
|
+
},
|
4175
|
+
{
|
4176
|
+
"type": "null"
|
4177
|
+
}
|
4178
|
+
],
|
4179
|
+
"default": null,
|
4180
|
+
"title": "Completed"
|
4181
|
+
},
|
4182
|
+
"working_time": {
|
4183
|
+
"anyOf": [
|
4184
|
+
{
|
4185
|
+
"type": "number"
|
4186
|
+
},
|
4187
|
+
{
|
4188
|
+
"type": "null"
|
4189
|
+
}
|
4190
|
+
],
|
4191
|
+
"default": null,
|
4192
|
+
"title": "Working Time"
|
3639
4193
|
}
|
3640
4194
|
},
|
3641
4195
|
"required": [
|
3642
4196
|
"timestamp",
|
4197
|
+
"working_start",
|
3643
4198
|
"pending",
|
3644
4199
|
"event",
|
3645
4200
|
"name",
|
3646
4201
|
"type",
|
3647
4202
|
"input",
|
3648
4203
|
"result",
|
3649
|
-
"events"
|
4204
|
+
"events",
|
4205
|
+
"completed",
|
4206
|
+
"working_time"
|
3650
4207
|
],
|
3651
4208
|
"title": "SubtaskEvent",
|
3652
4209
|
"type": "object",
|
@@ -3816,6 +4373,10 @@
|
|
3816
4373
|
"title": "Timestamp",
|
3817
4374
|
"type": "string"
|
3818
4375
|
},
|
4376
|
+
"working_start": {
|
4377
|
+
"title": "Working Start",
|
4378
|
+
"type": "number"
|
4379
|
+
},
|
3819
4380
|
"pending": {
|
3820
4381
|
"anyOf": [
|
3821
4382
|
{
|
@@ -3883,6 +4444,9 @@
|
|
3883
4444
|
{
|
3884
4445
|
"$ref": "#/$defs/ContentText"
|
3885
4446
|
},
|
4447
|
+
{
|
4448
|
+
"$ref": "#/$defs/ContentReasoning"
|
4449
|
+
},
|
3886
4450
|
{
|
3887
4451
|
"$ref": "#/$defs/ContentImage"
|
3888
4452
|
},
|
@@ -3898,6 +4462,9 @@
|
|
3898
4462
|
{
|
3899
4463
|
"$ref": "#/$defs/ContentText"
|
3900
4464
|
},
|
4465
|
+
{
|
4466
|
+
"$ref": "#/$defs/ContentReasoning"
|
4467
|
+
},
|
3901
4468
|
{
|
3902
4469
|
"$ref": "#/$defs/ContentImage"
|
3903
4470
|
},
|
@@ -3956,6 +4523,9 @@
|
|
3956
4523
|
{
|
3957
4524
|
"$ref": "#/$defs/SampleLimitEvent"
|
3958
4525
|
},
|
4526
|
+
{
|
4527
|
+
"$ref": "#/$defs/SandboxEvent"
|
4528
|
+
},
|
3959
4529
|
{
|
3960
4530
|
"$ref": "#/$defs/StateEvent"
|
3961
4531
|
},
|
@@ -3996,10 +4566,36 @@
|
|
3996
4566
|
},
|
3997
4567
|
"title": "Events",
|
3998
4568
|
"type": "array"
|
4569
|
+
},
|
4570
|
+
"completed": {
|
4571
|
+
"anyOf": [
|
4572
|
+
{
|
4573
|
+
"format": "date-time",
|
4574
|
+
"type": "string"
|
4575
|
+
},
|
4576
|
+
{
|
4577
|
+
"type": "null"
|
4578
|
+
}
|
4579
|
+
],
|
4580
|
+
"default": null,
|
4581
|
+
"title": "Completed"
|
4582
|
+
},
|
4583
|
+
"working_time": {
|
4584
|
+
"anyOf": [
|
4585
|
+
{
|
4586
|
+
"type": "number"
|
4587
|
+
},
|
4588
|
+
{
|
4589
|
+
"type": "null"
|
4590
|
+
}
|
4591
|
+
],
|
4592
|
+
"default": null,
|
4593
|
+
"title": "Working Time"
|
3999
4594
|
}
|
4000
4595
|
},
|
4001
4596
|
"required": [
|
4002
4597
|
"timestamp",
|
4598
|
+
"working_start",
|
4003
4599
|
"pending",
|
4004
4600
|
"event",
|
4005
4601
|
"type",
|
@@ -4010,7 +4606,9 @@
|
|
4010
4606
|
"result",
|
4011
4607
|
"truncated",
|
4012
4608
|
"error",
|
4013
|
-
"events"
|
4609
|
+
"events",
|
4610
|
+
"completed",
|
4611
|
+
"working_time"
|
4014
4612
|
],
|
4015
4613
|
"title": "ToolEvent",
|
4016
4614
|
"type": "object",
|