inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/cache.py +8 -7
- inspect_ai/_cli/common.py +0 -12
- inspect_ai/_cli/eval.py +32 -4
- inspect_ai/_cli/info.py +1 -0
- inspect_ai/_cli/list.py +1 -1
- inspect_ai/_cli/log.py +2 -0
- inspect_ai/_cli/main.py +1 -1
- inspect_ai/_cli/sandbox.py +4 -1
- inspect_ai/_cli/score.py +181 -32
- inspect_ai/_cli/trace.py +10 -0
- inspect_ai/_cli/view.py +4 -2
- inspect_ai/_display/core/active.py +2 -3
- inspect_ai/_display/core/config.py +7 -1
- inspect_ai/_display/textual/widgets/samples.py +4 -3
- inspect_ai/_display/textual/widgets/sandbox.py +6 -0
- inspect_ai/_eval/eval.py +104 -101
- inspect_ai/_eval/evalset.py +75 -75
- inspect_ai/_eval/loader.py +122 -12
- inspect_ai/_eval/registry.py +1 -1
- inspect_ai/_eval/run.py +14 -0
- inspect_ai/_eval/score.py +125 -36
- inspect_ai/_eval/task/log.py +105 -4
- inspect_ai/_eval/task/results.py +92 -38
- inspect_ai/_eval/task/run.py +9 -2
- inspect_ai/_eval/task/sandbox.py +35 -2
- inspect_ai/_eval/task/task.py +49 -46
- inspect_ai/_util/constants.py +1 -1
- inspect_ai/_util/content.py +8 -0
- inspect_ai/_util/error.py +2 -0
- inspect_ai/_util/file.py +15 -1
- inspect_ai/_util/hash.py +1 -1
- inspect_ai/_util/logger.py +4 -2
- inspect_ai/_util/registry.py +7 -1
- inspect_ai/_view/view.py +1 -2
- inspect_ai/_view/www/.vscode/extensions.json +3 -0
- inspect_ai/_view/www/.vscode/settings.json +8 -0
- inspect_ai/_view/www/App.css +97 -29
- inspect_ai/_view/www/README.md +1 -1
- inspect_ai/_view/www/dist/assets/index.css +16663 -14674
- inspect_ai/_view/www/dist/assets/index.js +58808 -51348
- inspect_ai/_view/www/dist/index.html +1 -1
- inspect_ai/_view/www/index.html +2 -2
- inspect_ai/_view/www/log-schema.json +87 -73
- inspect_ai/_view/www/package.json +22 -4
- inspect_ai/_view/www/postcss.config.cjs +8 -9
- inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
- inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
- inspect_ai/_view/www/src/api/api-browser.ts +2 -2
- inspect_ai/_view/www/src/api/api-http.ts +3 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
- inspect_ai/_view/www/src/api/client-api.ts +4 -4
- inspect_ai/_view/www/src/api/index.ts +4 -4
- inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
- inspect_ai/_view/www/src/appearance/colors.ts +9 -0
- inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
- inspect_ai/_view/www/src/appearance/icons.ts +100 -0
- inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
- inspect_ai/_view/www/src/components/Card.css +60 -0
- inspect_ai/_view/www/src/components/Card.tsx +109 -0
- inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
- inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
- inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
- inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
- inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
- inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
- inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
- inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
- inspect_ai/_view/www/src/components/FindBand.css +49 -0
- inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
- inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
- inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
- inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
- inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
- inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
- inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
- inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
- inspect_ai/_view/www/src/components/MessageBand.css +43 -0
- inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
- inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
- inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
- inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
- inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
- inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
- inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
- inspect_ai/_view/www/src/components/ToolButton.css +3 -0
- inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
- inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
- inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
- inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
- inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
- inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
- inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
- inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
- inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
- inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
- inspect_ai/_view/www/src/metadata/types.ts +18 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
- inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
- inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
- inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
- inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
- inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
- inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
- inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
- inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
- inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
- inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
- inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
- inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
- inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
- inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
- inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
- inspect_ai/_view/www/src/samples/error/error.ts +15 -0
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
- inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
- inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
- inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
- inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
- inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
- inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
- inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
- inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
- inspect_ai/_view/www/src/types/log.d.ts +108 -19
- inspect_ai/_view/www/src/types/prism.d.ts +11 -0
- inspect_ai/_view/www/src/types.ts +71 -0
- inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
- inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
- inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
- inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
- inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
- inspect_ai/_view/www/src/utils/attachments.ts +42 -0
- inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
- inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
- inspect_ai/_view/www/src/utils/debugging.ts +28 -0
- inspect_ai/_view/www/src/utils/dom.ts +30 -0
- inspect_ai/_view/www/src/utils/format.ts +194 -0
- inspect_ai/_view/www/src/utils/git.ts +7 -0
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/_view/www/src/utils/http.ts +14 -0
- inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
- inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
- inspect_ai/_view/www/src/utils/queue.ts +51 -0
- inspect_ai/_view/www/src/utils/sync.ts +114 -0
- inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
- inspect_ai/_view/www/src/utils/vscode.ts +13 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
- inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
- inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
- inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
- inspect_ai/_view/www/src/workspace/types.ts +10 -0
- inspect_ai/_view/www/src/workspace/utils.ts +34 -0
- inspect_ai/_view/www/tsconfig.json +23 -9
- inspect_ai/_view/www/vite.config.js +8 -17
- inspect_ai/_view/www/yarn.lock +627 -556
- inspect_ai/approval/_approval.py +2 -0
- inspect_ai/approval/_approver.py +4 -4
- inspect_ai/approval/_auto.py +1 -1
- inspect_ai/approval/_human/approver.py +3 -0
- inspect_ai/approval/_policy.py +5 -0
- inspect_ai/approval/_registry.py +2 -2
- inspect_ai/dataset/_dataset.py +64 -37
- inspect_ai/dataset/_sources/__init__.py +0 -0
- inspect_ai/dataset/_sources/csv.py +20 -12
- inspect_ai/dataset/_sources/file.py +4 -0
- inspect_ai/dataset/_sources/hf.py +39 -29
- inspect_ai/dataset/_sources/json.py +17 -9
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_convert.py +3 -3
- inspect_ai/log/_file.py +24 -9
- inspect_ai/log/_log.py +101 -13
- inspect_ai/log/_message.py +4 -2
- inspect_ai/log/_recorders/file.py +4 -0
- inspect_ai/log/_recorders/json.py +5 -7
- inspect_ai/log/_recorders/recorder.py +3 -0
- inspect_ai/log/_transcript.py +19 -8
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_cache.py +39 -21
- inspect_ai/model/_call_tools.py +4 -3
- inspect_ai/model/_chat_message.py +14 -4
- inspect_ai/model/_generate_config.py +1 -1
- inspect_ai/model/_model.py +31 -24
- inspect_ai/model/_model_output.py +14 -1
- inspect_ai/model/_openai.py +10 -18
- inspect_ai/model/_providers/anthropic.py +3 -3
- inspect_ai/model/_providers/google.py +9 -5
- inspect_ai/model/_providers/openai.py +5 -9
- inspect_ai/model/_providers/openai_o1.py +3 -5
- inspect_ai/model/_providers/openrouter.py +86 -0
- inspect_ai/model/_providers/providers.py +11 -0
- inspect_ai/scorer/__init__.py +6 -1
- inspect_ai/scorer/_answer.py +7 -7
- inspect_ai/scorer/_classification.py +38 -18
- inspect_ai/scorer/_common.py +2 -8
- inspect_ai/scorer/_match.py +4 -5
- inspect_ai/scorer/_metric.py +87 -28
- inspect_ai/scorer/_metrics/__init__.py +3 -3
- inspect_ai/scorer/_metrics/accuracy.py +8 -10
- inspect_ai/scorer/_metrics/mean.py +3 -17
- inspect_ai/scorer/_metrics/std.py +111 -30
- inspect_ai/scorer/_model.py +12 -12
- inspect_ai/scorer/_pattern.py +3 -3
- inspect_ai/scorer/_reducer/reducer.py +36 -21
- inspect_ai/scorer/_reducer/registry.py +2 -2
- inspect_ai/scorer/_reducer/types.py +7 -1
- inspect_ai/scorer/_score.py +11 -1
- inspect_ai/scorer/_scorer.py +110 -16
- inspect_ai/solver/__init__.py +1 -1
- inspect_ai/solver/_basic_agent.py +19 -22
- inspect_ai/solver/_bridge/__init__.py +0 -3
- inspect_ai/solver/_bridge/bridge.py +3 -3
- inspect_ai/solver/_chain.py +1 -2
- inspect_ai/solver/_critique.py +3 -3
- inspect_ai/solver/_fork.py +2 -2
- inspect_ai/solver/_human_agent/__init__.py +0 -0
- inspect_ai/solver/_human_agent/agent.py +5 -8
- inspect_ai/solver/_human_agent/commands/clock.py +14 -10
- inspect_ai/solver/_human_agent/commands/note.py +1 -1
- inspect_ai/solver/_human_agent/commands/score.py +0 -11
- inspect_ai/solver/_multiple_choice.py +38 -26
- inspect_ai/solver/_prompt.py +7 -7
- inspect_ai/solver/_solver.py +53 -52
- inspect_ai/solver/_task_state.py +80 -69
- inspect_ai/solver/_use_tools.py +9 -9
- inspect_ai/tool/__init__.py +4 -1
- inspect_ai/tool/_tool.py +43 -14
- inspect_ai/tool/_tool_call.py +6 -2
- inspect_ai/tool/_tool_choice.py +3 -1
- inspect_ai/tool/_tool_def.py +10 -8
- inspect_ai/tool/_tool_params.py +24 -0
- inspect_ai/tool/_tool_with.py +7 -7
- inspect_ai/tool/_tools/__init__.py +0 -0
- inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
- inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
- inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
- inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/tool/_tools/_execute.py +23 -11
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
- inspect_ai/tool/_tools/_web_search.py +7 -5
- inspect_ai/tool/beta.py +3 -0
- inspect_ai/util/_concurrency.py +3 -3
- inspect_ai/util/_panel.py +2 -0
- inspect_ai/util/_resource.py +12 -12
- inspect_ai/util/_sandbox/docker/compose.py +23 -20
- inspect_ai/util/_sandbox/docker/config.py +2 -1
- inspect_ai/util/_sandbox/docker/docker.py +42 -86
- inspect_ai/util/_sandbox/docker/service.py +100 -0
- inspect_ai/util/_sandbox/environment.py +99 -96
- inspect_ai/util/_sandbox/self_check.py +124 -16
- inspect_ai/util/_subprocess.py +5 -3
- inspect_ai/util/_subtask.py +15 -16
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
- inspect_ai-0.3.64.dist-info/RECORD +625 -0
- inspect_ai/_view/www/src/Register.mjs +0 -3
- inspect_ai/_view/www/src/Types.mjs +0 -38
- inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
- inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
- inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
- inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
- inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
- inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
- inspect_ai/_view/www/src/components/Card.mjs +0 -126
- inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
- inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
- inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
- inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
- inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
- inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
- inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
- inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
- inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
- inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
- inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
- inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
- inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
- inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
- inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
- inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
- inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
- inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
- inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
- inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
- inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
- inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
- inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
- inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
- inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
- inspect_ai/_view/www/src/components/Tools.mjs +0 -376
- inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
- inspect_ai/_view/www/src/components/ansi-output.js +0 -932
- inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
- inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
- inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
- inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
- inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
- inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
- inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
- inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
- inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
- inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
- inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
- inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
- inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
- inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
- inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
- inspect_ai/_view/www/src/utils/Format.mjs +0 -260
- inspect_ai/_view/www/src/utils/Git.mjs +0 -12
- inspect_ai/_view/www/src/utils/Html.mjs +0 -21
- inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
- inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
- inspect_ai/_view/www/src/utils/http.mjs +0 -18
- inspect_ai/_view/www/src/utils/queue.mjs +0 -67
- inspect_ai/_view/www/src/utils/sync.mjs +0 -101
- inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
- inspect_ai/tool/beta/__init__.py +0 -5
- inspect_ai-0.3.62.dist-info/RECORD +0 -481
- /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
- /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
- /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
inspect_ai/_view/www/index.html
CHANGED
@@ -19,8 +19,8 @@
|
|
19
19
|
</script>
|
20
20
|
</head>
|
21
21
|
|
22
|
-
<body style="min-width: 450px
|
22
|
+
<body style="min-width: 450px">
|
23
23
|
<div id="app"></div>
|
24
|
-
<script type="module" src="./src/index.
|
24
|
+
<script type="module" src="./src/index.tsx"></script>
|
25
25
|
</body>
|
26
26
|
</html>
|
@@ -157,6 +157,7 @@
|
|
157
157
|
"type": "object"
|
158
158
|
},
|
159
159
|
"ChatCompletionChoice": {
|
160
|
+
"description": "Choice generated for completion.",
|
160
161
|
"properties": {
|
161
162
|
"message": {
|
162
163
|
"$ref": "#/$defs/ChatMessageAssistant"
|
@@ -196,7 +197,14 @@
|
|
196
197
|
"additionalProperties": false
|
197
198
|
},
|
198
199
|
"ChatMessageAssistant": {
|
200
|
+
"description": "Assistant chat message.",
|
199
201
|
"properties": {
|
202
|
+
"role": {
|
203
|
+
"const": "assistant",
|
204
|
+
"default": "assistant",
|
205
|
+
"title": "Role",
|
206
|
+
"type": "string"
|
207
|
+
},
|
200
208
|
"content": {
|
201
209
|
"anyOf": [
|
202
210
|
{
|
@@ -240,12 +248,6 @@
|
|
240
248
|
"default": null,
|
241
249
|
"title": "Source"
|
242
250
|
},
|
243
|
-
"role": {
|
244
|
-
"const": "assistant",
|
245
|
-
"default": "assistant",
|
246
|
-
"title": "Role",
|
247
|
-
"type": "string"
|
248
|
-
},
|
249
251
|
"tool_calls": {
|
250
252
|
"anyOf": [
|
251
253
|
{
|
@@ -275,9 +277,9 @@
|
|
275
277
|
}
|
276
278
|
},
|
277
279
|
"required": [
|
280
|
+
"role",
|
278
281
|
"content",
|
279
282
|
"source",
|
280
|
-
"role",
|
281
283
|
"tool_calls",
|
282
284
|
"reasoning"
|
283
285
|
],
|
@@ -286,7 +288,14 @@
|
|
286
288
|
"additionalProperties": false
|
287
289
|
},
|
288
290
|
"ChatMessageSystem": {
|
291
|
+
"description": "System chat message.",
|
289
292
|
"properties": {
|
293
|
+
"role": {
|
294
|
+
"const": "system",
|
295
|
+
"default": "system",
|
296
|
+
"title": "Role",
|
297
|
+
"type": "string"
|
298
|
+
},
|
290
299
|
"content": {
|
291
300
|
"anyOf": [
|
292
301
|
{
|
@@ -329,25 +338,26 @@
|
|
329
338
|
],
|
330
339
|
"default": null,
|
331
340
|
"title": "Source"
|
332
|
-
},
|
333
|
-
"role": {
|
334
|
-
"const": "system",
|
335
|
-
"default": "system",
|
336
|
-
"title": "Role",
|
337
|
-
"type": "string"
|
338
341
|
}
|
339
342
|
},
|
340
343
|
"required": [
|
344
|
+
"role",
|
341
345
|
"content",
|
342
|
-
"source"
|
343
|
-
"role"
|
346
|
+
"source"
|
344
347
|
],
|
345
348
|
"title": "ChatMessageSystem",
|
346
349
|
"type": "object",
|
347
350
|
"additionalProperties": false
|
348
351
|
},
|
349
352
|
"ChatMessageTool": {
|
353
|
+
"description": "Tool chat message.",
|
350
354
|
"properties": {
|
355
|
+
"role": {
|
356
|
+
"const": "tool",
|
357
|
+
"default": "tool",
|
358
|
+
"title": "Role",
|
359
|
+
"type": "string"
|
360
|
+
},
|
351
361
|
"content": {
|
352
362
|
"anyOf": [
|
353
363
|
{
|
@@ -391,12 +401,6 @@
|
|
391
401
|
"default": null,
|
392
402
|
"title": "Source"
|
393
403
|
},
|
394
|
-
"role": {
|
395
|
-
"const": "tool",
|
396
|
-
"default": "tool",
|
397
|
-
"title": "Role",
|
398
|
-
"type": "string"
|
399
|
-
},
|
400
404
|
"tool_call_id": {
|
401
405
|
"anyOf": [
|
402
406
|
{
|
@@ -434,9 +438,9 @@
|
|
434
438
|
}
|
435
439
|
},
|
436
440
|
"required": [
|
441
|
+
"role",
|
437
442
|
"content",
|
438
443
|
"source",
|
439
|
-
"role",
|
440
444
|
"tool_call_id",
|
441
445
|
"function",
|
442
446
|
"error"
|
@@ -446,7 +450,14 @@
|
|
446
450
|
"additionalProperties": false
|
447
451
|
},
|
448
452
|
"ChatMessageUser": {
|
453
|
+
"description": "User chat message.",
|
449
454
|
"properties": {
|
455
|
+
"role": {
|
456
|
+
"const": "user",
|
457
|
+
"default": "user",
|
458
|
+
"title": "Role",
|
459
|
+
"type": "string"
|
460
|
+
},
|
450
461
|
"content": {
|
451
462
|
"anyOf": [
|
452
463
|
{
|
@@ -490,12 +501,6 @@
|
|
490
501
|
"default": null,
|
491
502
|
"title": "Source"
|
492
503
|
},
|
493
|
-
"role": {
|
494
|
-
"const": "user",
|
495
|
-
"default": "user",
|
496
|
-
"title": "Role",
|
497
|
-
"type": "string"
|
498
|
-
},
|
499
504
|
"tool_call_id": {
|
500
505
|
"anyOf": [
|
501
506
|
{
|
@@ -513,9 +518,9 @@
|
|
513
518
|
}
|
514
519
|
},
|
515
520
|
"required": [
|
521
|
+
"role",
|
516
522
|
"content",
|
517
523
|
"source",
|
518
|
-
"role",
|
519
524
|
"tool_call_id"
|
520
525
|
],
|
521
526
|
"title": "ChatMessageUser",
|
@@ -523,6 +528,7 @@
|
|
523
528
|
"additionalProperties": false
|
524
529
|
},
|
525
530
|
"ContentAudio": {
|
531
|
+
"description": "Audio content.",
|
526
532
|
"properties": {
|
527
533
|
"type": {
|
528
534
|
"const": "audio",
|
@@ -553,6 +559,7 @@
|
|
553
559
|
"additionalProperties": false
|
554
560
|
},
|
555
561
|
"ContentImage": {
|
562
|
+
"description": "Image content.",
|
556
563
|
"properties": {
|
557
564
|
"type": {
|
558
565
|
"const": "image",
|
@@ -585,6 +592,7 @@
|
|
585
592
|
"additionalProperties": false
|
586
593
|
},
|
587
594
|
"ContentText": {
|
595
|
+
"description": "Text content.",
|
588
596
|
"properties": {
|
589
597
|
"type": {
|
590
598
|
"const": "text",
|
@@ -606,6 +614,7 @@
|
|
606
614
|
"additionalProperties": false
|
607
615
|
},
|
608
616
|
"ContentVideo": {
|
617
|
+
"description": "Video content.",
|
609
618
|
"properties": {
|
610
619
|
"type": {
|
611
620
|
"const": "video",
|
@@ -677,6 +686,7 @@
|
|
677
686
|
"additionalProperties": false
|
678
687
|
},
|
679
688
|
"EvalConfig": {
|
689
|
+
"description": "Configuration used for evaluation.",
|
680
690
|
"properties": {
|
681
691
|
"limit": {
|
682
692
|
"anyOf": [
|
@@ -954,6 +964,7 @@
|
|
954
964
|
"additionalProperties": false
|
955
965
|
},
|
956
966
|
"EvalDataset": {
|
967
|
+
"description": "Dataset used for evaluation.",
|
957
968
|
"properties": {
|
958
969
|
"name": {
|
959
970
|
"anyOf": [
|
@@ -1038,6 +1049,7 @@
|
|
1038
1049
|
"additionalProperties": false
|
1039
1050
|
},
|
1040
1051
|
"EvalError": {
|
1052
|
+
"description": "Eval error details.",
|
1041
1053
|
"properties": {
|
1042
1054
|
"message": {
|
1043
1055
|
"title": "Message",
|
@@ -1062,6 +1074,7 @@
|
|
1062
1074
|
"additionalProperties": false
|
1063
1075
|
},
|
1064
1076
|
"EvalMetric": {
|
1077
|
+
"description": "Metric for evaluation score.",
|
1065
1078
|
"properties": {
|
1066
1079
|
"name": {
|
1067
1080
|
"title": "Name",
|
@@ -1078,8 +1091,8 @@
|
|
1078
1091
|
],
|
1079
1092
|
"title": "Value"
|
1080
1093
|
},
|
1081
|
-
"
|
1082
|
-
"title": "
|
1094
|
+
"params": {
|
1095
|
+
"title": "Params",
|
1083
1096
|
"type": "object"
|
1084
1097
|
},
|
1085
1098
|
"metadata": {
|
@@ -1098,7 +1111,7 @@
|
|
1098
1111
|
"required": [
|
1099
1112
|
"name",
|
1100
1113
|
"value",
|
1101
|
-
"
|
1114
|
+
"params",
|
1102
1115
|
"metadata"
|
1103
1116
|
],
|
1104
1117
|
"title": "EvalMetric",
|
@@ -1106,6 +1119,7 @@
|
|
1106
1119
|
"additionalProperties": false
|
1107
1120
|
},
|
1108
1121
|
"EvalPlan": {
|
1122
|
+
"description": "Plan (solvers) used in evaluation.",
|
1109
1123
|
"properties": {
|
1110
1124
|
"name": {
|
1111
1125
|
"default": "plan",
|
@@ -1171,6 +1185,7 @@
|
|
1171
1185
|
"additionalProperties": false
|
1172
1186
|
},
|
1173
1187
|
"EvalPlanStep": {
|
1188
|
+
"description": "Solver step.",
|
1174
1189
|
"properties": {
|
1175
1190
|
"solver": {
|
1176
1191
|
"title": "Solver",
|
@@ -1190,6 +1205,7 @@
|
|
1190
1205
|
"additionalProperties": false
|
1191
1206
|
},
|
1192
1207
|
"EvalResults": {
|
1208
|
+
"description": "Scoring results from evaluation.",
|
1193
1209
|
"properties": {
|
1194
1210
|
"total_samples": {
|
1195
1211
|
"default": 0,
|
@@ -1233,6 +1249,7 @@
|
|
1233
1249
|
"additionalProperties": false
|
1234
1250
|
},
|
1235
1251
|
"EvalRevision": {
|
1252
|
+
"description": "Git revision for evaluation.",
|
1236
1253
|
"properties": {
|
1237
1254
|
"type": {
|
1238
1255
|
"const": "git",
|
@@ -1258,6 +1275,7 @@
|
|
1258
1275
|
"additionalProperties": false
|
1259
1276
|
},
|
1260
1277
|
"EvalSample": {
|
1278
|
+
"description": "Sample from evaluation task.",
|
1261
1279
|
"properties": {
|
1262
1280
|
"id": {
|
1263
1281
|
"anyOf": [
|
@@ -1526,6 +1544,7 @@
|
|
1526
1544
|
"additionalProperties": false
|
1527
1545
|
},
|
1528
1546
|
"EvalSampleLimit": {
|
1547
|
+
"description": "Limit encontered by sample.",
|
1529
1548
|
"properties": {
|
1530
1549
|
"type": {
|
1531
1550
|
"enum": [
|
@@ -1553,6 +1572,7 @@
|
|
1553
1572
|
"additionalProperties": false
|
1554
1573
|
},
|
1555
1574
|
"EvalSampleReductions": {
|
1575
|
+
"description": "Score reductions.",
|
1556
1576
|
"properties": {
|
1557
1577
|
"scorer": {
|
1558
1578
|
"title": "Scorer",
|
@@ -1588,6 +1608,7 @@
|
|
1588
1608
|
"additionalProperties": false
|
1589
1609
|
},
|
1590
1610
|
"EvalSampleScore": {
|
1611
|
+
"description": "Score and sample_id scored.",
|
1591
1612
|
"properties": {
|
1592
1613
|
"value": {
|
1593
1614
|
"anyOf": [
|
@@ -1711,6 +1732,7 @@
|
|
1711
1732
|
"additionalProperties": false
|
1712
1733
|
},
|
1713
1734
|
"EvalScore": {
|
1735
|
+
"description": "Score for evaluation task.",
|
1714
1736
|
"properties": {
|
1715
1737
|
"name": {
|
1716
1738
|
"title": "Name",
|
@@ -1769,6 +1791,7 @@
|
|
1769
1791
|
"additionalProperties": false
|
1770
1792
|
},
|
1771
1793
|
"EvalSpec": {
|
1794
|
+
"description": "Eval target and configuration.",
|
1772
1795
|
"properties": {
|
1773
1796
|
"run_id": {
|
1774
1797
|
"title": "Run Id",
|
@@ -1945,6 +1968,7 @@
|
|
1945
1968
|
"additionalProperties": false
|
1946
1969
|
},
|
1947
1970
|
"EvalStats": {
|
1971
|
+
"description": "Timing and usage statistics.",
|
1948
1972
|
"properties": {
|
1949
1973
|
"started_at": {
|
1950
1974
|
"title": "Started At",
|
@@ -1972,7 +1996,7 @@
|
|
1972
1996
|
"additionalProperties": false
|
1973
1997
|
},
|
1974
1998
|
"GenerateConfig": {
|
1975
|
-
"description": "
|
1999
|
+
"description": "Model generation options.",
|
1976
2000
|
"properties": {
|
1977
2001
|
"max_retries": {
|
1978
2002
|
"anyOf": [
|
@@ -2321,6 +2345,18 @@
|
|
2321
2345
|
"title": "Event",
|
2322
2346
|
"type": "string"
|
2323
2347
|
},
|
2348
|
+
"source": {
|
2349
|
+
"anyOf": [
|
2350
|
+
{
|
2351
|
+
"type": "string"
|
2352
|
+
},
|
2353
|
+
{
|
2354
|
+
"type": "null"
|
2355
|
+
}
|
2356
|
+
],
|
2357
|
+
"default": null,
|
2358
|
+
"title": "Source"
|
2359
|
+
},
|
2324
2360
|
"data": {
|
2325
2361
|
"$ref": "#/$defs/JsonValue"
|
2326
2362
|
}
|
@@ -2329,6 +2365,7 @@
|
|
2329
2365
|
"timestamp",
|
2330
2366
|
"pending",
|
2331
2367
|
"event",
|
2368
|
+
"source",
|
2332
2369
|
"data"
|
2333
2370
|
],
|
2334
2371
|
"title": "InfoEvent",
|
@@ -2474,6 +2511,7 @@
|
|
2474
2511
|
"additionalProperties": false
|
2475
2512
|
},
|
2476
2513
|
"LoggingMessage": {
|
2514
|
+
"description": "Message written to Python log.",
|
2477
2515
|
"properties": {
|
2478
2516
|
"name": {
|
2479
2517
|
"anyOf": [
|
@@ -2490,6 +2528,7 @@
|
|
2490
2528
|
"level": {
|
2491
2529
|
"enum": [
|
2492
2530
|
"debug",
|
2531
|
+
"trace",
|
2493
2532
|
"http",
|
2494
2533
|
"sandbox",
|
2495
2534
|
"info",
|
@@ -2771,6 +2810,7 @@
|
|
2771
2810
|
"additionalProperties": false
|
2772
2811
|
},
|
2773
2812
|
"ModelOutput": {
|
2813
|
+
"description": "Output from model generation.",
|
2774
2814
|
"properties": {
|
2775
2815
|
"model": {
|
2776
2816
|
"title": "Model",
|
@@ -2845,6 +2885,7 @@
|
|
2845
2885
|
"additionalProperties": false
|
2846
2886
|
},
|
2847
2887
|
"ModelUsage": {
|
2888
|
+
"description": "Token usage for completion.",
|
2848
2889
|
"properties": {
|
2849
2890
|
"input_tokens": {
|
2850
2891
|
"default": 0,
|
@@ -2898,6 +2939,7 @@
|
|
2898
2939
|
"additionalProperties": false
|
2899
2940
|
},
|
2900
2941
|
"Sample": {
|
2942
|
+
"description": "Sample for an evaluation task.",
|
2901
2943
|
"properties": {
|
2902
2944
|
"input": {
|
2903
2945
|
"anyOf": [
|
@@ -3172,7 +3214,7 @@
|
|
3172
3214
|
"type": "array"
|
3173
3215
|
},
|
3174
3216
|
"Score": {
|
3175
|
-
"description": "Score generated by a scorer
|
3217
|
+
"description": "Score generated by a scorer.",
|
3176
3218
|
"properties": {
|
3177
3219
|
"value": {
|
3178
3220
|
"anyOf": [
|
@@ -3280,7 +3322,7 @@
|
|
3280
3322
|
"additionalProperties": false
|
3281
3323
|
},
|
3282
3324
|
"ScoreEvent": {
|
3283
|
-
"description": "Event with
|
3325
|
+
"description": "Event with score.\n\nCan be the final score for a `Sample`, or can be an intermediate score\nresulting from a call to `score`.",
|
3284
3326
|
"properties": {
|
3285
3327
|
"timestamp": {
|
3286
3328
|
"format": "date-time",
|
@@ -3325,6 +3367,11 @@
|
|
3325
3367
|
],
|
3326
3368
|
"default": null,
|
3327
3369
|
"title": "Target"
|
3370
|
+
},
|
3371
|
+
"intermediate": {
|
3372
|
+
"default": false,
|
3373
|
+
"title": "Intermediate",
|
3374
|
+
"type": "boolean"
|
3328
3375
|
}
|
3329
3376
|
},
|
3330
3377
|
"required": [
|
@@ -3332,7 +3379,8 @@
|
|
3332
3379
|
"pending",
|
3333
3380
|
"event",
|
3334
3381
|
"score",
|
3335
|
-
"target"
|
3382
|
+
"target",
|
3383
|
+
"intermediate"
|
3336
3384
|
],
|
3337
3385
|
"title": "ScoreEvent",
|
3338
3386
|
"type": "object",
|
@@ -4222,6 +4270,7 @@
|
|
4222
4270
|
"additionalProperties": false
|
4223
4271
|
}
|
4224
4272
|
},
|
4273
|
+
"description": "Evaluation log.",
|
4225
4274
|
"properties": {
|
4226
4275
|
"version": {
|
4227
4276
|
"default": 2,
|
@@ -4243,37 +4292,7 @@
|
|
4243
4292
|
"$ref": "#/$defs/EvalSpec"
|
4244
4293
|
},
|
4245
4294
|
"plan": {
|
4246
|
-
"$ref": "#/$defs/EvalPlan"
|
4247
|
-
"default": {
|
4248
|
-
"name": "plan",
|
4249
|
-
"steps": [],
|
4250
|
-
"finish": null,
|
4251
|
-
"config": {
|
4252
|
-
"best_of": null,
|
4253
|
-
"cache_prompt": null,
|
4254
|
-
"frequency_penalty": null,
|
4255
|
-
"internal_tools": null,
|
4256
|
-
"logit_bias": null,
|
4257
|
-
"logprobs": null,
|
4258
|
-
"max_connections": null,
|
4259
|
-
"max_retries": null,
|
4260
|
-
"max_tokens": null,
|
4261
|
-
"max_tool_output": null,
|
4262
|
-
"num_choices": null,
|
4263
|
-
"parallel_tool_calls": null,
|
4264
|
-
"presence_penalty": null,
|
4265
|
-
"reasoning_effort": null,
|
4266
|
-
"reasoning_history": null,
|
4267
|
-
"seed": null,
|
4268
|
-
"stop_seqs": null,
|
4269
|
-
"system_message": null,
|
4270
|
-
"temperature": null,
|
4271
|
-
"timeout": null,
|
4272
|
-
"top_k": null,
|
4273
|
-
"top_logprobs": null,
|
4274
|
-
"top_p": null
|
4275
|
-
}
|
4276
|
-
}
|
4295
|
+
"$ref": "#/$defs/EvalPlan"
|
4277
4296
|
},
|
4278
4297
|
"results": {
|
4279
4298
|
"anyOf": [
|
@@ -4287,12 +4306,7 @@
|
|
4287
4306
|
"default": null
|
4288
4307
|
},
|
4289
4308
|
"stats": {
|
4290
|
-
"$ref": "#/$defs/EvalStats"
|
4291
|
-
"default": {
|
4292
|
-
"started_at": "",
|
4293
|
-
"completed_at": "",
|
4294
|
-
"model_usage": {}
|
4295
|
-
}
|
4309
|
+
"$ref": "#/$defs/EvalStats"
|
4296
4310
|
},
|
4297
4311
|
"error": {
|
4298
4312
|
"anyOf": [
|
@@ -8,6 +8,7 @@
|
|
8
8
|
"scripts": {
|
9
9
|
"build": "vite build",
|
10
10
|
"watch": "vite build --watch",
|
11
|
+
"dev-watch": "NODE_ENV=development vite build --mode development --watch",
|
11
12
|
"dev": "vite",
|
12
13
|
"prettier:check": "prettier --check src",
|
13
14
|
"prettier:write": "prettier --write src",
|
@@ -18,18 +19,34 @@
|
|
18
19
|
},
|
19
20
|
"devDependencies": {
|
20
21
|
"@eslint/js": "^9.5.0",
|
22
|
+
"@types/bootstrap": "^5.2.10",
|
23
|
+
"@types/clipboard": "^2.0.10",
|
24
|
+
"@types/codemirror": "^5.60.15",
|
25
|
+
"@types/css-modules": "^1.0.5",
|
26
|
+
"@types/markdown-it": "^14.1.2",
|
27
|
+
"@types/prismjs": "^1.26.5",
|
28
|
+
"@types/react": "^19.0.7",
|
29
|
+
"@types/react-dom": "^19.0.3",
|
30
|
+
"@vitejs/plugin-react": "^4.3.4",
|
21
31
|
"eslint": "9.x",
|
22
32
|
"globals": "^15.6.0",
|
23
33
|
"prettier": "^3.3.3",
|
24
|
-
"
|
25
|
-
"vite
|
34
|
+
"typescript": "^5.7.3",
|
35
|
+
"vite": "^5.3.2"
|
26
36
|
},
|
27
37
|
"dependencies": {
|
38
|
+
"@codemirror/autocomplete": "^6.18.4",
|
39
|
+
"@codemirror/language": "^6.10.8",
|
40
|
+
"@codemirror/lint": "^6.8.4",
|
41
|
+
"@codemirror/state": "^6.5.1",
|
42
|
+
"@lezer/highlight": "^1.2.1",
|
28
43
|
"@popperjs/core": "^2.11.8",
|
29
44
|
"asciinema-player": "^3.8.2",
|
45
|
+
"ansi-output": "^0.0.9",
|
30
46
|
"bootstrap": "^5.3.3",
|
31
47
|
"bootstrap-icons": "^1.11.3",
|
32
48
|
"clipboard": "^2.0.11",
|
49
|
+
"clsx": "^2.1.1",
|
33
50
|
"codemirror": "^6.0.1",
|
34
51
|
"fast-json-patch": "^3.1.1",
|
35
52
|
"fflate": "^0.8.2",
|
@@ -41,7 +58,8 @@
|
|
41
58
|
"markdown-it": "^14.1.0",
|
42
59
|
"murmurhash": "^2.0.1",
|
43
60
|
"postcss-url": "^10.1.3",
|
44
|
-
"
|
45
|
-
"
|
61
|
+
"prismjs": "^1.29.0",
|
62
|
+
"react": "^19.0.0",
|
63
|
+
"react-dom": "^19.0.0"
|
46
64
|
}
|
47
65
|
}
|
@@ -1,11 +1,10 @@
|
|
1
|
-
|
2
1
|
// postcss.config.js
|
3
2
|
module.exports = {
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
3
|
+
plugins: [
|
4
|
+
require("postcss-url")({
|
5
|
+
url: "inline", // Inline all assets
|
6
|
+
maxSize: Infinity, // Maximum file size to inline (in kilobytes). Adjust as needed.
|
7
|
+
fallback: "copy", // Copy files to output directory if they are larger than the maxSize
|
8
|
+
}),
|
9
|
+
],
|
10
|
+
};
|