inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/cache.py +8 -7
- inspect_ai/_cli/common.py +0 -12
- inspect_ai/_cli/eval.py +32 -4
- inspect_ai/_cli/info.py +1 -0
- inspect_ai/_cli/list.py +1 -1
- inspect_ai/_cli/log.py +2 -0
- inspect_ai/_cli/main.py +1 -1
- inspect_ai/_cli/sandbox.py +4 -1
- inspect_ai/_cli/score.py +181 -32
- inspect_ai/_cli/trace.py +10 -0
- inspect_ai/_cli/view.py +4 -2
- inspect_ai/_display/core/active.py +2 -3
- inspect_ai/_display/core/config.py +7 -1
- inspect_ai/_display/textual/widgets/samples.py +4 -3
- inspect_ai/_display/textual/widgets/sandbox.py +6 -0
- inspect_ai/_eval/eval.py +104 -101
- inspect_ai/_eval/evalset.py +75 -75
- inspect_ai/_eval/loader.py +122 -12
- inspect_ai/_eval/registry.py +1 -1
- inspect_ai/_eval/run.py +14 -0
- inspect_ai/_eval/score.py +125 -36
- inspect_ai/_eval/task/log.py +105 -4
- inspect_ai/_eval/task/results.py +92 -38
- inspect_ai/_eval/task/run.py +9 -2
- inspect_ai/_eval/task/sandbox.py +35 -2
- inspect_ai/_eval/task/task.py +49 -46
- inspect_ai/_util/constants.py +1 -1
- inspect_ai/_util/content.py +8 -0
- inspect_ai/_util/error.py +2 -0
- inspect_ai/_util/file.py +15 -1
- inspect_ai/_util/hash.py +1 -1
- inspect_ai/_util/logger.py +4 -2
- inspect_ai/_util/registry.py +7 -1
- inspect_ai/_view/view.py +1 -2
- inspect_ai/_view/www/.vscode/extensions.json +3 -0
- inspect_ai/_view/www/.vscode/settings.json +8 -0
- inspect_ai/_view/www/App.css +97 -29
- inspect_ai/_view/www/README.md +1 -1
- inspect_ai/_view/www/dist/assets/index.css +16663 -14674
- inspect_ai/_view/www/dist/assets/index.js +58808 -51348
- inspect_ai/_view/www/dist/index.html +1 -1
- inspect_ai/_view/www/index.html +2 -2
- inspect_ai/_view/www/log-schema.json +87 -73
- inspect_ai/_view/www/package.json +22 -4
- inspect_ai/_view/www/postcss.config.cjs +8 -9
- inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
- inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
- inspect_ai/_view/www/src/api/api-browser.ts +2 -2
- inspect_ai/_view/www/src/api/api-http.ts +3 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
- inspect_ai/_view/www/src/api/client-api.ts +4 -4
- inspect_ai/_view/www/src/api/index.ts +4 -4
- inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
- inspect_ai/_view/www/src/appearance/colors.ts +9 -0
- inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
- inspect_ai/_view/www/src/appearance/icons.ts +100 -0
- inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
- inspect_ai/_view/www/src/components/Card.css +60 -0
- inspect_ai/_view/www/src/components/Card.tsx +109 -0
- inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
- inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
- inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
- inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
- inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
- inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
- inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
- inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
- inspect_ai/_view/www/src/components/FindBand.css +49 -0
- inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
- inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
- inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
- inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
- inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
- inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
- inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
- inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
- inspect_ai/_view/www/src/components/MessageBand.css +43 -0
- inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
- inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
- inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
- inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
- inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
- inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
- inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
- inspect_ai/_view/www/src/components/ToolButton.css +3 -0
- inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
- inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
- inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
- inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
- inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
- inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
- inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
- inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
- inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
- inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
- inspect_ai/_view/www/src/metadata/types.ts +18 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
- inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
- inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
- inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
- inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
- inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
- inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
- inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
- inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
- inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
- inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
- inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
- inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
- inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
- inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
- inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
- inspect_ai/_view/www/src/samples/error/error.ts +15 -0
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
- inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
- inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
- inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
- inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
- inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
- inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
- inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
- inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
- inspect_ai/_view/www/src/types/log.d.ts +108 -19
- inspect_ai/_view/www/src/types/prism.d.ts +11 -0
- inspect_ai/_view/www/src/types.ts +71 -0
- inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
- inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
- inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
- inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
- inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
- inspect_ai/_view/www/src/utils/attachments.ts +42 -0
- inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
- inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
- inspect_ai/_view/www/src/utils/debugging.ts +28 -0
- inspect_ai/_view/www/src/utils/dom.ts +30 -0
- inspect_ai/_view/www/src/utils/format.ts +194 -0
- inspect_ai/_view/www/src/utils/git.ts +7 -0
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/_view/www/src/utils/http.ts +14 -0
- inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
- inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
- inspect_ai/_view/www/src/utils/queue.ts +51 -0
- inspect_ai/_view/www/src/utils/sync.ts +114 -0
- inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
- inspect_ai/_view/www/src/utils/vscode.ts +13 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
- inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
- inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
- inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
- inspect_ai/_view/www/src/workspace/types.ts +10 -0
- inspect_ai/_view/www/src/workspace/utils.ts +34 -0
- inspect_ai/_view/www/tsconfig.json +23 -9
- inspect_ai/_view/www/vite.config.js +8 -17
- inspect_ai/_view/www/yarn.lock +627 -556
- inspect_ai/approval/_approval.py +2 -0
- inspect_ai/approval/_approver.py +4 -4
- inspect_ai/approval/_auto.py +1 -1
- inspect_ai/approval/_human/approver.py +3 -0
- inspect_ai/approval/_policy.py +5 -0
- inspect_ai/approval/_registry.py +2 -2
- inspect_ai/dataset/_dataset.py +64 -37
- inspect_ai/dataset/_sources/__init__.py +0 -0
- inspect_ai/dataset/_sources/csv.py +20 -12
- inspect_ai/dataset/_sources/file.py +4 -0
- inspect_ai/dataset/_sources/hf.py +39 -29
- inspect_ai/dataset/_sources/json.py +17 -9
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_convert.py +3 -3
- inspect_ai/log/_file.py +24 -9
- inspect_ai/log/_log.py +101 -13
- inspect_ai/log/_message.py +4 -2
- inspect_ai/log/_recorders/file.py +4 -0
- inspect_ai/log/_recorders/json.py +5 -7
- inspect_ai/log/_recorders/recorder.py +3 -0
- inspect_ai/log/_transcript.py +19 -8
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_cache.py +39 -21
- inspect_ai/model/_call_tools.py +4 -3
- inspect_ai/model/_chat_message.py +14 -4
- inspect_ai/model/_generate_config.py +1 -1
- inspect_ai/model/_model.py +31 -24
- inspect_ai/model/_model_output.py +14 -1
- inspect_ai/model/_openai.py +10 -18
- inspect_ai/model/_providers/anthropic.py +3 -3
- inspect_ai/model/_providers/google.py +9 -5
- inspect_ai/model/_providers/openai.py +5 -9
- inspect_ai/model/_providers/openai_o1.py +3 -5
- inspect_ai/model/_providers/openrouter.py +86 -0
- inspect_ai/model/_providers/providers.py +11 -0
- inspect_ai/scorer/__init__.py +6 -1
- inspect_ai/scorer/_answer.py +7 -7
- inspect_ai/scorer/_classification.py +38 -18
- inspect_ai/scorer/_common.py +2 -8
- inspect_ai/scorer/_match.py +4 -5
- inspect_ai/scorer/_metric.py +87 -28
- inspect_ai/scorer/_metrics/__init__.py +3 -3
- inspect_ai/scorer/_metrics/accuracy.py +8 -10
- inspect_ai/scorer/_metrics/mean.py +3 -17
- inspect_ai/scorer/_metrics/std.py +111 -30
- inspect_ai/scorer/_model.py +12 -12
- inspect_ai/scorer/_pattern.py +3 -3
- inspect_ai/scorer/_reducer/reducer.py +36 -21
- inspect_ai/scorer/_reducer/registry.py +2 -2
- inspect_ai/scorer/_reducer/types.py +7 -1
- inspect_ai/scorer/_score.py +11 -1
- inspect_ai/scorer/_scorer.py +110 -16
- inspect_ai/solver/__init__.py +1 -1
- inspect_ai/solver/_basic_agent.py +19 -22
- inspect_ai/solver/_bridge/__init__.py +0 -3
- inspect_ai/solver/_bridge/bridge.py +3 -3
- inspect_ai/solver/_chain.py +1 -2
- inspect_ai/solver/_critique.py +3 -3
- inspect_ai/solver/_fork.py +2 -2
- inspect_ai/solver/_human_agent/__init__.py +0 -0
- inspect_ai/solver/_human_agent/agent.py +5 -8
- inspect_ai/solver/_human_agent/commands/clock.py +14 -10
- inspect_ai/solver/_human_agent/commands/note.py +1 -1
- inspect_ai/solver/_human_agent/commands/score.py +0 -11
- inspect_ai/solver/_multiple_choice.py +38 -26
- inspect_ai/solver/_prompt.py +7 -7
- inspect_ai/solver/_solver.py +53 -52
- inspect_ai/solver/_task_state.py +80 -69
- inspect_ai/solver/_use_tools.py +9 -9
- inspect_ai/tool/__init__.py +4 -1
- inspect_ai/tool/_tool.py +43 -14
- inspect_ai/tool/_tool_call.py +6 -2
- inspect_ai/tool/_tool_choice.py +3 -1
- inspect_ai/tool/_tool_def.py +10 -8
- inspect_ai/tool/_tool_params.py +24 -0
- inspect_ai/tool/_tool_with.py +7 -7
- inspect_ai/tool/_tools/__init__.py +0 -0
- inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
- inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
- inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
- inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/tool/_tools/_execute.py +23 -11
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
- inspect_ai/tool/_tools/_web_search.py +7 -5
- inspect_ai/tool/beta.py +3 -0
- inspect_ai/util/_concurrency.py +3 -3
- inspect_ai/util/_panel.py +2 -0
- inspect_ai/util/_resource.py +12 -12
- inspect_ai/util/_sandbox/docker/compose.py +23 -20
- inspect_ai/util/_sandbox/docker/config.py +2 -1
- inspect_ai/util/_sandbox/docker/docker.py +42 -86
- inspect_ai/util/_sandbox/docker/service.py +100 -0
- inspect_ai/util/_sandbox/environment.py +99 -96
- inspect_ai/util/_sandbox/self_check.py +124 -16
- inspect_ai/util/_subprocess.py +5 -3
- inspect_ai/util/_subtask.py +15 -16
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
- inspect_ai-0.3.64.dist-info/RECORD +625 -0
- inspect_ai/_view/www/src/Register.mjs +0 -3
- inspect_ai/_view/www/src/Types.mjs +0 -38
- inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
- inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
- inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
- inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
- inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
- inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
- inspect_ai/_view/www/src/components/Card.mjs +0 -126
- inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
- inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
- inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
- inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
- inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
- inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
- inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
- inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
- inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
- inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
- inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
- inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
- inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
- inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
- inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
- inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
- inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
- inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
- inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
- inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
- inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
- inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
- inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
- inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
- inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
- inspect_ai/_view/www/src/components/Tools.mjs +0 -376
- inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
- inspect_ai/_view/www/src/components/ansi-output.js +0 -932
- inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
- inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
- inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
- inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
- inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
- inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
- inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
- inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
- inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
- inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
- inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
- inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
- inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
- inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
- inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
- inspect_ai/_view/www/src/utils/Format.mjs +0 -260
- inspect_ai/_view/www/src/utils/Git.mjs +0 -12
- inspect_ai/_view/www/src/utils/Html.mjs +0 -21
- inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
- inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
- inspect_ai/_view/www/src/utils/http.mjs +0 -18
- inspect_ai/_view/www/src/utils/queue.mjs +0 -67
- inspect_ai/_view/www/src/utils/sync.mjs +0 -101
- inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
- inspect_ai/tool/beta/__init__.py +0 -5
- inspect_ai-0.3.62.dist-info/RECORD +0 -481
- /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
- /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
- /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
@@ -112,6 +112,7 @@ export type Input =
|
|
112
112
|
| ChatMessageAssistant
|
113
113
|
| ChatMessageTool
|
114
114
|
)[];
|
115
|
+
export type Role = "system";
|
115
116
|
export type Content =
|
116
117
|
| string
|
117
118
|
| (ContentText | ContentImage | ContentAudio | ContentVideo)[];
|
@@ -127,18 +128,17 @@ export type Type4 = "video";
|
|
127
128
|
export type Video = string;
|
128
129
|
export type Format1 = "mp4" | "mpeg" | "mov";
|
129
130
|
export type Source = ("input" | "generate") | null;
|
130
|
-
export type
|
131
|
+
export type Role1 = "user";
|
131
132
|
export type Content1 =
|
132
133
|
| string
|
133
134
|
| (ContentText | ContentImage | ContentAudio | ContentVideo)[];
|
134
135
|
export type Source1 = ("input" | "generate") | null;
|
135
|
-
export type Role1 = "user";
|
136
136
|
export type ToolCallId = string[] | null;
|
137
|
+
export type Role2 = "assistant";
|
137
138
|
export type Content2 =
|
138
139
|
| string
|
139
140
|
| (ContentText | ContentImage | ContentAudio | ContentVideo)[];
|
140
141
|
export type Source2 = ("input" | "generate") | null;
|
141
|
-
export type Role2 = "assistant";
|
142
142
|
export type ToolCalls = ToolCall[] | null;
|
143
143
|
export type Id1 = string;
|
144
144
|
export type Function = string;
|
@@ -148,11 +148,11 @@ export type Title = string | null;
|
|
148
148
|
export type Format2 = "text" | "markdown";
|
149
149
|
export type Content3 = string;
|
150
150
|
export type Reasoning = string | null;
|
151
|
+
export type Role3 = "tool";
|
151
152
|
export type Content4 =
|
152
153
|
| string
|
153
154
|
| (ContentText | ContentImage | ContentAudio | ContentVideo)[];
|
154
155
|
export type Source3 = ("input" | "generate") | null;
|
155
|
-
export type Role3 = "tool";
|
156
156
|
export type ToolCallId1 = string | null;
|
157
157
|
export type Function1 = string | null;
|
158
158
|
export type Type6 =
|
@@ -315,6 +315,7 @@ export type Timestamp8 = string;
|
|
315
315
|
export type Pending8 = boolean | null;
|
316
316
|
export type Event8 = "score";
|
317
317
|
export type Target2 = string | string[] | null;
|
318
|
+
export type Intermediate = boolean;
|
318
319
|
export type Timestamp9 = string;
|
319
320
|
export type Pending9 = boolean | null;
|
320
321
|
export type Event9 = "error";
|
@@ -324,6 +325,7 @@ export type Event10 = "logger";
|
|
324
325
|
export type Name7 = string | null;
|
325
326
|
export type Level =
|
326
327
|
| "debug"
|
328
|
+
| "trace"
|
327
329
|
| "http"
|
328
330
|
| "sandbox"
|
329
331
|
| "info"
|
@@ -338,6 +340,7 @@ export type Lineno = number;
|
|
338
340
|
export type Timestamp11 = string;
|
339
341
|
export type Pending11 = boolean | null;
|
340
342
|
export type Event11 = "info";
|
343
|
+
export type Source4 = string | null;
|
341
344
|
export type Timestamp12 = string;
|
342
345
|
export type Pending12 = boolean | null;
|
343
346
|
export type Event12 = "step";
|
@@ -423,6 +426,9 @@ export type SampleId1 = string | number | null;
|
|
423
426
|
export type Samples2 = EvalSampleScore[];
|
424
427
|
export type Location1 = string;
|
425
428
|
|
429
|
+
/**
|
430
|
+
* Evaluation log.
|
431
|
+
*/
|
426
432
|
export interface EvalLog {
|
427
433
|
version?: Version;
|
428
434
|
status?: Status;
|
@@ -435,6 +441,9 @@ export interface EvalLog {
|
|
435
441
|
reductions?: Reductions;
|
436
442
|
location?: Location1;
|
437
443
|
}
|
444
|
+
/**
|
445
|
+
* Eval target and configuration.
|
446
|
+
*/
|
438
447
|
export interface EvalSpec {
|
439
448
|
run_id: RunId;
|
440
449
|
created: Created;
|
@@ -459,6 +468,9 @@ export interface EvalSpec {
|
|
459
468
|
}
|
460
469
|
export interface TaskAttribs {}
|
461
470
|
export interface TaskArgs {}
|
471
|
+
/**
|
472
|
+
* Dataset used for evaluation.
|
473
|
+
*/
|
462
474
|
export interface EvalDataset {
|
463
475
|
name: Name;
|
464
476
|
location: Location;
|
@@ -467,6 +479,9 @@ export interface EvalDataset {
|
|
467
479
|
shuffled: Shuffled;
|
468
480
|
}
|
469
481
|
export interface ModelArgs {}
|
482
|
+
/**
|
483
|
+
* Configuration used for evaluation.
|
484
|
+
*/
|
470
485
|
export interface EvalConfig {
|
471
486
|
limit: Limit;
|
472
487
|
sample_id: SampleId;
|
@@ -512,6 +527,9 @@ export interface ApproverPolicyConfig {
|
|
512
527
|
params: Params;
|
513
528
|
}
|
514
529
|
export interface Params {}
|
530
|
+
/**
|
531
|
+
* Git revision for evaluation.
|
532
|
+
*/
|
515
533
|
export interface EvalRevision {
|
516
534
|
type: Type;
|
517
535
|
origin: Origin;
|
@@ -520,19 +538,25 @@ export interface EvalRevision {
|
|
520
538
|
export interface Packages {
|
521
539
|
[k: string]: string;
|
522
540
|
}
|
541
|
+
/**
|
542
|
+
* Plan (solvers) used in evaluation.
|
543
|
+
*/
|
523
544
|
export interface EvalPlan {
|
524
545
|
name: Name2;
|
525
546
|
steps: Steps;
|
526
547
|
finish: EvalPlanStep | null;
|
527
548
|
config: GenerateConfig;
|
528
549
|
}
|
550
|
+
/**
|
551
|
+
* Solver step.
|
552
|
+
*/
|
529
553
|
export interface EvalPlanStep {
|
530
554
|
solver: Solver1;
|
531
555
|
params: Params1;
|
532
556
|
}
|
533
557
|
export interface Params1 {}
|
534
558
|
/**
|
535
|
-
*
|
559
|
+
* Model generation options.
|
536
560
|
*/
|
537
561
|
export interface GenerateConfig {
|
538
562
|
max_retries: MaxRetries;
|
@@ -559,12 +583,18 @@ export interface GenerateConfig {
|
|
559
583
|
reasoning_effort: ReasoningEffort;
|
560
584
|
reasoning_history: ReasoningHistory;
|
561
585
|
}
|
586
|
+
/**
|
587
|
+
* Scoring results from evaluation.
|
588
|
+
*/
|
562
589
|
export interface EvalResults {
|
563
590
|
total_samples: TotalSamples;
|
564
591
|
completed_samples: CompletedSamples;
|
565
592
|
scores: Scores;
|
566
593
|
metadata: Metadata3;
|
567
594
|
}
|
595
|
+
/**
|
596
|
+
* Score for evaluation task.
|
597
|
+
*/
|
568
598
|
export interface EvalScore {
|
569
599
|
name: Name3;
|
570
600
|
scorer: Scorer;
|
@@ -577,13 +607,19 @@ export interface Params2 {}
|
|
577
607
|
export interface Metrics {
|
578
608
|
[k: string]: EvalMetric;
|
579
609
|
}
|
610
|
+
/**
|
611
|
+
* Metric for evaluation score.
|
612
|
+
*/
|
580
613
|
export interface EvalMetric {
|
581
614
|
name: Name4;
|
582
615
|
value: Value;
|
583
|
-
|
616
|
+
params: Params3;
|
584
617
|
metadata: Metadata1;
|
585
618
|
}
|
586
|
-
export interface
|
619
|
+
export interface Params3 {}
|
620
|
+
/**
|
621
|
+
* Timing and usage statistics.
|
622
|
+
*/
|
587
623
|
export interface EvalStats {
|
588
624
|
started_at: StartedAt;
|
589
625
|
completed_at: CompletedAt;
|
@@ -592,6 +628,9 @@ export interface EvalStats {
|
|
592
628
|
export interface ModelUsage {
|
593
629
|
[k: string]: ModelUsage1;
|
594
630
|
}
|
631
|
+
/**
|
632
|
+
* Token usage for completion.
|
633
|
+
*/
|
595
634
|
export interface ModelUsage1 {
|
596
635
|
input_tokens: InputTokens;
|
597
636
|
output_tokens: OutputTokens;
|
@@ -599,11 +638,17 @@ export interface ModelUsage1 {
|
|
599
638
|
input_tokens_cache_write: InputTokensCacheWrite;
|
600
639
|
input_tokens_cache_read: InputTokensCacheRead;
|
601
640
|
}
|
641
|
+
/**
|
642
|
+
* Eval error details.
|
643
|
+
*/
|
602
644
|
export interface EvalError {
|
603
645
|
message: Message;
|
604
646
|
traceback: Traceback;
|
605
647
|
traceback_ansi: TracebackAnsi;
|
606
648
|
}
|
649
|
+
/**
|
650
|
+
* Sample from evaluation task.
|
651
|
+
*/
|
607
652
|
export interface EvalSample {
|
608
653
|
id: Id;
|
609
654
|
epoch: Epoch;
|
@@ -624,40 +669,61 @@ export interface EvalSample {
|
|
624
669
|
attachments: Attachments;
|
625
670
|
limit: EvalSampleLimit | null;
|
626
671
|
}
|
672
|
+
/**
|
673
|
+
* System chat message.
|
674
|
+
*/
|
627
675
|
export interface ChatMessageSystem {
|
676
|
+
role: Role;
|
628
677
|
content: Content;
|
629
678
|
source: Source;
|
630
|
-
role: Role;
|
631
679
|
}
|
680
|
+
/**
|
681
|
+
* Text content.
|
682
|
+
*/
|
632
683
|
export interface ContentText {
|
633
684
|
type: Type1;
|
634
685
|
text: Text;
|
635
686
|
}
|
687
|
+
/**
|
688
|
+
* Image content.
|
689
|
+
*/
|
636
690
|
export interface ContentImage {
|
637
691
|
type: Type2;
|
638
692
|
image: Image;
|
639
693
|
detail: Detail;
|
640
694
|
}
|
695
|
+
/**
|
696
|
+
* Audio content.
|
697
|
+
*/
|
641
698
|
export interface ContentAudio {
|
642
699
|
type: Type3;
|
643
700
|
audio: Audio;
|
644
701
|
format: Format;
|
645
702
|
}
|
703
|
+
/**
|
704
|
+
* Video content.
|
705
|
+
*/
|
646
706
|
export interface ContentVideo {
|
647
707
|
type: Type4;
|
648
708
|
video: Video;
|
649
709
|
format: Format1;
|
650
710
|
}
|
711
|
+
/**
|
712
|
+
* User chat message.
|
713
|
+
*/
|
651
714
|
export interface ChatMessageUser {
|
715
|
+
role: Role1;
|
652
716
|
content: Content1;
|
653
717
|
source: Source1;
|
654
|
-
role: Role1;
|
655
718
|
tool_call_id: ToolCallId;
|
656
719
|
}
|
720
|
+
/**
|
721
|
+
* Assistant chat message.
|
722
|
+
*/
|
657
723
|
export interface ChatMessageAssistant {
|
724
|
+
role: Role2;
|
658
725
|
content: Content2;
|
659
726
|
source: Source2;
|
660
|
-
role: Role2;
|
661
727
|
tool_calls: ToolCalls;
|
662
728
|
reasoning: Reasoning;
|
663
729
|
}
|
@@ -678,10 +744,13 @@ export interface ToolCallContent {
|
|
678
744
|
format: Format2;
|
679
745
|
content: Content3;
|
680
746
|
}
|
747
|
+
/**
|
748
|
+
* Tool chat message.
|
749
|
+
*/
|
681
750
|
export interface ChatMessageTool {
|
751
|
+
role: Role3;
|
682
752
|
content: Content4;
|
683
753
|
source: Source3;
|
684
|
-
role: Role3;
|
685
754
|
tool_call_id: ToolCallId1;
|
686
755
|
function: Function1;
|
687
756
|
error: ToolCallError | null;
|
@@ -690,6 +759,9 @@ export interface ToolCallError {
|
|
690
759
|
type: Type6;
|
691
760
|
message: Message1;
|
692
761
|
}
|
762
|
+
/**
|
763
|
+
* Output from model generation.
|
764
|
+
*/
|
693
765
|
export interface ModelOutput {
|
694
766
|
model: Model1;
|
695
767
|
choices: Choices1;
|
@@ -698,6 +770,9 @@ export interface ModelOutput {
|
|
698
770
|
metadata: Metadata4;
|
699
771
|
error: Error;
|
700
772
|
}
|
773
|
+
/**
|
774
|
+
* Choice generated for completion.
|
775
|
+
*/
|
701
776
|
export interface ChatCompletionChoice {
|
702
777
|
message: ChatMessageAssistant;
|
703
778
|
stop_reason: StopReason;
|
@@ -728,12 +803,6 @@ export interface TopLogprob {
|
|
728
803
|
}
|
729
804
|
/**
|
730
805
|
* Score generated by a scorer.
|
731
|
-
*
|
732
|
-
* Args:
|
733
|
-
* value (Value): Score value.
|
734
|
-
* answer (str | None): Answer extracted from model output (optional).
|
735
|
-
* explanation (str | None): Explanation of score (optional).
|
736
|
-
* metadata (dict[str,Any]): Additional metadata related to the score.
|
737
806
|
*/
|
738
807
|
export interface Score {
|
739
808
|
value: Value1;
|
@@ -753,6 +822,9 @@ export interface SampleInitEvent {
|
|
753
822
|
sample: Sample;
|
754
823
|
state: JsonValue;
|
755
824
|
}
|
825
|
+
/**
|
826
|
+
* Sample for an evaluation task.
|
827
|
+
*/
|
756
828
|
export interface Sample {
|
757
829
|
input: Input1;
|
758
830
|
choices: Choices2;
|
@@ -887,7 +959,7 @@ export interface ToolFunction {
|
|
887
959
|
name: Name6;
|
888
960
|
}
|
889
961
|
/**
|
890
|
-
*
|
962
|
+
* Model generation options.
|
891
963
|
*/
|
892
964
|
export interface GenerateConfig1 {
|
893
965
|
max_retries: MaxRetries;
|
@@ -983,7 +1055,10 @@ export interface InputEvent {
|
|
983
1055
|
input_ansi: InputAnsi;
|
984
1056
|
}
|
985
1057
|
/**
|
986
|
-
* Event with
|
1058
|
+
* Event with score.
|
1059
|
+
*
|
1060
|
+
* Can be the final score for a `Sample`, or can be an intermediate score
|
1061
|
+
* resulting from a call to `score`.
|
987
1062
|
*/
|
988
1063
|
export interface ScoreEvent {
|
989
1064
|
timestamp: Timestamp8;
|
@@ -991,6 +1066,7 @@ export interface ScoreEvent {
|
|
991
1066
|
event: Event8;
|
992
1067
|
score: Score;
|
993
1068
|
target: Target2;
|
1069
|
+
intermediate: Intermediate;
|
994
1070
|
}
|
995
1071
|
/**
|
996
1072
|
* Event with sample error.
|
@@ -1010,6 +1086,9 @@ export interface LoggerEvent {
|
|
1010
1086
|
event: Event10;
|
1011
1087
|
message: LoggingMessage;
|
1012
1088
|
}
|
1089
|
+
/**
|
1090
|
+
* Message written to Python log.
|
1091
|
+
*/
|
1013
1092
|
export interface LoggingMessage {
|
1014
1093
|
name: Name7;
|
1015
1094
|
level: Level;
|
@@ -1026,6 +1105,7 @@ export interface InfoEvent {
|
|
1026
1105
|
timestamp: Timestamp11;
|
1027
1106
|
pending: Pending11;
|
1028
1107
|
event: Event11;
|
1108
|
+
source: Source4;
|
1029
1109
|
data: JsonValue;
|
1030
1110
|
}
|
1031
1111
|
/**
|
@@ -1062,15 +1142,24 @@ export interface ModelUsage2 {
|
|
1062
1142
|
export interface Attachments {
|
1063
1143
|
[k: string]: string;
|
1064
1144
|
}
|
1145
|
+
/**
|
1146
|
+
* Limit encountered by sample.
|
1147
|
+
*/
|
1065
1148
|
export interface EvalSampleLimit {
|
1066
1149
|
type: Type13;
|
1067
1150
|
limit: Limit2;
|
1068
1151
|
}
|
1152
|
+
/**
|
1153
|
+
* Score reductions.
|
1154
|
+
*/
|
1069
1155
|
export interface EvalSampleReductions {
|
1070
1156
|
scorer: Scorer1;
|
1071
1157
|
reducer: Reducer1;
|
1072
1158
|
samples: Samples2;
|
1073
1159
|
}
|
1160
|
+
/**
|
1161
|
+
* Score and sample_id scored.
|
1162
|
+
*/
|
1074
1163
|
export interface EvalSampleScore {
|
1075
1164
|
value: Value2;
|
1076
1165
|
answer: Answer1;
|
@@ -1,4 +1,15 @@
|
|
1
1
|
/**
 * Ambient declaration of the global `Prism` object.
 *
 * Only the members declared here are typed; `languages` and the arguments of
 * `highlight` are intentionally left as `any`.
 */
declare var Prism: {
  languages: any;

  /** Returns the highlighted markup for `contents` as a string. */
  highlight(contents: any, tokens: any, type: any): string;

  /**
   * Highlights a single element. The return type is declared explicitly so
   * the member does not fall back to an implicit `any`.
   */
  highlightElement(
    element: HTMLElement,
    async?: boolean,
    callback?: (element: HTMLElement) => void,
  ): void;

  /**
   * Highlights the elements found under `element`. The return type is
   * declared explicitly so the member does not fall back to an implicit `any`.
   */
  highlightAllUnder(
    element: HTMLElement,
    async?: boolean,
    callback?: (element: HTMLElement) => void,
  ): void;
};
|
@@ -0,0 +1,71 @@
|
|
1
|
+
import {
|
2
|
+
EvalLogHeader,
|
3
|
+
EvalSummary,
|
4
|
+
LogFiles,
|
5
|
+
SampleSummary,
|
6
|
+
} from "./api/types";
|
7
|
+
import { ContentImage, ContentText, EvalSample } from "./types/log";
|
8
|
+
|
9
|
+
/**
 * Shape of the application state object.
 *
 * Every field is optional, so a partially populated (or empty) object is a
 * valid `ApplicationState`.
 */
export interface ApplicationState {
  // Log listing and selection
  logs?: LogFiles;
  selectedLogIndex?: number;
  logHeaders?: Record<string, EvalLogHeader>;
  headersLoading?: boolean;
  selectedLog?: CurrentLog;
  selectedWorkspaceTab?: string;

  // Sample selection and load status
  selectedSampleIndex?: number;
  selectedSample?: EvalSample;
  sampleStatus?: "loading" | "ok" | "error";
  sampleError?: Error;
  selectedSampleTab?: string;
  sampleScrollPosition?: number;
  showingSampleDialog?: boolean;

  // Application-level status and UI flags
  status?: AppStatus;
  offcanvas?: boolean;
  showFind?: boolean;

  // Filtering, sorting, scoring and grouping
  filter?: ScoreFilter;
  epoch?: string;
  sort?: string;
  scores?: ScoreLabel[];
  score?: ScoreLabel;
  filteredSamples?: SampleSummary[];
  groupBy?: "none" | "epoch" | "sample";
  groupByOrder?: "asc" | "desc";

  // Scroll positions keyed by string (presumably the workspace tab id — confirm)
  workspaceTabScrollPosition?: Record<string, number>;
}
|
36
|
+
|
37
|
+
/**
 * Status record: a required `loading` flag plus an optional `Error`.
 */
export interface AppStatus {
  /** Required loading flag. */
  loading: boolean;
  /** Optional error object. */
  error?: Error;
}
|
41
|
+
|
42
|
+
/**
 * Pair of required boolean capability flags.
 */
export interface Capabilities {
  /** Flag for file downloads. */
  downloadFiles: boolean;
  /** Flag for web workers. */
  webWorkers: boolean;
}
|
46
|
+
|
47
|
+
/**
 * A log name paired with its `EvalSummary` contents.
 */
export interface CurrentLog {
  /** Name of the log. */
  name: string;
  /** Summary contents associated with `name`. */
  contents: EvalSummary;
}
|
51
|
+
|
52
|
+
/**
 * A log directory together with a list of file names.
 */
export interface Logs {
  /** Log directory, as a string. */
  log_dir: string;
  /** File entries, as strings. */
  files: string[];
}
|
56
|
+
|
57
|
+
/**
 * Identifies a score by its `name` and `scorer`, both strings.
 */
export interface ScoreLabel {
  /** Score name. */
  name: string;
  /** Scorer name. */
  scorer: string;
}
|
61
|
+
|
62
|
+
/**
 * Score filter holding a single optional string `value`.
 */
export interface ScoreFilter {
  /** Filter value; may be omitted. */
  value?: string;
}
|
65
|
+
|
66
|
+
/** One of the three string literals `"none"`, `"single"` or `"many"`. */
export type SampleMode =
  | "none"
  | "single"
  | "many";
|
67
|
+
|
68
|
+
/**
 * Content entry tagged with the literal type `"tool"`, carrying a list of
 * image and/or text content items.
 */
export interface ContentTool {
  /** Discriminant; always the literal `"tool"`. */
  type: "tool";
  /** Items, each either a `ContentImage` or a `ContentText`. */
  content: (ContentImage | ContentText)[];
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
import { TokenHeader, TokenRow, TokenTable } from "./TokenTable";
|
2
|
+
|
3
|
+
/**
 * Props accepted by the `ModelTokenTable` component.
 */
interface ModelTokenTable {
  /** Untyped usage object; the component iterates over its own keys. */
  model_usage: any;
  /** Optional class name, or list of class names, passed to the table. */
  className?: string | string[];
}
|
7
|
+
|
8
|
+
/**
 * Renders a `TokenTable` with one `TokenRow` per key of `model_usage`.
 *
 * @param model_usage Object whose keys identify the rows and whose values are
 *   passed to each row as `usage`. A null or undefined value renders an empty
 *   table body instead of throwing.
 * @param className Optional class name(s) forwarded to `TokenTable`.
 */
export const ModelTokenTable: React.FC<ModelTokenTable> = ({
  model_usage,
  className,
}) => {
  return (
    <TokenTable className={className}>
      <TokenHeader />
      <tbody>
        {Object.entries(model_usage ?? {}).map(([model, usage]) => (
          // The "-token-row" suffix belongs on the React key only; `model`
          // receives the unmodified key (presumably the model name shown by
          // TokenRow — confirm against TokenRow).
          <TokenRow key={`${model}-token-row`} model={model} usage={usage} />
        ))}
      </tbody>
    </TokenTable>
  );
};
|
@@ -0,0 +1,24 @@
|
|
1
|
+
/* Three-column grid; the first column has zero width. */
.wrapper {
  display: grid;
  grid-template-columns: 0 auto auto;
  /* row gap, then column gap */
  gap: 0.2em 1.5em;
}

/* Cell placed in the second column only. */
.col2 {
  grid-column: 2;
}

/* Cell spanning from grid line 1 to grid line 3 (columns one and two). */
.col1_3 {
  grid-column: 1 / 3;
}

/* Cell placed in the third column. */
.col3 {
  grid-column: 3;
}

/* One-pixel rule spanning the full width of the grid. */
.separator {
  grid-column: 1 / -1;
  height: 1px;
  background-color: var(--bs-light-border-subtle);
}
|
@@ -0,0 +1,97 @@
|
|
1
|
+
import clsx from "clsx";
|
2
|
+
import { Fragment } from "react";
|
3
|
+
import { ModelUsage1 } from "../types/log";
|
4
|
+
import { formatNumber } from "../utils/format";
|
5
|
+
import styles from "./ModelUsagePanel.module.css";
|
6
|
+
|
7
|
+
/**
 * Props accepted by the `ModelUsagePanel` component.
 */
interface ModelUsageProps {
  /** Usage record whose token counts are rendered. */
  usage: ModelUsage1;
}
|
10
|
+
|
11
|
+
/**
 * One row of the usage grid.
 */
interface ModelUsageRow {
  /** Row label; the value "---" marks a separator row. */
  label: string;
  /** Numeric value for the row; omitted for separator rows. */
  value?: number;
  /** When true the label is placed in the second column only. */
  secondary?: boolean;
  /** Optional flag set on some rows. */
  bordered?: boolean;
}
|
17
|
+
|
18
|
+
/**
 * Renders token usage as a grid of label/value rows.
 *
 * Input, output and total rows are always emitted. Cache-read and cache-write
 * rows are added only when their counts are truthy. A separator is drawn
 * between the output row and the total row.
 *
 * @param usage Token counts to display; nothing is rendered when it is falsy.
 */
export const ModelUsagePanel: React.FC<ModelUsageProps> = ({ usage }) => {
  if (!usage) {
    return null;
  }

  // Label value that marks a row as a horizontal separator.
  const separatorLabel = "---";

  const rows: ModelUsageRow[] = [
    { label: "input", value: usage.input_tokens, secondary: false },
  ];

  // Cache rows are secondary and only shown when there is something to report.
  if (usage.input_tokens_cache_read) {
    rows.push({
      label: "cache_read",
      value: usage.input_tokens_cache_read,
      secondary: true,
    });
  }
  if (usage.input_tokens_cache_write) {
    rows.push({
      label: "cache_write",
      value: usage.input_tokens_cache_write,
      secondary: true,
    });
  }

  rows.push(
    {
      label: "Output",
      value: usage.output_tokens,
      secondary: false,
      bordered: true,
    },
    { label: separatorLabel, value: undefined, secondary: false },
    { label: "Total", value: usage.total_tokens, secondary: false },
  );

  return (
    <div className={clsx("text-size-small", styles.wrapper)}>
      {rows.map((row, idx) => {
        if (row.label === separatorLabel) {
          return (
            <div key={`usage-sep-${idx}`} className={styles.separator}></div>
          );
        }

        return (
          <Fragment key={`usage-row-${idx}`}>
            <div
              className={clsx(
                "text-style-label",
                "text-style-secondary",
                row.secondary ? styles.col2 : styles.col1_3,
              )}
            >
              {row.label}
            </div>
            <div className={styles.col3}>
              {/* Compare against null/undefined so a count of 0 is still shown. */}
              {row.value != null ? formatNumber(row.value) : ""}
            </div>
          </Fragment>
        );
      })}
    </div>
  );
};
|