inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/cache.py +8 -7
- inspect_ai/_cli/common.py +0 -12
- inspect_ai/_cli/eval.py +32 -4
- inspect_ai/_cli/info.py +1 -0
- inspect_ai/_cli/list.py +1 -1
- inspect_ai/_cli/log.py +2 -0
- inspect_ai/_cli/main.py +1 -1
- inspect_ai/_cli/sandbox.py +4 -1
- inspect_ai/_cli/score.py +181 -32
- inspect_ai/_cli/trace.py +10 -0
- inspect_ai/_cli/view.py +4 -2
- inspect_ai/_display/core/active.py +2 -3
- inspect_ai/_display/core/config.py +7 -1
- inspect_ai/_display/textual/widgets/samples.py +4 -3
- inspect_ai/_display/textual/widgets/sandbox.py +6 -0
- inspect_ai/_eval/eval.py +104 -101
- inspect_ai/_eval/evalset.py +75 -75
- inspect_ai/_eval/loader.py +122 -12
- inspect_ai/_eval/registry.py +1 -1
- inspect_ai/_eval/run.py +14 -0
- inspect_ai/_eval/score.py +125 -36
- inspect_ai/_eval/task/log.py +105 -4
- inspect_ai/_eval/task/results.py +92 -38
- inspect_ai/_eval/task/run.py +9 -2
- inspect_ai/_eval/task/sandbox.py +35 -2
- inspect_ai/_eval/task/task.py +49 -46
- inspect_ai/_util/constants.py +1 -1
- inspect_ai/_util/content.py +8 -0
- inspect_ai/_util/error.py +2 -0
- inspect_ai/_util/file.py +15 -1
- inspect_ai/_util/hash.py +1 -1
- inspect_ai/_util/logger.py +4 -2
- inspect_ai/_util/registry.py +7 -1
- inspect_ai/_view/view.py +1 -2
- inspect_ai/_view/www/.vscode/extensions.json +3 -0
- inspect_ai/_view/www/.vscode/settings.json +8 -0
- inspect_ai/_view/www/App.css +97 -29
- inspect_ai/_view/www/README.md +1 -1
- inspect_ai/_view/www/dist/assets/index.css +16663 -14674
- inspect_ai/_view/www/dist/assets/index.js +58808 -51348
- inspect_ai/_view/www/dist/index.html +1 -1
- inspect_ai/_view/www/index.html +2 -2
- inspect_ai/_view/www/log-schema.json +87 -73
- inspect_ai/_view/www/package.json +22 -4
- inspect_ai/_view/www/postcss.config.cjs +8 -9
- inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
- inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
- inspect_ai/_view/www/src/api/api-browser.ts +2 -2
- inspect_ai/_view/www/src/api/api-http.ts +3 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
- inspect_ai/_view/www/src/api/client-api.ts +4 -4
- inspect_ai/_view/www/src/api/index.ts +4 -4
- inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
- inspect_ai/_view/www/src/appearance/colors.ts +9 -0
- inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
- inspect_ai/_view/www/src/appearance/icons.ts +100 -0
- inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
- inspect_ai/_view/www/src/components/Card.css +60 -0
- inspect_ai/_view/www/src/components/Card.tsx +109 -0
- inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
- inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
- inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
- inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
- inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
- inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
- inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
- inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
- inspect_ai/_view/www/src/components/FindBand.css +49 -0
- inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
- inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
- inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
- inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
- inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
- inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
- inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
- inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
- inspect_ai/_view/www/src/components/MessageBand.css +43 -0
- inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
- inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
- inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
- inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
- inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
- inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
- inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
- inspect_ai/_view/www/src/components/ToolButton.css +3 -0
- inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
- inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
- inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
- inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
- inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
- inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
- inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
- inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
- inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
- inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
- inspect_ai/_view/www/src/metadata/types.ts +18 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
- inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
- inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
- inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
- inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
- inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
- inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
- inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
- inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
- inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
- inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
- inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
- inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
- inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
- inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
- inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
- inspect_ai/_view/www/src/samples/error/error.ts +15 -0
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
- inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
- inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
- inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
- inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
- inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
- inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
- inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
- inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
- inspect_ai/_view/www/src/types/log.d.ts +108 -19
- inspect_ai/_view/www/src/types/prism.d.ts +11 -0
- inspect_ai/_view/www/src/types.ts +71 -0
- inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
- inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
- inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
- inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
- inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
- inspect_ai/_view/www/src/utils/attachments.ts +42 -0
- inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
- inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
- inspect_ai/_view/www/src/utils/debugging.ts +28 -0
- inspect_ai/_view/www/src/utils/dom.ts +30 -0
- inspect_ai/_view/www/src/utils/format.ts +194 -0
- inspect_ai/_view/www/src/utils/git.ts +7 -0
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/_view/www/src/utils/http.ts +14 -0
- inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
- inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
- inspect_ai/_view/www/src/utils/queue.ts +51 -0
- inspect_ai/_view/www/src/utils/sync.ts +114 -0
- inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
- inspect_ai/_view/www/src/utils/vscode.ts +13 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
- inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
- inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
- inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
- inspect_ai/_view/www/src/workspace/types.ts +10 -0
- inspect_ai/_view/www/src/workspace/utils.ts +34 -0
- inspect_ai/_view/www/tsconfig.json +23 -9
- inspect_ai/_view/www/vite.config.js +8 -17
- inspect_ai/_view/www/yarn.lock +627 -556
- inspect_ai/approval/_approval.py +2 -0
- inspect_ai/approval/_approver.py +4 -4
- inspect_ai/approval/_auto.py +1 -1
- inspect_ai/approval/_human/approver.py +3 -0
- inspect_ai/approval/_policy.py +5 -0
- inspect_ai/approval/_registry.py +2 -2
- inspect_ai/dataset/_dataset.py +64 -37
- inspect_ai/dataset/_sources/__init__.py +0 -0
- inspect_ai/dataset/_sources/csv.py +20 -12
- inspect_ai/dataset/_sources/file.py +4 -0
- inspect_ai/dataset/_sources/hf.py +39 -29
- inspect_ai/dataset/_sources/json.py +17 -9
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_convert.py +3 -3
- inspect_ai/log/_file.py +24 -9
- inspect_ai/log/_log.py +101 -13
- inspect_ai/log/_message.py +4 -2
- inspect_ai/log/_recorders/file.py +4 -0
- inspect_ai/log/_recorders/json.py +5 -7
- inspect_ai/log/_recorders/recorder.py +3 -0
- inspect_ai/log/_transcript.py +19 -8
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_cache.py +39 -21
- inspect_ai/model/_call_tools.py +4 -3
- inspect_ai/model/_chat_message.py +14 -4
- inspect_ai/model/_generate_config.py +1 -1
- inspect_ai/model/_model.py +31 -24
- inspect_ai/model/_model_output.py +14 -1
- inspect_ai/model/_openai.py +10 -18
- inspect_ai/model/_providers/anthropic.py +3 -3
- inspect_ai/model/_providers/google.py +9 -5
- inspect_ai/model/_providers/openai.py +5 -9
- inspect_ai/model/_providers/openai_o1.py +3 -5
- inspect_ai/model/_providers/openrouter.py +86 -0
- inspect_ai/model/_providers/providers.py +11 -0
- inspect_ai/scorer/__init__.py +6 -1
- inspect_ai/scorer/_answer.py +7 -7
- inspect_ai/scorer/_classification.py +38 -18
- inspect_ai/scorer/_common.py +2 -8
- inspect_ai/scorer/_match.py +4 -5
- inspect_ai/scorer/_metric.py +87 -28
- inspect_ai/scorer/_metrics/__init__.py +3 -3
- inspect_ai/scorer/_metrics/accuracy.py +8 -10
- inspect_ai/scorer/_metrics/mean.py +3 -17
- inspect_ai/scorer/_metrics/std.py +111 -30
- inspect_ai/scorer/_model.py +12 -12
- inspect_ai/scorer/_pattern.py +3 -3
- inspect_ai/scorer/_reducer/reducer.py +36 -21
- inspect_ai/scorer/_reducer/registry.py +2 -2
- inspect_ai/scorer/_reducer/types.py +7 -1
- inspect_ai/scorer/_score.py +11 -1
- inspect_ai/scorer/_scorer.py +110 -16
- inspect_ai/solver/__init__.py +1 -1
- inspect_ai/solver/_basic_agent.py +19 -22
- inspect_ai/solver/_bridge/__init__.py +0 -3
- inspect_ai/solver/_bridge/bridge.py +3 -3
- inspect_ai/solver/_chain.py +1 -2
- inspect_ai/solver/_critique.py +3 -3
- inspect_ai/solver/_fork.py +2 -2
- inspect_ai/solver/_human_agent/__init__.py +0 -0
- inspect_ai/solver/_human_agent/agent.py +5 -8
- inspect_ai/solver/_human_agent/commands/clock.py +14 -10
- inspect_ai/solver/_human_agent/commands/note.py +1 -1
- inspect_ai/solver/_human_agent/commands/score.py +0 -11
- inspect_ai/solver/_multiple_choice.py +38 -26
- inspect_ai/solver/_prompt.py +7 -7
- inspect_ai/solver/_solver.py +53 -52
- inspect_ai/solver/_task_state.py +80 -69
- inspect_ai/solver/_use_tools.py +9 -9
- inspect_ai/tool/__init__.py +4 -1
- inspect_ai/tool/_tool.py +43 -14
- inspect_ai/tool/_tool_call.py +6 -2
- inspect_ai/tool/_tool_choice.py +3 -1
- inspect_ai/tool/_tool_def.py +10 -8
- inspect_ai/tool/_tool_params.py +24 -0
- inspect_ai/tool/_tool_with.py +7 -7
- inspect_ai/tool/_tools/__init__.py +0 -0
- inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
- inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
- inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
- inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/tool/_tools/_execute.py +23 -11
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
- inspect_ai/tool/_tools/_web_search.py +7 -5
- inspect_ai/tool/beta.py +3 -0
- inspect_ai/util/_concurrency.py +3 -3
- inspect_ai/util/_panel.py +2 -0
- inspect_ai/util/_resource.py +12 -12
- inspect_ai/util/_sandbox/docker/compose.py +23 -20
- inspect_ai/util/_sandbox/docker/config.py +2 -1
- inspect_ai/util/_sandbox/docker/docker.py +42 -86
- inspect_ai/util/_sandbox/docker/service.py +100 -0
- inspect_ai/util/_sandbox/environment.py +99 -96
- inspect_ai/util/_sandbox/self_check.py +124 -16
- inspect_ai/util/_subprocess.py +5 -3
- inspect_ai/util/_subtask.py +15 -16
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
- inspect_ai-0.3.64.dist-info/RECORD +625 -0
- inspect_ai/_view/www/src/Register.mjs +0 -3
- inspect_ai/_view/www/src/Types.mjs +0 -38
- inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
- inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
- inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
- inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
- inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
- inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
- inspect_ai/_view/www/src/components/Card.mjs +0 -126
- inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
- inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
- inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
- inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
- inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
- inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
- inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
- inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
- inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
- inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
- inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
- inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
- inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
- inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
- inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
- inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
- inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
- inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
- inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
- inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
- inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
- inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
- inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
- inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
- inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
- inspect_ai/_view/www/src/components/Tools.mjs +0 -376
- inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
- inspect_ai/_view/www/src/components/ansi-output.js +0 -932
- inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
- inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
- inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
- inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
- inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
- inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
- inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
- inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
- inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
- inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
- inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
- inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
- inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
- inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
- inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
- inspect_ai/_view/www/src/utils/Format.mjs +0 -260
- inspect_ai/_view/www/src/utils/Git.mjs +0 -12
- inspect_ai/_view/www/src/utils/Html.mjs +0 -21
- inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
- inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
- inspect_ai/_view/www/src/utils/http.mjs +0 -18
- inspect_ai/_view/www/src/utils/queue.mjs +0 -67
- inspect_ai/_view/www/src/utils/sync.mjs +0 -101
- inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
- inspect_ai/tool/beta/__init__.py +0 -5
- inspect_ai-0.3.62.dist-info/RECORD +0 -481
- /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
- /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
- /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
@@ -1,23 +1,31 @@
|
|
1
1
|
import { compileExpression } from "filtrex";
|
2
|
-
import {
|
3
|
-
import {
|
2
|
+
import { SampleSummary } from "../../api/types";
|
3
|
+
import { kScoreTypeBoolean } from "../../constants";
|
4
|
+
import { ScoreLabel } from "../../types";
|
5
|
+
import { Scores1 } from "../../types/log";
|
6
|
+
import { inputString } from "../../utils/format";
|
7
|
+
import { EvalDescriptor, ScoreDescriptor } from "../descriptor/types";
|
4
8
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
export interface FilterError {
|
10
|
+
from: number;
|
11
|
+
to: number;
|
12
|
+
message: string;
|
13
|
+
severity: "warning" | "error";
|
14
|
+
}
|
15
|
+
|
16
|
+
export interface ScoreFilterItem {
|
17
|
+
shortName?: string;
|
18
|
+
qualifiedName?: string;
|
19
|
+
canonicalName: string;
|
20
|
+
tooltip?: string;
|
21
|
+
categories: string[];
|
22
|
+
scoreType: string;
|
23
|
+
}
|
12
24
|
|
13
25
|
/**
|
14
26
|
* Coerces a value to the type expected by the score.
|
15
|
-
*
|
16
|
-
* @param {any} value
|
17
|
-
* @param {import("../../samples/SamplesDescriptor.mjs").ScoreDescriptor} descriptor
|
18
|
-
* @returns {any}
|
19
27
|
*/
|
20
|
-
const coerceValue = (value, descriptor) => {
|
28
|
+
const coerceValue = (value: unknown, descriptor: ScoreDescriptor): unknown => {
|
21
29
|
if (descriptor && descriptor.scoreType === kScoreTypeBoolean) {
|
22
30
|
return Boolean(value);
|
23
31
|
} else {
|
@@ -25,24 +33,18 @@ const coerceValue = (value, descriptor) => {
|
|
25
33
|
}
|
26
34
|
};
|
27
35
|
|
28
|
-
|
29
|
-
|
30
|
-
* @returns {boolean}
|
31
|
-
*/
|
32
|
-
const isFilteringSupportedForValue = (value) =>
|
36
|
+
// Whether a particular value is filter-able
|
37
|
+
const isFilteringSupportedForValue = (value: unknown): boolean =>
|
33
38
|
["string", "number", "boolean"].includes(typeof value);
|
34
39
|
|
35
40
|
/**
|
36
41
|
* Returns the names of scores that are not allowed to be used as short names in
|
37
42
|
* filter expressions because they are not unique. This should be applied only to
|
38
43
|
* the nested scores, not to the top-level scorer names.
|
39
|
-
*
|
40
|
-
* @param {import("../../Types.mjs").ScoreLabel[]} scores
|
41
|
-
* @returns {Set<string>}
|
42
44
|
*/
|
43
|
-
const bannedShortScoreNames = (scores) => {
|
44
|
-
const used = new Set();
|
45
|
-
const banned = new Set();
|
45
|
+
const bannedShortScoreNames = (scores: ScoreLabel[]): Set<string> => {
|
46
|
+
const used: Set<string> = new Set();
|
47
|
+
const banned: Set<string> = new Set();
|
46
48
|
for (const { scorer, name } of scores) {
|
47
49
|
banned.add(scorer);
|
48
50
|
if (used.has(name)) {
|
@@ -60,19 +62,22 @@ const bannedShortScoreNames = (scores) => {
|
|
60
62
|
* Child metrics are accessed using dot notation (e.g. `scorer_name.score_name`) or
|
61
63
|
* directly by name when it is unique.
|
62
64
|
*
|
63
|
-
* @param {import("../../samples/
|
65
|
+
* @param {import("../../samples/descriptor/samplesDescriptor").EvalDescriptor} evalDescriptor
|
64
66
|
* @param {import("../../types/log").Scores1} sampleScores
|
65
67
|
* @returns {Object<string, any>}
|
66
68
|
*/
|
67
|
-
const scoreVariables = (
|
69
|
+
const scoreVariables = (
|
70
|
+
evalDescriptor: EvalDescriptor,
|
71
|
+
sampleScores: Scores1,
|
72
|
+
) => {
|
68
73
|
const bannedShortNames = bannedShortScoreNames(evalDescriptor.scores);
|
69
|
-
const variables = {};
|
74
|
+
const variables: Record<string, unknown> = {};
|
70
75
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
+
const addScore = (
|
77
|
+
variableName: string,
|
78
|
+
scoreLabel: ScoreLabel,
|
79
|
+
value: unknown,
|
80
|
+
) => {
|
76
81
|
const coercedValue = coerceValue(
|
77
82
|
value,
|
78
83
|
evalDescriptor.scoreDescriptor(scoreLabel),
|
@@ -82,7 +87,7 @@ const scoreVariables = (evalDescriptor, sampleScores) => {
|
|
82
87
|
}
|
83
88
|
};
|
84
89
|
|
85
|
-
for (const [scorer, score] of Object.entries(sampleScores)) {
|
90
|
+
for (const [scorer, score] of Object.entries(sampleScores || {})) {
|
86
91
|
addScore(scorer, { scorer, name: scorer }, score.value);
|
87
92
|
if (typeof score.value === "object") {
|
88
93
|
for (const [name, value] of Object.entries(score.value)) {
|
@@ -96,39 +101,34 @@ const scoreVariables = (evalDescriptor, sampleScores) => {
|
|
96
101
|
return variables;
|
97
102
|
};
|
98
103
|
|
99
|
-
/**
|
100
|
-
* @typedef {Object} ScoreFilterItem
|
101
|
-
* @property {string | undefined} shortName - The short name of the score, if doesn't conflict with other short names.
|
102
|
-
* @property {string | undefined} qualifiedName - The `scorer.score` name for children of complex scorers.
|
103
|
-
* @property {string} canonicalName - The canonical name: either `shortName` or `qualifiedName` (at least one must exist).
|
104
|
-
* @property {string} tooltip - The informational tooltip for the score.
|
105
|
-
* @property {string[]} categories - Category values for categorical scores.
|
106
|
-
* @property {string} scoreType - The type of the score (e.g., 'numeric', 'categorical', 'boolean').
|
107
|
-
*/
|
108
|
-
|
109
104
|
/**
|
110
105
|
* Generates a dictionary of variables that can be used in the filter expression.
|
111
106
|
* High-level scorer metrics can be accessed by name directly.
|
112
107
|
* Child metrics are accessed using dot notation (e.g. `scorer_name.score_name`) or
|
113
108
|
* directly by name when it is unique.
|
114
|
-
*
|
115
|
-
* @param {import("../../samples/SamplesDescriptor.mjs").EvalDescriptor} evalDescriptor
|
116
|
-
* @returns {ScoreFilterItem[]}
|
117
109
|
*/
|
118
|
-
export const scoreFilterItems = (
|
119
|
-
|
120
|
-
|
110
|
+
export const scoreFilterItems = (
|
111
|
+
evalDescriptor: EvalDescriptor,
|
112
|
+
): ScoreFilterItem[] => {
|
113
|
+
const items: ScoreFilterItem[] = [];
|
121
114
|
const bannedShortNames = bannedShortScoreNames(evalDescriptor.scores);
|
122
|
-
const valueToString = (value) =>
|
115
|
+
const valueToString = (value: unknown) =>
|
123
116
|
typeof value === "string" ? `"${value}"` : String(value);
|
124
117
|
|
125
118
|
/**
|
126
119
|
* @param {string | undefined} shortName
|
127
120
|
* @param {string | undefined} qualifiedName
|
128
|
-
* @param {import("../../
|
121
|
+
* @param {import("../../types").ScoreLabel} scoreLabel
|
129
122
|
*/
|
130
|
-
const addScore = (
|
123
|
+
const addScore = (
|
124
|
+
scoreLabel: ScoreLabel,
|
125
|
+
shortName?: string,
|
126
|
+
qualifiedName?: string,
|
127
|
+
) => {
|
131
128
|
const canonicalName = shortName || qualifiedName;
|
129
|
+
if (!canonicalName) {
|
130
|
+
throw new Error("Unable to create a canonical name for a score");
|
131
|
+
}
|
132
132
|
const descriptor = evalDescriptor.scoreDescriptor(scoreLabel);
|
133
133
|
const scoreType = descriptor?.scoreType;
|
134
134
|
if (!descriptor) {
|
@@ -143,17 +143,20 @@ export const scoreFilterItems = (evalDescriptor) => {
|
|
143
143
|
return;
|
144
144
|
}
|
145
145
|
var tooltip = `${canonicalName}: ${descriptor.scoreType}`;
|
146
|
-
var categories = [];
|
146
|
+
var categories: string[] = [];
|
147
147
|
if (descriptor.min !== undefined || descriptor.max !== undefined) {
|
148
|
-
const rounded = (num) => {
|
148
|
+
const rounded = (num: number) => {
|
149
149
|
// Additional round-trip to remove trailing zeros.
|
150
150
|
return parseFloat(num.toPrecision(3)).toString();
|
151
151
|
};
|
152
|
-
tooltip += `\nrange: ${rounded(descriptor.min)} to ${rounded(descriptor.max)}`;
|
152
|
+
tooltip += `\nrange: ${rounded(descriptor.min || 0)} to ${rounded(descriptor.max || 0)}`;
|
153
153
|
}
|
154
154
|
if (descriptor.categories) {
|
155
|
-
|
156
|
-
|
155
|
+
categories = descriptor.categories.map((cat) => {
|
156
|
+
const val = (cat as Record<string, unknown>).val;
|
157
|
+
return valueToString(val);
|
158
|
+
});
|
159
|
+
tooltip += `\ncategories: ${categories.join(" ")}`;
|
157
160
|
}
|
158
161
|
items.push({
|
159
162
|
shortName,
|
@@ -170,29 +173,24 @@ export const scoreFilterItems = (evalDescriptor) => {
|
|
170
173
|
const hasQualifiedName = name !== scorer;
|
171
174
|
const shortName = hasShortName ? name : undefined;
|
172
175
|
const qualifiedName = hasQualifiedName ? `${scorer}.${name}` : undefined;
|
173
|
-
addScore(
|
176
|
+
addScore({ name, scorer }, shortName, qualifiedName);
|
174
177
|
}
|
175
178
|
return items;
|
176
179
|
};
|
177
180
|
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
* @returns {{matches: boolean, error: FilterError | undefined}}
|
185
|
-
*/
|
186
|
-
export const filterExpression = (evalDescriptor, sample, filterValue) => {
|
181
|
+
// TODO: Add case-insensitive string comparison.
|
182
|
+
export const filterExpression = (
|
183
|
+
evalDescriptor: EvalDescriptor,
|
184
|
+
sample: SampleSummary,
|
185
|
+
filterValue: string,
|
186
|
+
) => {
|
187
187
|
try {
|
188
|
-
|
189
|
-
const inputContains = (regex) => {
|
188
|
+
const inputContains = (regex: string): boolean => {
|
190
189
|
return inputString(sample.input).some((msg) =>
|
191
190
|
msg.match(new RegExp(regex, "i")),
|
192
191
|
);
|
193
192
|
};
|
194
|
-
|
195
|
-
const targetContains = (regex) => {
|
193
|
+
const targetContains = (regex: string): boolean => {
|
196
194
|
let targets = Array.isArray(sample.target)
|
197
195
|
? sample.target
|
198
196
|
: [sample.target];
|
@@ -217,7 +215,8 @@ export const filterExpression = (evalDescriptor, sample, filterValue) => {
|
|
217
215
|
}
|
218
216
|
} catch (error) {
|
219
217
|
if (error instanceof ReferenceError) {
|
220
|
-
const
|
218
|
+
const errorObj = error as any as Record<string, unknown>;
|
219
|
+
const propertyName: string = (errorObj["propertyName"] as string) || "";
|
221
220
|
if (propertyName) {
|
222
221
|
const regex = new RegExp(`\\b${propertyName}\\b`);
|
223
222
|
const match = regex.exec(filterValue);
|
@@ -234,14 +233,16 @@ export const filterExpression = (evalDescriptor, sample, filterValue) => {
|
|
234
233
|
}
|
235
234
|
}
|
236
235
|
}
|
236
|
+
|
237
|
+
const message = error instanceof Error ? error.message : "";
|
237
238
|
if (
|
238
|
-
|
239
|
-
|
239
|
+
message.startsWith("Parse error") ||
|
240
|
+
message.startsWith("Lexical error")
|
240
241
|
) {
|
241
242
|
// Filterex uses formatting like this:
|
242
243
|
// foo and
|
243
244
|
// ----^
|
244
|
-
const from =
|
245
|
+
const from = message.match(/^(-*)\^$/m)?.[1]?.length;
|
245
246
|
return {
|
246
247
|
matches: false,
|
247
248
|
error: {
|
@@ -255,7 +256,7 @@ export const filterExpression = (evalDescriptor, sample, filterValue) => {
|
|
255
256
|
return {
|
256
257
|
matches: false,
|
257
258
|
error: {
|
258
|
-
message:
|
259
|
+
message: message,
|
259
260
|
severity: "error",
|
260
261
|
},
|
261
262
|
};
|
@@ -263,12 +264,16 @@ export const filterExpression = (evalDescriptor, sample, filterValue) => {
|
|
263
264
|
};
|
264
265
|
|
265
266
|
/**
|
266
|
-
* @param {import("../../samples/
|
267
|
-
* @param {import("../../api/
|
267
|
+
* @param {import("../../samples/descriptor/samplesDescriptor").EvalDescriptor} evalDescriptor
|
268
|
+
* @param {import("../../api/types").SampleSummary[]} samples
|
268
269
|
* @param {string} filterValue
|
269
|
-
* @returns {
|
270
|
+
* @returns {}
|
270
271
|
*/
|
271
|
-
export const filterSamples = (
|
272
|
+
export const filterSamples = (
|
273
|
+
evalDescriptor: EvalDescriptor,
|
274
|
+
samples: SampleSummary[],
|
275
|
+
filterValue: string,
|
276
|
+
): { result: SampleSummary[]; error: FilterError | undefined } => {
|
272
277
|
var error = undefined;
|
273
278
|
const result = samples.filter((sample) => {
|
274
279
|
if (filterValue) {
|
@@ -0,0 +1,288 @@
|
|
1
|
+
import { autocompletion, startCompletion } from "@codemirror/autocomplete";
|
2
|
+
import {
|
3
|
+
bracketMatching,
|
4
|
+
HighlightStyle,
|
5
|
+
syntaxHighlighting,
|
6
|
+
} from "@codemirror/language";
|
7
|
+
import { Diagnostic, linter } from "@codemirror/lint";
|
8
|
+
import {
|
9
|
+
Compartment,
|
10
|
+
EditorState,
|
11
|
+
Transaction,
|
12
|
+
TransactionSpec,
|
13
|
+
} from "@codemirror/state";
|
14
|
+
import { tags } from "@lezer/highlight";
|
15
|
+
import clsx from "clsx";
|
16
|
+
import { EditorView, minimalSetup } from "codemirror";
|
17
|
+
import { useEffect, useMemo, useRef, useState } from "react";
|
18
|
+
|
19
|
+
import { ScoreFilter } from "../../../types";
|
20
|
+
import { EvalDescriptor } from "../../descriptor/types";
|
21
|
+
import { FilterError, filterSamples, scoreFilterItems } from "../filters";
|
22
|
+
import { getCompletions } from "./completions";
|
23
|
+
import styles from "./SampleFilter.module.css";
|
24
|
+
import { language } from "./tokenize";
|
25
|
+
|
26
|
+
// Types
|
27
|
+
interface FilteringResult {
|
28
|
+
numSamples: number;
|
29
|
+
error?: FilterError;
|
30
|
+
}
|
31
|
+
|
32
|
+
interface SampleFilterProps {
|
33
|
+
evalDescriptor: EvalDescriptor;
|
34
|
+
scoreFilter: ScoreFilter;
|
35
|
+
setScoreFilter: (filter: ScoreFilter) => void;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Constants
|
39
|
+
const FILTER_TOOLTIP = `
|
40
|
+
Filter samples by:
|
41
|
+
• Scores
|
42
|
+
• Input and target regex search: input_contains, target_contains
|
43
|
+
|
44
|
+
Supported expressions:
|
45
|
+
• Arithmetic: +, -, *, /, mod, ^
|
46
|
+
• Comparison: <, <=, >, >=, ==, !=, including chain comparisons, e.g. "10 <= x < 20"
|
47
|
+
• Boolean: and, or, not
|
48
|
+
• Regex matching: ~= (case-sensitive)
|
49
|
+
• Set operations: in, not in; e.g. "x in (2, 3, 5)"
|
50
|
+
• Functions: min, max, abs, round, floor, ceil, sqrt, log, log2, log10
|
51
|
+
`.trim();
|
52
|
+
|
53
|
+
// Styles
|
54
|
+
const highlightStyle = HighlightStyle.define([
|
55
|
+
{ tag: tags.string, class: "token string" },
|
56
|
+
{ tag: tags.number, class: "token number" },
|
57
|
+
{ tag: tags.keyword, class: "token keyword" },
|
58
|
+
]);
|
59
|
+
|
60
|
+
const editorTheme = EditorView.theme({
|
61
|
+
"&": {
|
62
|
+
fontSize: "inherit",
|
63
|
+
color: "var(--inspect-input-foreground)",
|
64
|
+
backgroundColor: "var(--inspect-input-background)",
|
65
|
+
border: "1px solid var(--inspect-input-border)",
|
66
|
+
borderRadius: "var(--bs-border-radius)",
|
67
|
+
},
|
68
|
+
".cm-cursor.cm-cursor-primary": {
|
69
|
+
borderLeftColor: "var(--bs-body-color)",
|
70
|
+
},
|
71
|
+
".cm-selectionBackground": {
|
72
|
+
backgroundColor: "var(--inspect-inactive-selection-background)",
|
73
|
+
},
|
74
|
+
"&.cm-focused > .cm-scroller > .cm-selectionLayer > .cm-selectionBackground":
|
75
|
+
{
|
76
|
+
backgroundColor: "var(--inspect-active-selection-background)",
|
77
|
+
},
|
78
|
+
"&.cm-focused": {
|
79
|
+
outline: "none",
|
80
|
+
borderColor: "var(--inspect-focus-border-color)",
|
81
|
+
boxShadow: "var(--inspect-focus-border-shadow)",
|
82
|
+
},
|
83
|
+
".filter-pending > &.cm-focused": {
|
84
|
+
borderColor: "var(--inspect-focus-border-gray-color)",
|
85
|
+
boxShadow: "var(--inspect-focus-border-gray-shadow)",
|
86
|
+
},
|
87
|
+
".cm-tooltip": {
|
88
|
+
backgroundColor: "var(--bs-light)",
|
89
|
+
border: "1px solid var(--bs-border-color)",
|
90
|
+
color: "var(--bs-body-color)",
|
91
|
+
},
|
92
|
+
".cm-tooltip.cm-tooltip-autocomplete > ul > li": {
|
93
|
+
color: "var(--bs-body-color)",
|
94
|
+
},
|
95
|
+
".cm-tooltip.cm-tooltip-autocomplete > ul > li[aria-selected]": {
|
96
|
+
backgroundColor: "var(--inspect-active-selection-background)",
|
97
|
+
color: "var(--bs-body-color)",
|
98
|
+
},
|
99
|
+
".cm-scroller": {
|
100
|
+
overflow: "hidden",
|
101
|
+
},
|
102
|
+
});
|
103
|
+
|
104
|
+
// Helper functions
|
105
|
+
const getFilteringResult = (
|
106
|
+
evalDescriptor: EvalDescriptor,
|
107
|
+
filterValue: string,
|
108
|
+
): FilteringResult => {
|
109
|
+
const { result, error } = filterSamples(
|
110
|
+
evalDescriptor,
|
111
|
+
evalDescriptor.samples,
|
112
|
+
filterValue,
|
113
|
+
);
|
114
|
+
return { numSamples: result.length, error };
|
115
|
+
};
|
116
|
+
|
117
|
+
const ensureOneLine = (tr: Transaction): TransactionSpec => {
|
118
|
+
const newDoc = tr.newDoc.toString();
|
119
|
+
if (!newDoc.includes("\n")) return tr;
|
120
|
+
|
121
|
+
if (tr.isUserEvent("input.paste")) {
|
122
|
+
return {
|
123
|
+
changes: {
|
124
|
+
from: 0,
|
125
|
+
to: tr.startState.doc.length,
|
126
|
+
insert: newDoc.replace(/\n/g, " ").trim(),
|
127
|
+
},
|
128
|
+
};
|
129
|
+
}
|
130
|
+
return {};
|
131
|
+
};
|
132
|
+
|
133
|
+
const getLints = (
|
134
|
+
view: EditorView,
|
135
|
+
filterError?: FilterError,
|
136
|
+
): Diagnostic[] => {
|
137
|
+
if (!filterError) return [];
|
138
|
+
return [
|
139
|
+
{
|
140
|
+
from: filterError.from || 0,
|
141
|
+
to: filterError.to || view.state.doc.length,
|
142
|
+
severity: filterError.severity,
|
143
|
+
message: filterError.message,
|
144
|
+
},
|
145
|
+
];
|
146
|
+
};
|
147
|
+
|
148
|
+
// Main component
|
149
|
+
export const SampleFilter: React.FC<SampleFilterProps> = ({
|
150
|
+
evalDescriptor,
|
151
|
+
scoreFilter,
|
152
|
+
setScoreFilter,
|
153
|
+
}) => {
|
154
|
+
const editorRef = useRef<HTMLDivElement>(null);
|
155
|
+
const editorViewRef = useRef<EditorView>(null);
|
156
|
+
const linterCompartment = useRef<Compartment>(new Compartment());
|
157
|
+
const autocompletionCompartment = useRef<Compartment>(new Compartment());
|
158
|
+
const updateListenerCompartment = useRef<Compartment>(new Compartment());
|
159
|
+
|
160
|
+
const filterItems = useMemo(
|
161
|
+
() => scoreFilterItems(evalDescriptor),
|
162
|
+
[evalDescriptor],
|
163
|
+
);
|
164
|
+
|
165
|
+
const [filteringResultInstant, setFilteringResultInstant] =
|
166
|
+
useState<FilteringResult | null>(null);
|
167
|
+
|
168
|
+
const handleFocus = (event: FocusEvent, view: EditorView) => {
|
169
|
+
if (event.isTrusted && view.state.doc.toString() === "") {
|
170
|
+
setTimeout(() => startCompletion(view), 0);
|
171
|
+
}
|
172
|
+
};
|
173
|
+
|
174
|
+
const makeAutocompletion = () =>
|
175
|
+
autocompletion({
|
176
|
+
override: [(context) => getCompletions(context, filterItems)],
|
177
|
+
activateOnCompletion: (c) => c.label.endsWith(" "),
|
178
|
+
});
|
179
|
+
|
180
|
+
const makeLinter = () =>
|
181
|
+
linter((view) => getLints(view, filteringResultInstant?.error));
|
182
|
+
|
183
|
+
const makeUpdateListener = () =>
|
184
|
+
EditorView.updateListener.of((update) => {
|
185
|
+
if (update.docChanged) {
|
186
|
+
const newValue = update.state.doc.toString();
|
187
|
+
const filteringResult = getFilteringResult(evalDescriptor, newValue);
|
188
|
+
if (!filteringResult.error) {
|
189
|
+
setScoreFilter({ value: newValue });
|
190
|
+
}
|
191
|
+
setFilteringResultInstant(filteringResult);
|
192
|
+
}
|
193
|
+
});
|
194
|
+
|
195
|
+
// Initialize editor
|
196
|
+
useEffect(() => {
|
197
|
+
editorViewRef.current?.destroy();
|
198
|
+
|
199
|
+
editorViewRef.current = new EditorView({
|
200
|
+
parent: editorRef.current ?? undefined,
|
201
|
+
state: EditorState.create({
|
202
|
+
doc: scoreFilter.value || "",
|
203
|
+
extensions: [
|
204
|
+
minimalSetup,
|
205
|
+
bracketMatching(),
|
206
|
+
editorTheme,
|
207
|
+
EditorState.transactionFilter.of(ensureOneLine),
|
208
|
+
updateListenerCompartment.current.of(makeUpdateListener()),
|
209
|
+
EditorView.domEventHandlers({ focus: handleFocus }),
|
210
|
+
language,
|
211
|
+
syntaxHighlighting(highlightStyle),
|
212
|
+
autocompletionCompartment.current.of(makeAutocompletion()),
|
213
|
+
linterCompartment.current.of(makeLinter()),
|
214
|
+
],
|
215
|
+
}),
|
216
|
+
});
|
217
|
+
|
218
|
+
return () => editorViewRef.current?.destroy();
|
219
|
+
}, []);
|
220
|
+
|
221
|
+
// Handle filter value changes
|
222
|
+
useEffect(() => {
|
223
|
+
if (!editorViewRef.current) return;
|
224
|
+
|
225
|
+
const currentValue = editorViewRef.current.state.doc.toString();
|
226
|
+
if (scoreFilter.value === currentValue) return;
|
227
|
+
|
228
|
+
setFilteringResultInstant(
|
229
|
+
getFilteringResult(evalDescriptor, scoreFilter.value || ""),
|
230
|
+
);
|
231
|
+
editorViewRef.current.dispatch({
|
232
|
+
changes: {
|
233
|
+
from: 0,
|
234
|
+
to: currentValue.length,
|
235
|
+
insert: scoreFilter.value || "",
|
236
|
+
},
|
237
|
+
});
|
238
|
+
}, [evalDescriptor, scoreFilter.value]);
|
239
|
+
|
240
|
+
// Update compartments when dependencies change
|
241
|
+
useEffect(() => {
|
242
|
+
editorViewRef.current?.dispatch({
|
243
|
+
effects:
|
244
|
+
updateListenerCompartment.current.reconfigure(makeUpdateListener()),
|
245
|
+
});
|
246
|
+
}, [evalDescriptor]);
|
247
|
+
|
248
|
+
useEffect(() => {
|
249
|
+
editorViewRef.current?.dispatch({
|
250
|
+
effects:
|
251
|
+
autocompletionCompartment.current.reconfigure(makeAutocompletion()),
|
252
|
+
});
|
253
|
+
}, [filterItems]);
|
254
|
+
|
255
|
+
useEffect(() => {
|
256
|
+
editorViewRef.current?.dispatch({
|
257
|
+
effects: linterCompartment.current.reconfigure(makeLinter()),
|
258
|
+
});
|
259
|
+
}, [filteringResultInstant?.error]);
|
260
|
+
|
261
|
+
return (
|
262
|
+
<div style={{ display: "flex" }}>
|
263
|
+
<span
|
264
|
+
className={clsx(
|
265
|
+
"sample-filter-label",
|
266
|
+
"text-size-smaller",
|
267
|
+
"text-style-label",
|
268
|
+
"text-style-secondary",
|
269
|
+
styles.label,
|
270
|
+
)}
|
271
|
+
>
|
272
|
+
Filter:
|
273
|
+
</span>
|
274
|
+
<div
|
275
|
+
ref={editorRef}
|
276
|
+
className={clsx(
|
277
|
+
filteringResultInstant?.error && "filter-pending",
|
278
|
+
styles.input,
|
279
|
+
)}
|
280
|
+
/>
|
281
|
+
<span
|
282
|
+
className={clsx("bi", "bi-question-circle", styles.help)}
|
283
|
+
data-tooltip={FILTER_TOOLTIP}
|
284
|
+
data-tooltip-position="bottom-left"
|
285
|
+
/>
|
286
|
+
</div>
|
287
|
+
);
|
288
|
+
};
|