inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/cache.py +8 -7
- inspect_ai/_cli/common.py +0 -12
- inspect_ai/_cli/eval.py +32 -4
- inspect_ai/_cli/info.py +1 -0
- inspect_ai/_cli/list.py +1 -1
- inspect_ai/_cli/log.py +2 -0
- inspect_ai/_cli/main.py +1 -1
- inspect_ai/_cli/sandbox.py +4 -1
- inspect_ai/_cli/score.py +181 -32
- inspect_ai/_cli/trace.py +10 -0
- inspect_ai/_cli/view.py +4 -2
- inspect_ai/_display/core/active.py +2 -3
- inspect_ai/_display/core/config.py +7 -1
- inspect_ai/_display/textual/widgets/samples.py +4 -3
- inspect_ai/_display/textual/widgets/sandbox.py +6 -0
- inspect_ai/_eval/eval.py +104 -101
- inspect_ai/_eval/evalset.py +75 -75
- inspect_ai/_eval/loader.py +122 -12
- inspect_ai/_eval/registry.py +1 -1
- inspect_ai/_eval/run.py +14 -0
- inspect_ai/_eval/score.py +125 -36
- inspect_ai/_eval/task/log.py +105 -4
- inspect_ai/_eval/task/results.py +92 -38
- inspect_ai/_eval/task/run.py +9 -2
- inspect_ai/_eval/task/sandbox.py +35 -2
- inspect_ai/_eval/task/task.py +49 -46
- inspect_ai/_util/constants.py +1 -1
- inspect_ai/_util/content.py +8 -0
- inspect_ai/_util/error.py +2 -0
- inspect_ai/_util/file.py +15 -1
- inspect_ai/_util/hash.py +1 -1
- inspect_ai/_util/logger.py +4 -2
- inspect_ai/_util/registry.py +7 -1
- inspect_ai/_view/view.py +1 -2
- inspect_ai/_view/www/.vscode/extensions.json +3 -0
- inspect_ai/_view/www/.vscode/settings.json +8 -0
- inspect_ai/_view/www/App.css +97 -29
- inspect_ai/_view/www/README.md +1 -1
- inspect_ai/_view/www/dist/assets/index.css +16663 -14674
- inspect_ai/_view/www/dist/assets/index.js +58808 -51348
- inspect_ai/_view/www/dist/index.html +1 -1
- inspect_ai/_view/www/index.html +2 -2
- inspect_ai/_view/www/log-schema.json +87 -73
- inspect_ai/_view/www/package.json +22 -4
- inspect_ai/_view/www/postcss.config.cjs +8 -9
- inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
- inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
- inspect_ai/_view/www/src/api/api-browser.ts +2 -2
- inspect_ai/_view/www/src/api/api-http.ts +3 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
- inspect_ai/_view/www/src/api/client-api.ts +4 -4
- inspect_ai/_view/www/src/api/index.ts +4 -4
- inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
- inspect_ai/_view/www/src/appearance/colors.ts +9 -0
- inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
- inspect_ai/_view/www/src/appearance/icons.ts +100 -0
- inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
- inspect_ai/_view/www/src/components/Card.css +60 -0
- inspect_ai/_view/www/src/components/Card.tsx +109 -0
- inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
- inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
- inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
- inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
- inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
- inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
- inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
- inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
- inspect_ai/_view/www/src/components/FindBand.css +49 -0
- inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
- inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
- inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
- inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
- inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
- inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
- inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
- inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
- inspect_ai/_view/www/src/components/MessageBand.css +43 -0
- inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
- inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
- inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
- inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
- inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
- inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
- inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
- inspect_ai/_view/www/src/components/ToolButton.css +3 -0
- inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
- inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
- inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
- inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
- inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
- inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
- inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
- inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
- inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
- inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
- inspect_ai/_view/www/src/metadata/types.ts +18 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
- inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
- inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
- inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
- inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
- inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
- inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
- inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
- inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
- inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
- inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
- inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
- inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
- inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
- inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
- inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
- inspect_ai/_view/www/src/samples/error/error.ts +15 -0
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
- inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
- inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
- inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
- inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
- inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
- inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
- inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
- inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
- inspect_ai/_view/www/src/types/log.d.ts +108 -19
- inspect_ai/_view/www/src/types/prism.d.ts +11 -0
- inspect_ai/_view/www/src/types.ts +71 -0
- inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
- inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
- inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
- inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
- inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
- inspect_ai/_view/www/src/utils/attachments.ts +42 -0
- inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
- inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
- inspect_ai/_view/www/src/utils/debugging.ts +28 -0
- inspect_ai/_view/www/src/utils/dom.ts +30 -0
- inspect_ai/_view/www/src/utils/format.ts +194 -0
- inspect_ai/_view/www/src/utils/git.ts +7 -0
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/_view/www/src/utils/http.ts +14 -0
- inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
- inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
- inspect_ai/_view/www/src/utils/queue.ts +51 -0
- inspect_ai/_view/www/src/utils/sync.ts +114 -0
- inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
- inspect_ai/_view/www/src/utils/vscode.ts +13 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
- inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
- inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
- inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
- inspect_ai/_view/www/src/workspace/types.ts +10 -0
- inspect_ai/_view/www/src/workspace/utils.ts +34 -0
- inspect_ai/_view/www/tsconfig.json +23 -9
- inspect_ai/_view/www/vite.config.js +8 -17
- inspect_ai/_view/www/yarn.lock +627 -556
- inspect_ai/approval/_approval.py +2 -0
- inspect_ai/approval/_approver.py +4 -4
- inspect_ai/approval/_auto.py +1 -1
- inspect_ai/approval/_human/approver.py +3 -0
- inspect_ai/approval/_policy.py +5 -0
- inspect_ai/approval/_registry.py +2 -2
- inspect_ai/dataset/_dataset.py +64 -37
- inspect_ai/dataset/_sources/__init__.py +0 -0
- inspect_ai/dataset/_sources/csv.py +20 -12
- inspect_ai/dataset/_sources/file.py +4 -0
- inspect_ai/dataset/_sources/hf.py +39 -29
- inspect_ai/dataset/_sources/json.py +17 -9
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_convert.py +3 -3
- inspect_ai/log/_file.py +24 -9
- inspect_ai/log/_log.py +101 -13
- inspect_ai/log/_message.py +4 -2
- inspect_ai/log/_recorders/file.py +4 -0
- inspect_ai/log/_recorders/json.py +5 -7
- inspect_ai/log/_recorders/recorder.py +3 -0
- inspect_ai/log/_transcript.py +19 -8
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_cache.py +39 -21
- inspect_ai/model/_call_tools.py +4 -3
- inspect_ai/model/_chat_message.py +14 -4
- inspect_ai/model/_generate_config.py +1 -1
- inspect_ai/model/_model.py +31 -24
- inspect_ai/model/_model_output.py +14 -1
- inspect_ai/model/_openai.py +10 -18
- inspect_ai/model/_providers/anthropic.py +3 -3
- inspect_ai/model/_providers/google.py +9 -5
- inspect_ai/model/_providers/openai.py +5 -9
- inspect_ai/model/_providers/openai_o1.py +3 -5
- inspect_ai/model/_providers/openrouter.py +86 -0
- inspect_ai/model/_providers/providers.py +11 -0
- inspect_ai/scorer/__init__.py +6 -1
- inspect_ai/scorer/_answer.py +7 -7
- inspect_ai/scorer/_classification.py +38 -18
- inspect_ai/scorer/_common.py +2 -8
- inspect_ai/scorer/_match.py +4 -5
- inspect_ai/scorer/_metric.py +87 -28
- inspect_ai/scorer/_metrics/__init__.py +3 -3
- inspect_ai/scorer/_metrics/accuracy.py +8 -10
- inspect_ai/scorer/_metrics/mean.py +3 -17
- inspect_ai/scorer/_metrics/std.py +111 -30
- inspect_ai/scorer/_model.py +12 -12
- inspect_ai/scorer/_pattern.py +3 -3
- inspect_ai/scorer/_reducer/reducer.py +36 -21
- inspect_ai/scorer/_reducer/registry.py +2 -2
- inspect_ai/scorer/_reducer/types.py +7 -1
- inspect_ai/scorer/_score.py +11 -1
- inspect_ai/scorer/_scorer.py +110 -16
- inspect_ai/solver/__init__.py +1 -1
- inspect_ai/solver/_basic_agent.py +19 -22
- inspect_ai/solver/_bridge/__init__.py +0 -3
- inspect_ai/solver/_bridge/bridge.py +3 -3
- inspect_ai/solver/_chain.py +1 -2
- inspect_ai/solver/_critique.py +3 -3
- inspect_ai/solver/_fork.py +2 -2
- inspect_ai/solver/_human_agent/__init__.py +0 -0
- inspect_ai/solver/_human_agent/agent.py +5 -8
- inspect_ai/solver/_human_agent/commands/clock.py +14 -10
- inspect_ai/solver/_human_agent/commands/note.py +1 -1
- inspect_ai/solver/_human_agent/commands/score.py +0 -11
- inspect_ai/solver/_multiple_choice.py +38 -26
- inspect_ai/solver/_prompt.py +7 -7
- inspect_ai/solver/_solver.py +53 -52
- inspect_ai/solver/_task_state.py +80 -69
- inspect_ai/solver/_use_tools.py +9 -9
- inspect_ai/tool/__init__.py +4 -1
- inspect_ai/tool/_tool.py +43 -14
- inspect_ai/tool/_tool_call.py +6 -2
- inspect_ai/tool/_tool_choice.py +3 -1
- inspect_ai/tool/_tool_def.py +10 -8
- inspect_ai/tool/_tool_params.py +24 -0
- inspect_ai/tool/_tool_with.py +7 -7
- inspect_ai/tool/_tools/__init__.py +0 -0
- inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
- inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
- inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
- inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/tool/_tools/_execute.py +23 -11
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
- inspect_ai/tool/_tools/_web_search.py +7 -5
- inspect_ai/tool/beta.py +3 -0
- inspect_ai/util/_concurrency.py +3 -3
- inspect_ai/util/_panel.py +2 -0
- inspect_ai/util/_resource.py +12 -12
- inspect_ai/util/_sandbox/docker/compose.py +23 -20
- inspect_ai/util/_sandbox/docker/config.py +2 -1
- inspect_ai/util/_sandbox/docker/docker.py +42 -86
- inspect_ai/util/_sandbox/docker/service.py +100 -0
- inspect_ai/util/_sandbox/environment.py +99 -96
- inspect_ai/util/_sandbox/self_check.py +124 -16
- inspect_ai/util/_subprocess.py +5 -3
- inspect_ai/util/_subtask.py +15 -16
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
- inspect_ai-0.3.64.dist-info/RECORD +625 -0
- inspect_ai/_view/www/src/Register.mjs +0 -3
- inspect_ai/_view/www/src/Types.mjs +0 -38
- inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
- inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
- inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
- inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
- inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
- inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
- inspect_ai/_view/www/src/components/Card.mjs +0 -126
- inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
- inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
- inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
- inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
- inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
- inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
- inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
- inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
- inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
- inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
- inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
- inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
- inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
- inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
- inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
- inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
- inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
- inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
- inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
- inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
- inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
- inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
- inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
- inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
- inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
- inspect_ai/_view/www/src/components/Tools.mjs +0 -376
- inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
- inspect_ai/_view/www/src/components/ansi-output.js +0 -932
- inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
- inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
- inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
- inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
- inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
- inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
- inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
- inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
- inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
- inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
- inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
- inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
- inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
- inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
- inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
- inspect_ai/_view/www/src/utils/Format.mjs +0 -260
- inspect_ai/_view/www/src/utils/Git.mjs +0 -12
- inspect_ai/_view/www/src/utils/Html.mjs +0 -21
- inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
- inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
- inspect_ai/_view/www/src/utils/http.mjs +0 -18
- inspect_ai/_view/www/src/utils/queue.mjs +0 -67
- inspect_ai/_view/www/src/utils/sync.mjs +0 -101
- inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
- inspect_ai/tool/beta/__init__.py +0 -5
- inspect_ai-0.3.62.dist-info/RECORD +0 -481
- /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
- /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
- /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
inspect_ai/approval/_approval.py
CHANGED
inspect_ai/approval/_approver.py
CHANGED
@@ -20,10 +20,10 @@ class Approver(Protocol):
|
|
20
20
|
Approve or reject a tool call.
|
21
21
|
|
22
22
|
Args:
|
23
|
-
message
|
24
|
-
call
|
25
|
-
view
|
26
|
-
state
|
23
|
+
message: Message generated by the model along with the tool call.
|
24
|
+
call: The tool call to be approved.
|
25
|
+
view: Custom rendering of tool context and call.
|
26
|
+
state: The current task state, if available.
|
27
27
|
|
28
28
|
Returns:
|
29
29
|
Approval: An Approval object containing the decision and explanation.
|
inspect_ai/approval/_auto.py
CHANGED
@@ -11,7 +11,7 @@ def auto_approver(decision: ApprovalDecision = "approve") -> Approver:
|
|
11
11
|
"""Automatically apply a decision to tool calls.
|
12
12
|
|
13
13
|
Args:
|
14
|
-
decision
|
14
|
+
decision: Decision to apply.
|
15
15
|
|
16
16
|
Returns:
|
17
17
|
Approver: Auto approver.
|
inspect_ai/approval/_policy.py
CHANGED
@@ -20,8 +20,13 @@ from ._call import call_approver, record_approval
|
|
20
20
|
|
21
21
|
@dataclass
|
22
22
|
class ApprovalPolicy:
|
23
|
+
"""Policy mapping approvers to tools."""
|
24
|
+
|
23
25
|
approver: Approver
|
26
|
+
"""Approver for policy."""
|
27
|
+
|
24
28
|
tools: str | list[str]
|
29
|
+
"""Tools to use this approver for (can be full tool names or globs)."""
|
25
30
|
|
26
31
|
|
27
32
|
def policy_approver(policies: str | list[ApprovalPolicy]) -> Approver:
|
inspect_ai/approval/_registry.py
CHANGED
@@ -31,11 +31,11 @@ def approver(*args: Any, name: str | None = None, **attribs: Any) -> Any:
|
|
31
31
|
Args:
|
32
32
|
*args: Function returning `Approver` targeted by
|
33
33
|
plain approver decorator without attributes (e.g. `@approver`)
|
34
|
-
name
|
34
|
+
name:
|
35
35
|
Optional name for approver. If the decorator has no name
|
36
36
|
argument then the name of the function
|
37
37
|
will be used to automatically assign a name.
|
38
|
-
**attribs:
|
38
|
+
**attribs: Additional approver attributes.
|
39
39
|
|
40
40
|
Returns:
|
41
41
|
Approver with registry attributes.
|
inspect_ai/dataset/_dataset.py
CHANGED
@@ -27,6 +27,8 @@ MT = TypeVar("MT", bound=BaseModel)
|
|
27
27
|
|
28
28
|
|
29
29
|
class Sample(BaseModel):
|
30
|
+
r"""Sample for an evaluation task."""
|
31
|
+
|
30
32
|
def __init__(
|
31
33
|
self,
|
32
34
|
input: str | list[ChatMessage],
|
@@ -38,22 +40,22 @@ class Sample(BaseModel):
|
|
38
40
|
files: dict[str, str] | None = None,
|
39
41
|
setup: str | None = None,
|
40
42
|
) -> None:
|
41
|
-
r"""
|
43
|
+
r"""Create a Sample.
|
42
44
|
|
43
45
|
Args:
|
44
|
-
input
|
45
|
-
choices
|
46
|
-
|
47
|
-
target
|
46
|
+
input: The input to be submitted to the model.
|
47
|
+
choices: Optional. List of available answer choices
|
48
|
+
(used only for multiple-choice evals).
|
49
|
+
target: Optional. Ideal target output. May be a literal value
|
48
50
|
or narrative text to be used by a model grader.
|
49
|
-
id
|
50
|
-
metadata
|
51
|
-
|
52
|
-
|
53
|
-
files
|
54
|
-
|
55
|
-
setup
|
56
|
-
|
51
|
+
id: Optional. Unique identifier for sample.
|
52
|
+
metadata: Optional. Arbitrary metadata associated with the sample.
|
53
|
+
sandbox (SandboxEnvironmentType | None): Sandbox environment type (or optionally a str or tuple with a shorthand spec)
|
54
|
+
sandbox: Optional. Sandbox specification for this sample.
|
55
|
+
files: Optional. Files that go along with the sample (copied to
|
56
|
+
SandboxEnvironment). Files can be paths, inline text, or inline binary (base64 encoded data URL).
|
57
|
+
setup: Optional. Setup script to run for sample (run
|
58
|
+
within default SandboxEnvironment).
|
57
59
|
"""
|
58
60
|
super().__init__(
|
59
61
|
input=input,
|
@@ -156,14 +158,6 @@ class Dataset(Sequence[Sample], abc.ABC):
|
|
156
158
|
@abc.abstractmethod
|
157
159
|
def __len__(self) -> int: ...
|
158
160
|
|
159
|
-
@abc.abstractmethod
|
160
|
-
def shuffle(self, seed: int | None = None) -> None:
|
161
|
-
"""Shuffle the order of the dataset (in place).
|
162
|
-
|
163
|
-
Args:
|
164
|
-
seed: (int | None): Random seed for shuffling (optional).
|
165
|
-
"""
|
166
|
-
|
167
161
|
@abc.abstractmethod
|
168
162
|
def sort(
|
169
163
|
self,
|
@@ -177,8 +171,8 @@ class Dataset(Sequence[Sample], abc.ABC):
|
|
177
171
|
The key function defaults to measuring the length of the sample's input field.
|
178
172
|
|
179
173
|
Args:
|
180
|
-
reverse
|
181
|
-
key
|
174
|
+
reverse: If `True`, sort in descending order. Defaults to False.
|
175
|
+
key: a callable mapping each item to a numeric value (optional, defaults to sample_input_len).
|
182
176
|
"""
|
183
177
|
|
184
178
|
@abc.abstractmethod
|
@@ -188,28 +182,33 @@ class Dataset(Sequence[Sample], abc.ABC):
|
|
188
182
|
"""Filter the dataset using a predicate.
|
189
183
|
|
190
184
|
Args:
|
191
|
-
predicate
|
192
|
-
name
|
185
|
+
predicate: Filtering function.
|
186
|
+
name: Name for filtered dataset (optional).
|
193
187
|
|
194
188
|
Returns:
|
195
189
|
Filtered dataset.
|
196
190
|
"""
|
197
191
|
|
192
|
+
@abc.abstractmethod
|
193
|
+
def shuffle(self, seed: int | None = None) -> None:
|
194
|
+
"""Shuffle the order of the dataset (in place).
|
195
|
+
|
196
|
+
Args:
|
197
|
+
seed: Random seed for shuffling (optional).
|
198
|
+
"""
|
199
|
+
|
200
|
+
@abc.abstractmethod
|
201
|
+
def shuffle_choices(self, seed: int | None = None) -> None:
|
202
|
+
"""Shuffle the order of the choices within each sample.
|
203
|
+
|
204
|
+
Args:
|
205
|
+
seed: Random seed for shuffling (optional).
|
206
|
+
"""
|
207
|
+
|
198
208
|
|
199
209
|
@dataclass
|
200
210
|
class FieldSpec:
|
201
|
-
r"""Specification for mapping data source fields to sample fields.
|
202
|
-
|
203
|
-
Args:
|
204
|
-
input (str): Name of the field containing the sample input.
|
205
|
-
target (str): Name of the field containing the sample target.
|
206
|
-
choices (str): Optional. Name of field containing the list of answer choices.
|
207
|
-
id (str): Optional. Unique identifier for the sample.
|
208
|
-
metadata (list[str] | None): List of additional field names that should be read as metadata.
|
209
|
-
sandbox (str): Optional. Sandbox type along with optional config file
|
210
|
-
files (str): Optional. Files that go along with the sample.
|
211
|
-
setup (str): Optional. Setup script to run for sample .
|
212
|
-
"""
|
211
|
+
r"""Specification for mapping data source fields to sample fields."""
|
213
212
|
|
214
213
|
input: str = field(default="input")
|
215
214
|
"""Name of the field containing the sample input."""
|
@@ -315,6 +314,34 @@ class MemoryDataset(Dataset):
|
|
315
314
|
random.shuffle(self.samples)
|
316
315
|
self._shuffled = True
|
317
316
|
|
317
|
+
@override
|
318
|
+
def shuffle_choices(self, seed: int | None = None) -> None:
|
319
|
+
rand = random.Random(seed)
|
320
|
+
for sample in self.samples:
|
321
|
+
if not sample.choices:
|
322
|
+
continue
|
323
|
+
# The original positions
|
324
|
+
positions = list(range(len(sample.choices)))
|
325
|
+
|
326
|
+
# Shuffle the choices
|
327
|
+
rand.shuffle(positions)
|
328
|
+
shuffled_choices = [sample.choices[i] for i in positions]
|
329
|
+
|
330
|
+
# Map of original position / target letter
|
331
|
+
position_map = {i: chr(65 + new_i) for new_i, i in enumerate(positions)}
|
332
|
+
|
333
|
+
# Update to the shuffled choices and target
|
334
|
+
sample.choices = shuffled_choices
|
335
|
+
sample.target = self._remap_target(sample.target, position_map=position_map)
|
336
|
+
|
337
|
+
def _remap_target(
|
338
|
+
self, target: str | list[str], position_map: dict[int, str]
|
339
|
+
) -> str | list[str]:
|
340
|
+
if isinstance(target, list):
|
341
|
+
return [position_map[ord(t) - 65] for t in target]
|
342
|
+
else:
|
343
|
+
return position_map[ord(target) - 65]
|
344
|
+
|
318
345
|
@override
|
319
346
|
def sort(
|
320
347
|
self,
|
File without changes
|
@@ -23,6 +23,7 @@ def csv_dataset(
|
|
23
23
|
auto_id: bool = False,
|
24
24
|
shuffle: bool = False,
|
25
25
|
seed: int | None = None,
|
26
|
+
shuffle_choices: bool | int | None = None,
|
26
27
|
limit: int | None = None,
|
27
28
|
dialect: str = "unix",
|
28
29
|
encoding: str = "utf-8",
|
@@ -34,29 +35,30 @@ def csv_dataset(
|
|
34
35
|
r"""Read dataset from CSV file.
|
35
36
|
|
36
37
|
Args:
|
37
|
-
csv_file
|
38
|
+
csv_file: Path to CSV file. Can be a local filesystem path,
|
38
39
|
a path to an S3 bucket (e.g. "s3://my-bucket"), or an HTTPS URL.
|
39
40
|
Use `fs_options` to pass arguments through to the `S3FileSystem` constructor.
|
40
|
-
sample_fields
|
41
|
+
sample_fields: Method of mapping underlying
|
41
42
|
fields in the data source to Sample objects. Pass `None` if the data is already
|
42
43
|
stored in `Sample` form (i.e. has "input" and "target" columns.); Pass a
|
43
44
|
`FieldSpec` to specify mapping fields by name; Pass a `RecordToSample` to
|
44
45
|
handle mapping with a custom function that returns one or more samples.
|
45
|
-
auto_id
|
46
|
-
shuffle
|
47
|
-
seed:
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
46
|
+
auto_id: Assign an auto-incrementing ID for each sample.
|
47
|
+
shuffle: Randomly shuffle the dataset order.
|
48
|
+
seed: Seed used for random shuffle.
|
49
|
+
shuffle_choices: Whether to shuffle the choices. If an int is passed, this will be used as the seed when shuffling.
|
50
|
+
limit: Limit the number of records to read.
|
51
|
+
dialect: CSV dialect ("unix", "excel" or "excel-tab"). Defaults to "unix". See https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters for more details.
|
52
|
+
encoding: Text encoding for file (defaults to "utf-8").
|
53
|
+
name: Optional name for dataset (for logging). If not specified,
|
52
54
|
defaults to the stem of the filename
|
53
|
-
fs_options
|
55
|
+
fs_options: Optional. Additional arguments to pass through
|
54
56
|
to the filesystem provider (e.g. `S3FileSystem`). Use `{"anon": True }`
|
55
57
|
if you are accessing a public S3 bucket with no credentials.
|
56
|
-
fieldnames
|
58
|
+
fieldnames: Optional. A list of fieldnames to use for the CSV.
|
57
59
|
If None, the values in the first row of the file will be used as the fieldnames.
|
58
60
|
Useful for files without a header.
|
59
|
-
delimiter
|
61
|
+
delimiter: Optional. The delimiter to use when parsing the file. Defaults to ",".
|
60
62
|
|
61
63
|
Returns:
|
62
64
|
Dataset read from CSV file.
|
@@ -86,6 +88,12 @@ def csv_dataset(
|
|
86
88
|
if shuffle:
|
87
89
|
dataset.shuffle(seed=seed)
|
88
90
|
|
91
|
+
# shuffle choices, if requested
|
92
|
+
if isinstance(shuffle_choices, int):
|
93
|
+
dataset.shuffle_choices(seed=shuffle_choices)
|
94
|
+
elif shuffle_choices is True:
|
95
|
+
dataset.shuffle_choices()
|
96
|
+
|
89
97
|
# limit if requested
|
90
98
|
if limit:
|
91
99
|
return dataset[0:limit]
|
@@ -16,6 +16,7 @@ def file_dataset(
|
|
16
16
|
auto_id: bool = False,
|
17
17
|
shuffle: bool = False,
|
18
18
|
seed: int | None = None,
|
19
|
+
shuffle_choices: bool | int | None = None,
|
19
20
|
limit: int | None = None,
|
20
21
|
dialect: str = "unix",
|
21
22
|
encoding: str = "utf-8",
|
@@ -40,6 +41,7 @@ def file_dataset(
|
|
40
41
|
auto_id (bool): Assign an auto-incrementing ID for each sample.
|
41
42
|
shuffle (bool): Randomly shuffle the dataset order.
|
42
43
|
seed (int | None): Seed used for random shuffle.
|
44
|
+
shuffle_choices (bool | int | None): Whether to shuffle the choices. If an int is passed, this will be used as the seed when shuffling.
|
43
45
|
limit (int | None): Limit the number of records to read.
|
44
46
|
dialect (str): CSV dialect ("unix" or "excel", defaults to "unix"). Only
|
45
47
|
applies to reading CSV files.
|
@@ -66,6 +68,7 @@ def file_dataset(
|
|
66
68
|
auto_id=auto_id,
|
67
69
|
shuffle=shuffle,
|
68
70
|
seed=seed,
|
71
|
+
shuffle_choices=shuffle_choices,
|
69
72
|
limit=limit,
|
70
73
|
encoding=encoding,
|
71
74
|
name=name,
|
@@ -78,6 +81,7 @@ def file_dataset(
|
|
78
81
|
auto_id=auto_id,
|
79
82
|
shuffle=shuffle,
|
80
83
|
seed=seed,
|
84
|
+
shuffle_choices=shuffle_choices,
|
81
85
|
limit=limit,
|
82
86
|
dialect=dialect,
|
83
87
|
encoding=encoding,
|
@@ -29,6 +29,7 @@ def hf_dataset(
|
|
29
29
|
auto_id: bool = False,
|
30
30
|
shuffle: bool = False,
|
31
31
|
seed: int | None = None,
|
32
|
+
shuffle_choices: bool | int | None = None,
|
32
33
|
limit: int | None = None,
|
33
34
|
trust: bool = False,
|
34
35
|
cached: bool = True,
|
@@ -40,35 +41,36 @@ def hf_dataset(
|
|
40
41
|
`datasets` package, including remote datasets on Hugging Face Hub.
|
41
42
|
|
42
43
|
Args:
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
44
|
+
path: Path or name of the dataset. Depending on path, the dataset
|
45
|
+
builder that is used comes from a generic dataset script (JSON, CSV,
|
46
|
+
Parquet, text etc.) or from the dataset script (a python file) inside
|
47
|
+
the dataset directory.
|
48
|
+
split: Which split of the data to load.
|
49
|
+
name: Name of the dataset configuration.
|
50
|
+
data_dir: data_dir of the dataset configuration
|
51
|
+
to read data from.
|
52
|
+
revision: Specific revision to load (e.g. "main", a branch
|
53
|
+
name, or a specific commit SHA). When using `revision` the `cached` option
|
54
|
+
is ignored and datasets are revalidated on Hugging Face before loading.
|
55
|
+
sample_fields: Method of mapping underlying
|
56
|
+
fields in the data source to Sample objects. Pass `None` if the data is already
|
57
|
+
stored in `Sample` form (i.e. has "input" and "target" columns.); Pass a
|
58
|
+
`FieldSpec` to specify mapping fields by name; Pass a `RecordToSample` to
|
58
59
|
handle mapping with a custom function that returns one or more samples.
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
60
|
+
auto_id: Assign an auto-incrementing ID for each sample.
|
61
|
+
shuffle: Randomly shuffle the dataset order.
|
62
|
+
seed: Seed used for random shuffle.
|
63
|
+
shuffle_choices: Whether to shuffle the choices. If an int is passed, this will be used as the seed when shuffling.
|
64
|
+
limit: Limit the number of records to read.
|
65
|
+
trust: Whether or not to allow for datasets defined on the Hub
|
66
|
+
using a dataset script. This option should only be set to True for
|
67
|
+
repositories you trust and in which you have read the code, as it
|
68
|
+
will execute code present on the Hub on your local machine.
|
69
|
+
cached: By default, datasets are read once from HuggingFace
|
70
|
+
Hub and then cached for future reads. Pass `cached=False` to force
|
71
|
+
re-reading the dataset from Hugging Face. Ignored when the `revision`
|
72
|
+
option is specified.
|
73
|
+
**kwargs (dict[str, Any]): Additional arguments to pass through to the
|
72
74
|
`load_dataset` function of the `datasets` package.
|
73
75
|
|
74
76
|
Returns:
|
@@ -117,8 +119,16 @@ def hf_dataset(
|
|
117
119
|
dataset = dataset.select(range(limit))
|
118
120
|
|
119
121
|
# return the dataset
|
120
|
-
|
122
|
+
memory_dataset = MemoryDataset(
|
121
123
|
samples=data_to_samples(dataset.to_list(), data_to_sample, auto_id),
|
122
124
|
name=Path(path).stem if Path(path).exists() else path,
|
123
125
|
location=path,
|
124
126
|
)
|
127
|
+
|
128
|
+
# maybe shuffle the choices
|
129
|
+
if isinstance(shuffle_choices, int):
|
130
|
+
memory_dataset.shuffle_choices(seed=shuffle_choices)
|
131
|
+
elif shuffle_choices is True:
|
132
|
+
memory_dataset.shuffle_choices()
|
133
|
+
|
134
|
+
return memory_dataset
|
@@ -25,6 +25,7 @@ def json_dataset(
|
|
25
25
|
auto_id: bool = False,
|
26
26
|
shuffle: bool = False,
|
27
27
|
seed: int | None = None,
|
28
|
+
shuffle_choices: bool | int | None = None,
|
28
29
|
limit: int | None = None,
|
29
30
|
encoding: str = "utf-8",
|
30
31
|
name: str | None = None,
|
@@ -38,22 +39,23 @@ def json_dataset(
|
|
38
39
|
the `sample_fields` argument.
|
39
40
|
|
40
41
|
Args:
|
41
|
-
json_file
|
42
|
+
json_file: Path to JSON file. Can be a local filesystem path or
|
42
43
|
a path to an S3 bucket (e.g. "s3://my-bucket"). Use `fs_options`
|
43
44
|
to pass arguments through to the `S3FileSystem` constructor.
|
44
|
-
sample_fields
|
45
|
+
sample_fields: Method of mapping underlying
|
45
46
|
fields in the data source to `Sample` objects. Pass `None` if the data is already
|
46
47
|
stored in `Sample` form (i.e. object with "input" and "target" fields); Pass a
|
47
48
|
`FieldSpec` to specify mapping fields by name; Pass a `RecordToSample` to
|
48
49
|
handle mapping with a custom function that returns one or more samples.
|
49
|
-
auto_id
|
50
|
-
shuffle
|
51
|
-
seed:
|
52
|
-
|
53
|
-
|
54
|
-
|
50
|
+
auto_id: Assign an auto-incrementing ID for each sample.
|
51
|
+
shuffle: Randomly shuffle the dataset order.
|
52
|
+
seed: Seed used for random shuffle.
|
53
|
+
shuffle_choices: Whether to shuffle the choices. If an int is passed, this will be used as the seed when shuffling.
|
54
|
+
limit: Limit the number of records to read.
|
55
|
+
encoding: Text encoding for file (defaults to "utf-8").
|
56
|
+
name: Optional name for dataset (for logging). If not specified,
|
55
57
|
defaults to the stem of the filename.
|
56
|
-
fs_options
|
58
|
+
fs_options: Optional. Additional arguments to pass through
|
57
59
|
to the filesystem provider (e.g. `S3FileSystem`). Use `{"anon": True }`
|
58
60
|
if you are accessing a public S3 bucket with no credentials.
|
59
61
|
|
@@ -86,6 +88,12 @@ def json_dataset(
|
|
86
88
|
if shuffle:
|
87
89
|
dataset.shuffle(seed=seed)
|
88
90
|
|
91
|
+
# shuffle choices, if requested
|
92
|
+
if isinstance(shuffle_choices, int):
|
93
|
+
dataset.shuffle_choices(seed=shuffle_choices)
|
94
|
+
elif shuffle_choices is True:
|
95
|
+
dataset.shuffle_choices()
|
96
|
+
|
89
97
|
# limit if requested
|
90
98
|
if limit:
|
91
99
|
return dataset[0:limit]
|
inspect_ai/log/__init__.py
CHANGED
@@ -22,6 +22,7 @@ from ._log import (
|
|
22
22
|
EvalResults,
|
23
23
|
EvalRevision,
|
24
24
|
EvalSample,
|
25
|
+
EvalSampleLimit,
|
25
26
|
EvalSampleReductions,
|
26
27
|
EvalSampleScore,
|
27
28
|
EvalScore,
|
@@ -61,6 +62,7 @@ __all__ = [
|
|
61
62
|
"EvalResults",
|
62
63
|
"EvalRevision",
|
63
64
|
"EvalSample",
|
65
|
+
"EvalSampleLimit",
|
64
66
|
"EvalSampleScore",
|
65
67
|
"EvalSampleReductions",
|
66
68
|
"EvalScore",
|
inspect_ai/log/_convert.py
CHANGED
@@ -20,12 +20,12 @@ def convert_eval_logs(
|
|
20
20
|
|
21
21
|
Args:
|
22
22
|
path (str): Path to source log file(s). Should be either a single
|
23
|
-
|
23
|
+
log file or a directory containing log files.
|
24
24
|
to (Literal["eval", "json"]): Format to convert to. If a file is
|
25
|
-
|
25
|
+
already in the target format it will just be copied to the output dir.
|
26
26
|
output_dir (str): Output directory to write converted log file(s) to.
|
27
27
|
overwrite (bool): Overwrite existing log files (defaults to `False`,
|
28
|
-
|
28
|
+
raising an error if the output file path already exists).
|
29
29
|
"""
|
30
30
|
from inspect_ai._display import display
|
31
31
|
|
inspect_ai/log/_file.py
CHANGED
@@ -3,6 +3,7 @@ import re
|
|
3
3
|
from logging import getLogger
|
4
4
|
from typing import Any, Callable, Generator, Literal, cast
|
5
5
|
|
6
|
+
from pydantic import BaseModel
|
6
7
|
from pydantic_core import to_json
|
7
8
|
|
8
9
|
from inspect_ai._util._async import run_coroutine
|
@@ -22,7 +23,21 @@ from ._recorders import recorder_type_for_format, recorder_type_for_location
|
|
22
23
|
logger = getLogger(__name__)
|
23
24
|
|
24
25
|
|
25
|
-
class EvalLogInfo(
|
26
|
+
class EvalLogInfo(BaseModel):
|
27
|
+
"""File info and task identifiers for eval log."""
|
28
|
+
|
29
|
+
name: str
|
30
|
+
"""Name of file."""
|
31
|
+
|
32
|
+
type: str
|
33
|
+
"""Type of file (file or directory)"""
|
34
|
+
|
35
|
+
size: int
|
36
|
+
"""File size in bytes."""
|
37
|
+
|
38
|
+
mtime: float | None
|
39
|
+
"""File modification time (None if the file is a directory on S3)."""
|
40
|
+
|
26
41
|
task: str
|
27
42
|
"""Task name."""
|
28
43
|
|
@@ -231,7 +246,7 @@ def write_log_dir_manifest(
|
|
231
246
|
|
232
247
|
|
233
248
|
def read_eval_log(
|
234
|
-
log_file: str |
|
249
|
+
log_file: str | EvalLogInfo,
|
235
250
|
header_only: bool = False,
|
236
251
|
resolve_attachments: bool = False,
|
237
252
|
format: Literal["eval", "json", "auto"] = "auto",
|
@@ -241,7 +256,7 @@ def read_eval_log(
|
|
241
256
|
Args:
|
242
257
|
log_file (str | EvalLogInfo): Log file to read.
|
243
258
|
header_only (bool): Read only the header (i.e. exclude
|
244
|
-
|
259
|
+
the "samples" and "logging" fields). Defaults to False.
|
245
260
|
resolve_attachments (bool): Resolve attachments (e.g. images)
|
246
261
|
to their full content.
|
247
262
|
format (Literal["eval", "json", "auto"]): Read from format
|
@@ -256,7 +271,7 @@ def read_eval_log(
|
|
256
271
|
|
257
272
|
|
258
273
|
async def read_eval_log_async(
|
259
|
-
log_file: str |
|
274
|
+
log_file: str | EvalLogInfo,
|
260
275
|
header_only: bool = False,
|
261
276
|
resolve_attachments: bool = False,
|
262
277
|
format: Literal["eval", "json", "auto"] = "auto",
|
@@ -304,13 +319,13 @@ async def read_eval_log_async(
|
|
304
319
|
|
305
320
|
|
306
321
|
def read_eval_log_headers(
|
307
|
-
log_files: list[str] | list[
|
322
|
+
log_files: list[str] | list[EvalLogInfo],
|
308
323
|
) -> list[EvalLog]:
|
309
324
|
return run_coroutine(read_eval_log_headers_async(log_files))
|
310
325
|
|
311
326
|
|
312
327
|
async def read_eval_log_headers_async(
|
313
|
-
log_files: list[str] | list[
|
328
|
+
log_files: list[str] | list[EvalLogInfo],
|
314
329
|
) -> list[EvalLog]:
|
315
330
|
return [
|
316
331
|
await read_eval_log_async(log_file, header_only=True) for log_file in log_files
|
@@ -318,7 +333,7 @@ async def read_eval_log_headers_async(
|
|
318
333
|
|
319
334
|
|
320
335
|
def read_eval_log_sample(
|
321
|
-
log_file: str |
|
336
|
+
log_file: str | EvalLogInfo,
|
322
337
|
id: int | str,
|
323
338
|
epoch: int = 1,
|
324
339
|
resolve_attachments: bool = False,
|
@@ -347,7 +362,7 @@ def read_eval_log_sample(
|
|
347
362
|
|
348
363
|
|
349
364
|
async def read_eval_log_sample_async(
|
350
|
-
log_file: str |
|
365
|
+
log_file: str | EvalLogInfo,
|
351
366
|
id: int | str,
|
352
367
|
epoch: int = 1,
|
353
368
|
resolve_attachments: bool = False,
|
@@ -386,7 +401,7 @@ async def read_eval_log_sample_async(
|
|
386
401
|
|
387
402
|
|
388
403
|
def read_eval_log_samples(
|
389
|
-
log_file: str |
|
404
|
+
log_file: str | EvalLogInfo,
|
390
405
|
all_samples_required: bool = True,
|
391
406
|
resolve_attachments: bool = False,
|
392
407
|
format: Literal["eval", "json", "auto"] = "auto",
|