PyPI - inspect-ai - Versions diffs - 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl - Mend

inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (518)

inspect_ai/_cli/cache.py +8 -7
inspect_ai/_cli/common.py +0 -12
inspect_ai/_cli/eval.py +32 -4
inspect_ai/_cli/info.py +1 -0
inspect_ai/_cli/list.py +1 -1
inspect_ai/_cli/log.py +2 -0
inspect_ai/_cli/main.py +1 -1
inspect_ai/_cli/sandbox.py +4 -1
inspect_ai/_cli/score.py +181 -32
inspect_ai/_cli/trace.py +10 -0
inspect_ai/_cli/view.py +4 -2
inspect_ai/_display/core/active.py +2 -3
inspect_ai/_display/core/config.py +7 -1
inspect_ai/_display/textual/widgets/samples.py +4 -3
inspect_ai/_display/textual/widgets/sandbox.py +6 -0
inspect_ai/_eval/eval.py +104 -101
inspect_ai/_eval/evalset.py +75 -75
inspect_ai/_eval/loader.py +122 -12
inspect_ai/_eval/registry.py +1 -1
inspect_ai/_eval/run.py +14 -0
inspect_ai/_eval/score.py +125 -36
inspect_ai/_eval/task/log.py +105 -4
inspect_ai/_eval/task/results.py +92 -38
inspect_ai/_eval/task/run.py +9 -2
inspect_ai/_eval/task/sandbox.py +35 -2
inspect_ai/_eval/task/task.py +49 -46
inspect_ai/_util/constants.py +1 -1
inspect_ai/_util/content.py +8 -0
inspect_ai/_util/error.py +2 -0
inspect_ai/_util/file.py +15 -1
inspect_ai/_util/hash.py +1 -1
inspect_ai/_util/logger.py +4 -2
inspect_ai/_util/registry.py +7 -1
inspect_ai/_view/view.py +1 -2
inspect_ai/_view/www/.vscode/extensions.json +3 -0
inspect_ai/_view/www/.vscode/settings.json +8 -0
inspect_ai/_view/www/App.css +97 -29
inspect_ai/_view/www/README.md +1 -1
inspect_ai/_view/www/dist/assets/index.css +16663 -14674
inspect_ai/_view/www/dist/assets/index.js +58808 -51348
inspect_ai/_view/www/dist/index.html +1 -1
inspect_ai/_view/www/index.html +2 -2
inspect_ai/_view/www/log-schema.json +87 -73
inspect_ai/_view/www/package.json +22 -4
inspect_ai/_view/www/postcss.config.cjs +8 -9
inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
inspect_ai/_view/www/src/api/api-browser.ts +2 -2
inspect_ai/_view/www/src/api/api-http.ts +3 -5
inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
inspect_ai/_view/www/src/api/client-api.ts +4 -4
inspect_ai/_view/www/src/api/index.ts +4 -4
inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
inspect_ai/_view/www/src/appearance/colors.ts +9 -0
inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
inspect_ai/_view/www/src/appearance/icons.ts +100 -0
inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
inspect_ai/_view/www/src/components/Card.css +60 -0
inspect_ai/_view/www/src/components/Card.tsx +109 -0
inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
inspect_ai/_view/www/src/components/FindBand.css +49 -0
inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
inspect_ai/_view/www/src/components/MessageBand.css +43 -0
inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
inspect_ai/_view/www/src/components/ToolButton.css +3 -0
inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
inspect_ai/_view/www/src/metadata/types.ts +18 -0
inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
inspect_ai/_view/www/src/samples/error/error.ts +15 -0
inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
inspect_ai/_view/www/src/types/log.d.ts +108 -19
inspect_ai/_view/www/src/types/prism.d.ts +11 -0
inspect_ai/_view/www/src/types.ts +71 -0
inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
inspect_ai/_view/www/src/utils/attachments.ts +42 -0
inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
inspect_ai/_view/www/src/utils/debugging.ts +28 -0
inspect_ai/_view/www/src/utils/dom.ts +30 -0
inspect_ai/_view/www/src/utils/format.ts +194 -0
inspect_ai/_view/www/src/utils/git.ts +7 -0
inspect_ai/_view/www/src/utils/html.ts +6 -0
inspect_ai/_view/www/src/utils/http.ts +14 -0
inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
inspect_ai/_view/www/src/utils/queue.ts +51 -0
inspect_ai/_view/www/src/utils/sync.ts +114 -0
inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
inspect_ai/_view/www/src/utils/vscode.ts +13 -0
inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
inspect_ai/_view/www/src/workspace/types.ts +10 -0
inspect_ai/_view/www/src/workspace/utils.ts +34 -0
inspect_ai/_view/www/tsconfig.json +23 -9
inspect_ai/_view/www/vite.config.js +8 -17
inspect_ai/_view/www/yarn.lock +627 -556
inspect_ai/approval/_approval.py +2 -0
inspect_ai/approval/_approver.py +4 -4
inspect_ai/approval/_auto.py +1 -1
inspect_ai/approval/_human/approver.py +3 -0
inspect_ai/approval/_policy.py +5 -0
inspect_ai/approval/_registry.py +2 -2
inspect_ai/dataset/_dataset.py +64 -37
inspect_ai/dataset/_sources/__init__.py +0 -0
inspect_ai/dataset/_sources/csv.py +20 -12
inspect_ai/dataset/_sources/file.py +4 -0
inspect_ai/dataset/_sources/hf.py +39 -29
inspect_ai/dataset/_sources/json.py +17 -9
inspect_ai/log/__init__.py +2 -0
inspect_ai/log/_convert.py +3 -3
inspect_ai/log/_file.py +24 -9
inspect_ai/log/_log.py +101 -13
inspect_ai/log/_message.py +4 -2
inspect_ai/log/_recorders/file.py +4 -0
inspect_ai/log/_recorders/json.py +5 -7
inspect_ai/log/_recorders/recorder.py +3 -0
inspect_ai/log/_transcript.py +19 -8
inspect_ai/model/__init__.py +2 -0
inspect_ai/model/_cache.py +39 -21
inspect_ai/model/_call_tools.py +4 -3
inspect_ai/model/_chat_message.py +14 -4
inspect_ai/model/_generate_config.py +1 -1
inspect_ai/model/_model.py +31 -24
inspect_ai/model/_model_output.py +14 -1
inspect_ai/model/_openai.py +10 -18
inspect_ai/model/_providers/anthropic.py +3 -3
inspect_ai/model/_providers/google.py +9 -5
inspect_ai/model/_providers/openai.py +5 -9
inspect_ai/model/_providers/openai_o1.py +3 -5
inspect_ai/model/_providers/openrouter.py +86 -0
inspect_ai/model/_providers/providers.py +11 -0
inspect_ai/scorer/__init__.py +6 -1
inspect_ai/scorer/_answer.py +7 -7
inspect_ai/scorer/_classification.py +38 -18
inspect_ai/scorer/_common.py +2 -8
inspect_ai/scorer/_match.py +4 -5
inspect_ai/scorer/_metric.py +87 -28
inspect_ai/scorer/_metrics/__init__.py +3 -3
inspect_ai/scorer/_metrics/accuracy.py +8 -10
inspect_ai/scorer/_metrics/mean.py +3 -17
inspect_ai/scorer/_metrics/std.py +111 -30
inspect_ai/scorer/_model.py +12 -12
inspect_ai/scorer/_pattern.py +3 -3
inspect_ai/scorer/_reducer/reducer.py +36 -21
inspect_ai/scorer/_reducer/registry.py +2 -2
inspect_ai/scorer/_reducer/types.py +7 -1
inspect_ai/scorer/_score.py +11 -1
inspect_ai/scorer/_scorer.py +110 -16
inspect_ai/solver/__init__.py +1 -1
inspect_ai/solver/_basic_agent.py +19 -22
inspect_ai/solver/_bridge/__init__.py +0 -3
inspect_ai/solver/_bridge/bridge.py +3 -3
inspect_ai/solver/_chain.py +1 -2
inspect_ai/solver/_critique.py +3 -3
inspect_ai/solver/_fork.py +2 -2
inspect_ai/solver/_human_agent/__init__.py +0 -0
inspect_ai/solver/_human_agent/agent.py +5 -8
inspect_ai/solver/_human_agent/commands/clock.py +14 -10
inspect_ai/solver/_human_agent/commands/note.py +1 -1
inspect_ai/solver/_human_agent/commands/score.py +0 -11
inspect_ai/solver/_multiple_choice.py +38 -26
inspect_ai/solver/_prompt.py +7 -7
inspect_ai/solver/_solver.py +53 -52
inspect_ai/solver/_task_state.py +80 -69
inspect_ai/solver/_use_tools.py +9 -9
inspect_ai/tool/__init__.py +4 -1
inspect_ai/tool/_tool.py +43 -14
inspect_ai/tool/_tool_call.py +6 -2
inspect_ai/tool/_tool_choice.py +3 -1
inspect_ai/tool/_tool_def.py +10 -8
inspect_ai/tool/_tool_params.py +24 -0
inspect_ai/tool/_tool_with.py +7 -7
inspect_ai/tool/_tools/__init__.py +0 -0
inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
inspect_ai/tool/_tools/_execute.py +23 -11
inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
inspect_ai/tool/_tools/_web_search.py +7 -5
inspect_ai/tool/beta.py +3 -0
inspect_ai/util/_concurrency.py +3 -3
inspect_ai/util/_panel.py +2 -0
inspect_ai/util/_resource.py +12 -12
inspect_ai/util/_sandbox/docker/compose.py +23 -20
inspect_ai/util/_sandbox/docker/config.py +2 -1
inspect_ai/util/_sandbox/docker/docker.py +42 -86
inspect_ai/util/_sandbox/docker/service.py +100 -0
inspect_ai/util/_sandbox/environment.py +99 -96
inspect_ai/util/_sandbox/self_check.py +124 -16
inspect_ai/util/_subprocess.py +5 -3
inspect_ai/util/_subtask.py +15 -16
{inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
{inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
inspect_ai-0.3.64.dist-info/RECORD +625 -0
inspect_ai/_view/www/src/Register.mjs +0 -3
inspect_ai/_view/www/src/Types.mjs +0 -38
inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
inspect_ai/_view/www/src/components/Card.mjs +0 -126
inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
inspect_ai/_view/www/src/components/Tools.mjs +0 -376
inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
inspect_ai/_view/www/src/components/ansi-output.js +0 -932
inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
inspect_ai/_view/www/src/utils/Format.mjs +0 -260
inspect_ai/_view/www/src/utils/Git.mjs +0 -12
inspect_ai/_view/www/src/utils/Html.mjs +0 -21
inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
inspect_ai/_view/www/src/utils/http.mjs +0 -18
inspect_ai/_view/www/src/utils/queue.mjs +0 -67
inspect_ai/_view/www/src/utils/sync.mjs +0 -101
inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
inspect_ai/tool/beta/__init__.py +0 -5
inspect_ai-0.3.62.dist-info/RECORD +0 -481
/inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
/inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
/inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
/inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
{inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0

inspect_ai/approval/_approval.py CHANGED Viewed

@@ -17,6 +17,8 @@ Possible values:
 class Approval(BaseModel):
+    """Approval details (decision, explanation, etc.)"""
     decision: ApprovalDecision
     """Approval decision."""

inspect_ai/approval/_approver.py CHANGED Viewed

@@ -20,10 +20,10 @@ class Approver(Protocol):
         Approve or reject a tool call.
         Args:
-            message (str): Message genreated by the model along with the tool call.
-            call (ToolCall): The tool call to be approved.
-            view (ToolCallView): Custom rendering of tool context and call.
-            state (state | None): The current task state, if available.
+            message: Message genreated by the model along with the tool call.
+            call: The tool call to be approved.
+            view: Custom rendering of tool context and call.
+            state: The current task state, if available.
         Returns:
             Approval: An Approval object containing the decision and explanation.

inspect_ai/approval/_auto.py CHANGED Viewed

@@ -11,7 +11,7 @@ def auto_approver(decision: ApprovalDecision = "approve") -> Approver:
     """Automatically apply a decision to tool calls.
     Args:
-       decision (ApprovalDecision): Decision to apply.
+       decision: Decision to apply.
     Returns:
        Approver: Auto approver.

inspect_ai/approval/_human/approver.py CHANGED Viewed

@@ -14,6 +14,9 @@ def human_approver(
 ) -> Approver:
     """Interactive human approver.
+    Args:
+       choices: Choices to present to human.
     Returns:
        Approver: Interactive human approver.
     """

inspect_ai/approval/_policy.py CHANGED Viewed

@@ -20,8 +20,13 @@ from ._call import call_approver, record_approval
 @dataclass
 class ApprovalPolicy:
+    """Policy mapping approvers to tools."""
     approver: Approver
+    """Approver for policy."""
     tools: str | list[str]
+    """Tools to use this approver for (can be full tool names or globs)."""
 def policy_approver(policies: str | list[ApprovalPolicy]) -> Approver:

inspect_ai/approval/_registry.py CHANGED Viewed

@@ -31,11 +31,11 @@ def approver(*args: Any, name: str | None = None, **attribs: Any) -> Any:
     Args:
       *args: Function returning `Approver` targeted by
         plain approver decorator without attributes (e.g. `@approver`)
-      name (str | None):
+      name:
         Optional name for approver. If the decorator has no name
         argument then the name of the function
         will be used to automatically assign a name.
-      **attribs: (dict[str,Any]): Additional approver attributes.
+      **attribs: Additional approver attributes.
     Returns:
         Approver with registry attributes.

inspect_ai/dataset/_dataset.py CHANGED Viewed

@@ -27,6 +27,8 @@ MT = TypeVar("MT", bound=BaseModel)
 class Sample(BaseModel):
+    r"""Sample for an evaluation task."""
     def __init__(
         self,
         input: str | list[ChatMessage],
@@ -38,22 +40,22 @@ class Sample(BaseModel):
         files: dict[str, str] | None = None,
         setup: str | None = None,
     ) -> None:
-        r"""Sample to be used in an evaluation task.
+        r"""Create a Sample.
         Args:
-            input (str | list[ChatMessage]): The input to be submitted to the model.
-            choices (list[str] | None): Optional. List of available answer choices
-            (used only for multiple-choice evals).
-            target (str | list[str]): Optional. Ideal target output. May be a literal value
+            input: The input to be submitted to the model.
+            choices: Optional. List of available answer choices
+                (used only for multiple-choice evals).
+            target: Optional. Ideal target output. May be a literal value
                 or narrative text to be used by a model grader.
-            id (int | str | None): Optional. Unique identifier for sample.
-            metadata (dict[str,Any] | None): Optional. Arbitrary metadata associated with the sample.
-            sandbox (SandboxEnvironmentType | None): Sandbox environment type
-            (or optionally a str or tuple with a shorthand spec)
-            files (dict[str, str] | None): Optional. Files that go along with the sample (copied to
-            SandboxEnvironment). Files can be paths, inline text, or inline binary (base64 encoded data URL).
-            setup (str | None): Optional. Setup script to run for sample (run
-            within default SandboxEnvironment).
+            id: Optional. Unique identifier for sample.
+            metadata: Optional. Arbitrary metadata associated with the sample.
+                sandbox (SandboxEnvironmentType | None): Sandbox environment type (or optionally a str or tuple with a shorthand spec)
+            sandbox: Optional. Sandbox specification for this sample.
+            files: Optional. Files that go along with the sample (copied to
+                SandboxEnvironment). Files can be paths, inline text, or inline binary (base64 encoded data URL).
+            setup: Optional. Setup script to run for sample (run
+                within default SandboxEnvironment).
         """
         super().__init__(
             input=input,
@@ -156,14 +158,6 @@ class Dataset(Sequence[Sample], abc.ABC):
     @abc.abstractmethod
     def __len__(self) -> int: ...
-    @abc.abstractmethod
-    def shuffle(self, seed: int | None = None) -> None:
-        """Shuffle the order of the dataset (in place).
-        Args:
-           seed: (int | None): Random seed for shuffling (optional).
-        """
     @abc.abstractmethod
     def sort(
         self,
@@ -177,8 +171,8 @@ class Dataset(Sequence[Sample], abc.ABC):
         The key function defaults to measuring the length of the sample's input field.
         Args:
-            reverse (bool): if true, sort in descending order. Defaults to False.
-            key (Callable[[Any], Any]): a callable mapping each item to a numeric value (optional, defaults to sample_input_len).
+            reverse: If `True`, sort in descending order. Defaults to False.
+            key: a callable mapping each item to a numeric value (optional, defaults to sample_input_len).
         """
     @abc.abstractmethod
@@ -188,28 +182,33 @@ class Dataset(Sequence[Sample], abc.ABC):
         """Filter the dataset using a predicate.
         Args:
-          predicate (Callable[[Sample], bool]): Filtering function.
-          name (str | None): Name for filtered dataset (optional).
+          predicate: Filtering function.
+          name: Name for filtered dataset (optional).
         Returns:
           Filtered dataset.
         """
+    @abc.abstractmethod
+    def shuffle(self, seed: int | None = None) -> None:
+        """Shuffle the order of the dataset (in place).
+        Args:
+           seed: Random seed for shuffling (optional).
+        """
+    @abc.abstractmethod
+    def shuffle_choices(self, seed: int | None = None) -> None:
+        """Shuffle the order of the choices within each sample.
+        Args:
+           seed: Random seed for shuffling (optional).
+        """
 @dataclass
 class FieldSpec:
-    r"""Specification for mapping data source fields to sample fields.
-    Args:
-        input (str): Name of the field containing the sample input.
-        target (str): Name of the field containing the sample target.
-        choices (str): Optional. Name of field containing the list of answer choices.
-        id (str): Optional. Unique identifier for the sample.
-        metadata (list[str] | None): List of additional field names that should be read as metadata.
-        sandbox (str): Optional. Sandbox type along with optional config file
-        files (str): Optional. Files that go along with the sample.
-        setup (str): Optional. Setup script to run for sample .
-    """
+    r"""Specification for mapping data source fields to sample fields."""
     input: str = field(default="input")
     """Name of the field containing the sample input."""
@@ -315,6 +314,34 @@ class MemoryDataset(Dataset):
             random.shuffle(self.samples)
         self._shuffled = True
+    @override
+    def shuffle_choices(self, seed: int | None = None) -> None:
+        rand = random.Random(seed)
+        for sample in self.samples:
+            if not sample.choices:
+                continue
+            # The original positions
+            positions = list(range(len(sample.choices)))
+            # Shuffle the choices
+            rand.shuffle(positions)
+            shuffled_choices = [sample.choices[i] for i in positions]
+            # Map of original position / target letter
+            position_map = {i: chr(65 + new_i) for new_i, i in enumerate(positions)}
+            # Update to the shuffled choices and target
+            sample.choices = shuffled_choices
+            sample.target = self._remap_target(sample.target, position_map=position_map)
+    def _remap_target(
+        self, target: str | list[str], position_map: dict[int, str]
+    ) -> str | list[str]:
+        if isinstance(target, list):
+            return [position_map[ord(t) - 65] for t in target]
+        else:
+            return position_map[ord(target) - 65]
     @override
     def sort(
         self,

inspect_ai/dataset/_sources/__init__.py ADDED Viewed

File without changes

inspect_ai/dataset/_sources/csv.py CHANGED Viewed

@@ -23,6 +23,7 @@ def csv_dataset(
     auto_id: bool = False,
     shuffle: bool = False,
     seed: int | None = None,
+    shuffle_choices: bool | int | None = None,
     limit: int | None = None,
     dialect: str = "unix",
     encoding: str = "utf-8",
@@ -34,29 +35,30 @@ def csv_dataset(
     r"""Read dataset from CSV file.
     Args:
-        csv_file (str): Path to CSV file. Can be a local filesystem path,
+        csv_file: Path to CSV file. Can be a local filesystem path,
             a path to an S3 bucket (e.g. "s3://my-bucket"), or an HTTPS URL.
             Use `fs_options` to pass arguments through to the `S3FileSystem` constructor.
-        sample_fields (FieldSpec | RecordToSample): Method of mapping underlying
+        sample_fields: Method of mapping underlying
             fields in the data source to Sample objects. Pass `None` if the data is already
             stored in `Sample` form (i.e. has "input" and "target" columns.); Pass a
             `FieldSpec` to specify mapping fields by name; Pass a `RecordToSample` to
             handle mapping with a custom function that returns one or more samples.
-        auto_id (bool): Assign an auto-incrementing ID for each sample.
-        shuffle (bool): Randomly shuffle the dataset order.
-        seed: (int | None): Seed used for random shuffle.
-        limit (int | None): Limit the number of records to read.
-        dialect (str): CSV dialect ("unix", "excel" or"excel-tab"). Defaults to "unix". See https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters for more details
-        encoding (str): Text encoding for file (defaults to "utf-8").
-        name (str): Optional name for dataset (for logging). If not specified,
+        auto_id: Assign an auto-incrementing ID for each sample.
+        shuffle: Randomly shuffle the dataset order.
+        seed: Seed used for random shuffle.
+        shuffle_choices: Whether to shuffle the choices. If an int is passed, this will be used as the seed when shuffling.
+        limit: Limit the number of records to read.
+        dialect: CSV dialect ("unix", "excel" or "excel-tab"). Defaults to "unix". See https://docs.python.org/3/library/csv.html#dialects-and-formatting-parameters for more details
+        encoding: Text encoding for file (defaults to "utf-8").
+        name: Optional name for dataset (for logging). If not specified,
             defaults to the stem of the filename
-        fs_options (dict[str, Any]): Optional. Additional arguments to pass through
+        fs_options: Optional. Additional arguments to pass through
             to the filesystem provider (e.g. `S3FileSystem`). Use `{"anon": True }`
             if you are accessing a public S3 bucket with no credentials.
-        fieldnames (list[str] | None): Optional. A list of fieldnames to use for the CSV.
+        fieldnames: Optional. A list of fieldnames to use for the CSV.
             If None, the values in the first row of the file will be used as the fieldnames.
             Useful for files without a header.
-        delimiter (str): Optional. The delimiter to use when parsing the file. Defaults to ",".
+        delimiter: Optional. The delimiter to use when parsing the file. Defaults to ",".
     Returns:
         Dataset read from CSV file.
@@ -86,6 +88,12 @@ def csv_dataset(
         if shuffle:
             dataset.shuffle(seed=seed)
+        # shuffle choices, if requested
+        if isinstance(shuffle_choices, int):
+            dataset.shuffle_choices(seed=shuffle_choices)
+        elif shuffle_choices is True:
+            dataset.shuffle_choices()
         # limit if requested
         if limit:
             return dataset[0:limit]

inspect_ai/dataset/_sources/file.py CHANGED Viewed

@@ -16,6 +16,7 @@ def file_dataset(
     auto_id: bool = False,
     shuffle: bool = False,
     seed: int | None = None,
+    shuffle_choices: bool | int | None = None,
     limit: int | None = None,
     dialect: str = "unix",
     encoding: str = "utf-8",
@@ -40,6 +41,7 @@ def file_dataset(
         auto_id (bool): Assign an auto-incrementing ID for each sample.
         shuffle (bool): Randomly shuffle the dataset order.
         seed: (int | None): Seed used for random shuffle.
+        shuffle_choices: (bool | int | None): Whether to shuffle the choices. If an int is passed, this will be used as the seed when shuffling.
         limit (int | None): Limit the number of records to read.
         dialect (str): CSV dialect ("unix" or "excel", defaults to "unix"). Only
             applies to reading CSV files.
@@ -66,6 +68,7 @@ def file_dataset(
                 auto_id=auto_id,
                 shuffle=shuffle,
                 seed=seed,
+                shuffle_choices=shuffle_choices,
                 limit=limit,
                 encoding=encoding,
                 name=name,
@@ -78,6 +81,7 @@ def file_dataset(
                 auto_id=auto_id,
                 shuffle=shuffle,
                 seed=seed,
+                shuffle_choices=shuffle_choices,
                 limit=limit,
                 dialect=dialect,
                 encoding=encoding,

inspect_ai/dataset/_sources/hf.py CHANGED Viewed

@@ -29,6 +29,7 @@ def hf_dataset(
     auto_id: bool = False,
     shuffle: bool = False,
     seed: int | None = None,
+    shuffle_choices: bool | int | None = None,
     limit: int | None = None,
     trust: bool = False,
     cached: bool = True,
@@ -40,35 +41,36 @@ def hf_dataset(
     `datasets` package, including remote datasets on Hugging Face Hub.
     Args:
-        path (str): Path or name of the dataset. Depending on path, the dataset
-          builder that is used comes from a generic dataset script (JSON, CSV,
-          Parquet, text etc.) or from the dataset script (a python file) inside
-          the dataset directory.
-        split (str): Which split of the data to load.
-        name (str | None): Name of the dataset configuration.
-        data_dir (str | None): data_dir of the dataset configuration
-          to read data from.
-        revision (str | None): Specific revision to load (e.g. "main", a branch
-          name, or a specific commit SHA). When using `revision` the `cached` option
-          is ignored and datasets are revalidated on Hugging Face before loading.
-        sample_fields (FieldSpec | RecordToSample): Method of mapping underlying
-          fields in the data source to Sample objects. Pass `None` if the data is already
-          stored in `Sample` form (i.e. has "input" and "target" columns.); Pass a
-          `FieldSpec` to specify mapping fields by name; Pass a `RecordToSample` to
+      path: Path or name of the dataset. Depending on path, the dataset
+        builder that is used comes from a generic dataset script (JSON, CSV,
+        Parquet, text etc.) or from the dataset script (a python file) inside
+        the dataset directory.
+      split: Which split of the data to load.
+      name: Name of the dataset configuration.
+      data_dir: data_dir of the dataset configuration
+        to read data from.
+      revision: Specific revision to load (e.g. "main", a branch
+        name, or a specific commit SHA). When using `revision` the `cached` option
+        is ignored and datasets are revalidated on Hugging Face before loading.
+      sample_fields: Method of mapping underlying
+        fields in the data source to Sample objects. Pass `None` if the data is already
+        stored in `Sample` form (i.e. has "input" and "target" columns.); Pass a
+        `FieldSpec` to specify mapping fields by name; Pass a `RecordToSample` to
           handle mapping with a custom function that returns one or more samples.
-        auto_id (bool): Assign an auto-incrementing ID for each sample.
-        shuffle (bool): Randomly shuffle the dataset order.
-        seed: (int | None): Seed used for random shuffle.
-        limit (int | None): Limit the number of records to read.
-        trust (bool): Whether or not to allow for datasets defined on the Hub
-          using a dataset script. This option should only be set to True for
-          repositories you trust and in which you have read the code, as it
-          will execute code present on the Hub on your local machine.
-        cached (bool): By default, datasets are read once from HuggingFace
-          Hub and then cached for future reads. Pass `cached=False` to force
-          re-reading the dataset from Hugging Face. Ignored when the `revision`
-          option is specified.
-        **kwargs (dict[str, Any]): Additional arguments to pass through to the
+      auto_id: Assign an auto-incrementing ID for each sample.
+      shuffle: Randomly shuffle the dataset order.
+      seed: Seed used for random shuffle.
+      shuffle_choices: Whether to shuffle the choices. If an int is passed, this will be used as the seed when shuffling.
+      limit: Limit the number of records to read.
+      trust: Whether or not to allow for datasets defined on the Hub
+        using a dataset script. This option should only be set to True for
+        repositories you trust and in which you have read the code, as it
+        will execute code present on the Hub on your local machine.
+      cached: By default, datasets are read once from HuggingFace
+        Hub and then cached for future reads. Pass `cached=False` to force
+        re-reading the dataset from Hugging Face. Ignored when the `revision`
+        option is specified.
+      **kwargs (dict[str, Any]): Additional arguments to pass through to the
           `load_dataset` function of the `datasets` package.
     Returns:
@@ -117,8 +119,16 @@ def hf_dataset(
         dataset = dataset.select(range(limit))
     # return the dataset
-    return MemoryDataset(
+    memory_dataset = MemoryDataset(
         samples=data_to_samples(dataset.to_list(), data_to_sample, auto_id),
         name=Path(path).stem if Path(path).exists() else path,
         location=path,
     )
+    # maybe shuffle the choices
+    if isinstance(shuffle_choices, int):
+        memory_dataset.shuffle_choices(seed=shuffle_choices)
+    elif shuffle_choices is True:
+        memory_dataset.shuffle_choices()
+    return memory_dataset

inspect_ai/dataset/_sources/json.py CHANGED Viewed

@@ -25,6 +25,7 @@ def json_dataset(
     auto_id: bool = False,
     shuffle: bool = False,
     seed: int | None = None,
+    shuffle_choices: bool | int | None = None,
     limit: int | None = None,
     encoding: str = "utf-8",
     name: str | None = None,
@@ -38,22 +39,23 @@ def json_dataset(
     the `sample_fields` argument.
     Args:
-      json_file (str): Path to JSON file. Can be a local filesystem path or
+      json_file: Path to JSON file. Can be a local filesystem path or
         a path to an S3 bucket (e.g. "s3://my-bucket"). Use `fs_options`
         to pass arguments through to the `S3FileSystem` constructor.
-      sample_fields (FieldSpec | RecordToSample): Method of mapping underlying
+      sample_fields: Method of mapping underlying
         fields in the data source to `Sample` objects. Pass `None` if the data is already
         stored in `Sample` form (i.e. object with "input" and "target" fields); Pass a
         `FieldSpec` to specify mapping fields by name; Pass a `RecordToSample` to
         handle mapping with a custom function that returns one or more samples.
-      auto_id (bool): Assign an auto-incrementing ID for each sample.
-      shuffle (bool): Randomly shuffle the dataset order.
-      seed: (int | None): Seed used for random shuffle.
-      limit (int | None): Limit the number of records to read.
-      encoding (str): Text encoding for file (defaults to "utf-8").
-      name (str): Optional name for dataset (for logging). If not specified,
+      auto_id: Assign an auto-incrementing ID for each sample.
+      shuffle: Randomly shuffle the dataset order.
+      seed: Seed used for random shuffle.
+      shuffle_choices: Whether to shuffle the choices. If an int is passed, this will be used as the seed when shuffling.
+      limit: Limit the number of records to read.
+      encoding: Text encoding for file (defaults to "utf-8").
+      name: Optional name for dataset (for logging). If not specified,
         defaults to the stem of the filename.
-      fs_options (dict[str, Any]): Optional. Additional arguments to pass through
+      fs_options: Optional. Additional arguments to pass through
         to the filesystem provider (e.g. `S3FileSystem`). Use `{"anon": True }`
         if you are accessing a public S3 bucket with no credentials.
@@ -86,6 +88,12 @@ def json_dataset(
         if shuffle:
             dataset.shuffle(seed=seed)
+        # shuffle choices, if requested
+        if isinstance(shuffle_choices, int):
+            dataset.shuffle_choices(seed=shuffle_choices)
+        elif shuffle_choices is True:
+            dataset.shuffle_choices()
         # limit if requested
         if limit:
             return dataset[0:limit]

inspect_ai/log/__init__.py CHANGED Viewed

@@ -22,6 +22,7 @@ from ._log import (
     EvalResults,
     EvalRevision,
     EvalSample,
+    EvalSampleLimit,
     EvalSampleReductions,
     EvalSampleScore,
     EvalScore,
@@ -61,6 +62,7 @@ __all__ = [
     "EvalResults",
     "EvalRevision",
     "EvalSample",
+    "EvalSampleLimit",
     "EvalSampleScore",
     "EvalSampleReductions",
     "EvalScore",

inspect_ai/log/_convert.py CHANGED Viewed

@@ -20,12 +20,12 @@ def convert_eval_logs(
     Args:
         path (str): Path to source log file(s). Should be either a single
-          log file or a directory containing log files.
+            log file or a directory containing log files.
         to (Literal["eval", "json"]): Format to convert to. If a file is
-          already in the target format it will just be copied to the output dir.
+            already in the target format it will just be copied to the output dir.
         output_dir (str): Output directory to write converted log file(s) to.
         overwrite (bool): Overwrite existing log files (defaults to `False`,
-          raising an error if the output file path already exists).
+            raising an error if the output file path already exists).
     """
     from inspect_ai._display import display

inspect_ai/log/_file.py CHANGED Viewed

@@ -3,6 +3,7 @@ import re
 from logging import getLogger
 from typing import Any, Callable, Generator, Literal, cast
+from pydantic import BaseModel
 from pydantic_core import to_json
 from inspect_ai._util._async import run_coroutine
@@ -22,7 +23,21 @@ from ._recorders import recorder_type_for_format, recorder_type_for_location
 logger = getLogger(__name__)
-class EvalLogInfo(FileInfo):
+class EvalLogInfo(BaseModel):
+    """File info and task identifiers for eval log."""
+    name: str
+    """Name of file."""
+    type: str
+    """Type of file (file or directory)"""
+    size: int
+    """File size in bytes."""
+    mtime: float | None
+    """File modification time (None if the file is a directory on S3)."""
     task: str
     """Task name."""
@@ -231,7 +246,7 @@ def write_log_dir_manifest(
 def read_eval_log(
-    log_file: str | FileInfo,
+    log_file: str | EvalLogInfo,
     header_only: bool = False,
     resolve_attachments: bool = False,
     format: Literal["eval", "json", "auto"] = "auto",
@@ -241,7 +256,7 @@ def read_eval_log(
     Args:
        log_file (str | FileInfo): Log file to read.
        header_only (bool): Read only the header (i.e. exclude
-         the "samples" and "logging" fields). Defaults to False.
+          the "samples" and "logging" fields). Defaults to False.
        resolve_attachments (bool): Resolve attachments (e.g. images)
           to their full content.
        format (Literal["eval", "json", "auto"]): Read from format
@@ -256,7 +271,7 @@ def read_eval_log(
 async def read_eval_log_async(
-    log_file: str | FileInfo,
+    log_file: str | EvalLogInfo,
     header_only: bool = False,
     resolve_attachments: bool = False,
     format: Literal["eval", "json", "auto"] = "auto",
@@ -304,13 +319,13 @@ async def read_eval_log_async(
 def read_eval_log_headers(
-    log_files: list[str] | list[FileInfo] | list[EvalLogInfo],
+    log_files: list[str] | list[EvalLogInfo],
 ) -> list[EvalLog]:
     return run_coroutine(read_eval_log_headers_async(log_files))
 async def read_eval_log_headers_async(
-    log_files: list[str] | list[FileInfo] | list[EvalLogInfo],
+    log_files: list[str] | list[EvalLogInfo],
 ) -> list[EvalLog]:
     return [
         await read_eval_log_async(log_file, header_only=True) for log_file in log_files
@@ -318,7 +333,7 @@ async def read_eval_log_headers_async(
 def read_eval_log_sample(
-    log_file: str | FileInfo,
+    log_file: str | EvalLogInfo,
     id: int | str,
     epoch: int = 1,
     resolve_attachments: bool = False,
@@ -347,7 +362,7 @@ def read_eval_log_sample(
 async def read_eval_log_sample_async(
-    log_file: str | FileInfo,
+    log_file: str | EvalLogInfo,
     id: int | str,
     epoch: int = 1,
     resolve_attachments: bool = False,
@@ -386,7 +401,7 @@ async def read_eval_log_sample_async(
 def read_eval_log_samples(
-    log_file: str | FileInfo,
+    log_file: str | EvalLogInfo,
     all_samples_required: bool = True,
     resolve_attachments: bool = False,
     format: Literal["eval", "json", "auto"] = "auto",

inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl

inspect-ai 0.3.62py3-none-any.whl → 0.3.64py3-none-any.whl