inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/cache.py +8 -7
- inspect_ai/_cli/common.py +0 -12
- inspect_ai/_cli/eval.py +32 -4
- inspect_ai/_cli/info.py +1 -0
- inspect_ai/_cli/list.py +1 -1
- inspect_ai/_cli/log.py +2 -0
- inspect_ai/_cli/main.py +1 -1
- inspect_ai/_cli/sandbox.py +4 -1
- inspect_ai/_cli/score.py +181 -32
- inspect_ai/_cli/trace.py +10 -0
- inspect_ai/_cli/view.py +4 -2
- inspect_ai/_display/core/active.py +2 -3
- inspect_ai/_display/core/config.py +7 -1
- inspect_ai/_display/textual/widgets/samples.py +4 -3
- inspect_ai/_display/textual/widgets/sandbox.py +6 -0
- inspect_ai/_eval/eval.py +104 -101
- inspect_ai/_eval/evalset.py +75 -75
- inspect_ai/_eval/loader.py +122 -12
- inspect_ai/_eval/registry.py +1 -1
- inspect_ai/_eval/run.py +14 -0
- inspect_ai/_eval/score.py +125 -36
- inspect_ai/_eval/task/log.py +105 -4
- inspect_ai/_eval/task/results.py +92 -38
- inspect_ai/_eval/task/run.py +9 -2
- inspect_ai/_eval/task/sandbox.py +35 -2
- inspect_ai/_eval/task/task.py +49 -46
- inspect_ai/_util/constants.py +1 -1
- inspect_ai/_util/content.py +8 -0
- inspect_ai/_util/error.py +2 -0
- inspect_ai/_util/file.py +15 -1
- inspect_ai/_util/hash.py +1 -1
- inspect_ai/_util/logger.py +4 -2
- inspect_ai/_util/registry.py +7 -1
- inspect_ai/_view/view.py +1 -2
- inspect_ai/_view/www/.vscode/extensions.json +3 -0
- inspect_ai/_view/www/.vscode/settings.json +8 -0
- inspect_ai/_view/www/App.css +97 -29
- inspect_ai/_view/www/README.md +1 -1
- inspect_ai/_view/www/dist/assets/index.css +16663 -14674
- inspect_ai/_view/www/dist/assets/index.js +58808 -51348
- inspect_ai/_view/www/dist/index.html +1 -1
- inspect_ai/_view/www/index.html +2 -2
- inspect_ai/_view/www/log-schema.json +87 -73
- inspect_ai/_view/www/package.json +22 -4
- inspect_ai/_view/www/postcss.config.cjs +8 -9
- inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
- inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
- inspect_ai/_view/www/src/api/api-browser.ts +2 -2
- inspect_ai/_view/www/src/api/api-http.ts +3 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
- inspect_ai/_view/www/src/api/client-api.ts +4 -4
- inspect_ai/_view/www/src/api/index.ts +4 -4
- inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
- inspect_ai/_view/www/src/appearance/colors.ts +9 -0
- inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
- inspect_ai/_view/www/src/appearance/icons.ts +100 -0
- inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
- inspect_ai/_view/www/src/components/Card.css +60 -0
- inspect_ai/_view/www/src/components/Card.tsx +109 -0
- inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
- inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
- inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
- inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
- inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
- inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
- inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
- inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
- inspect_ai/_view/www/src/components/FindBand.css +49 -0
- inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
- inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
- inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
- inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
- inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
- inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
- inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
- inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
- inspect_ai/_view/www/src/components/MessageBand.css +43 -0
- inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
- inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
- inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
- inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
- inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
- inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
- inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
- inspect_ai/_view/www/src/components/ToolButton.css +3 -0
- inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
- inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
- inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
- inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
- inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
- inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
- inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
- inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
- inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
- inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
- inspect_ai/_view/www/src/metadata/types.ts +18 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
- inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
- inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
- inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
- inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
- inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
- inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
- inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
- inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
- inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
- inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
- inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
- inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
- inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
- inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
- inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
- inspect_ai/_view/www/src/samples/error/error.ts +15 -0
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
- inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
- inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
- inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
- inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
- inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
- inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
- inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
- inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
- inspect_ai/_view/www/src/types/log.d.ts +108 -19
- inspect_ai/_view/www/src/types/prism.d.ts +11 -0
- inspect_ai/_view/www/src/types.ts +71 -0
- inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
- inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
- inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
- inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
- inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
- inspect_ai/_view/www/src/utils/attachments.ts +42 -0
- inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
- inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
- inspect_ai/_view/www/src/utils/debugging.ts +28 -0
- inspect_ai/_view/www/src/utils/dom.ts +30 -0
- inspect_ai/_view/www/src/utils/format.ts +194 -0
- inspect_ai/_view/www/src/utils/git.ts +7 -0
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/_view/www/src/utils/http.ts +14 -0
- inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
- inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
- inspect_ai/_view/www/src/utils/queue.ts +51 -0
- inspect_ai/_view/www/src/utils/sync.ts +114 -0
- inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
- inspect_ai/_view/www/src/utils/vscode.ts +13 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
- inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
- inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
- inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
- inspect_ai/_view/www/src/workspace/types.ts +10 -0
- inspect_ai/_view/www/src/workspace/utils.ts +34 -0
- inspect_ai/_view/www/tsconfig.json +23 -9
- inspect_ai/_view/www/vite.config.js +8 -17
- inspect_ai/_view/www/yarn.lock +627 -556
- inspect_ai/approval/_approval.py +2 -0
- inspect_ai/approval/_approver.py +4 -4
- inspect_ai/approval/_auto.py +1 -1
- inspect_ai/approval/_human/approver.py +3 -0
- inspect_ai/approval/_policy.py +5 -0
- inspect_ai/approval/_registry.py +2 -2
- inspect_ai/dataset/_dataset.py +64 -37
- inspect_ai/dataset/_sources/__init__.py +0 -0
- inspect_ai/dataset/_sources/csv.py +20 -12
- inspect_ai/dataset/_sources/file.py +4 -0
- inspect_ai/dataset/_sources/hf.py +39 -29
- inspect_ai/dataset/_sources/json.py +17 -9
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_convert.py +3 -3
- inspect_ai/log/_file.py +24 -9
- inspect_ai/log/_log.py +101 -13
- inspect_ai/log/_message.py +4 -2
- inspect_ai/log/_recorders/file.py +4 -0
- inspect_ai/log/_recorders/json.py +5 -7
- inspect_ai/log/_recorders/recorder.py +3 -0
- inspect_ai/log/_transcript.py +19 -8
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_cache.py +39 -21
- inspect_ai/model/_call_tools.py +4 -3
- inspect_ai/model/_chat_message.py +14 -4
- inspect_ai/model/_generate_config.py +1 -1
- inspect_ai/model/_model.py +31 -24
- inspect_ai/model/_model_output.py +14 -1
- inspect_ai/model/_openai.py +10 -18
- inspect_ai/model/_providers/anthropic.py +3 -3
- inspect_ai/model/_providers/google.py +9 -5
- inspect_ai/model/_providers/openai.py +5 -9
- inspect_ai/model/_providers/openai_o1.py +3 -5
- inspect_ai/model/_providers/openrouter.py +86 -0
- inspect_ai/model/_providers/providers.py +11 -0
- inspect_ai/scorer/__init__.py +6 -1
- inspect_ai/scorer/_answer.py +7 -7
- inspect_ai/scorer/_classification.py +38 -18
- inspect_ai/scorer/_common.py +2 -8
- inspect_ai/scorer/_match.py +4 -5
- inspect_ai/scorer/_metric.py +87 -28
- inspect_ai/scorer/_metrics/__init__.py +3 -3
- inspect_ai/scorer/_metrics/accuracy.py +8 -10
- inspect_ai/scorer/_metrics/mean.py +3 -17
- inspect_ai/scorer/_metrics/std.py +111 -30
- inspect_ai/scorer/_model.py +12 -12
- inspect_ai/scorer/_pattern.py +3 -3
- inspect_ai/scorer/_reducer/reducer.py +36 -21
- inspect_ai/scorer/_reducer/registry.py +2 -2
- inspect_ai/scorer/_reducer/types.py +7 -1
- inspect_ai/scorer/_score.py +11 -1
- inspect_ai/scorer/_scorer.py +110 -16
- inspect_ai/solver/__init__.py +1 -1
- inspect_ai/solver/_basic_agent.py +19 -22
- inspect_ai/solver/_bridge/__init__.py +0 -3
- inspect_ai/solver/_bridge/bridge.py +3 -3
- inspect_ai/solver/_chain.py +1 -2
- inspect_ai/solver/_critique.py +3 -3
- inspect_ai/solver/_fork.py +2 -2
- inspect_ai/solver/_human_agent/__init__.py +0 -0
- inspect_ai/solver/_human_agent/agent.py +5 -8
- inspect_ai/solver/_human_agent/commands/clock.py +14 -10
- inspect_ai/solver/_human_agent/commands/note.py +1 -1
- inspect_ai/solver/_human_agent/commands/score.py +0 -11
- inspect_ai/solver/_multiple_choice.py +38 -26
- inspect_ai/solver/_prompt.py +7 -7
- inspect_ai/solver/_solver.py +53 -52
- inspect_ai/solver/_task_state.py +80 -69
- inspect_ai/solver/_use_tools.py +9 -9
- inspect_ai/tool/__init__.py +4 -1
- inspect_ai/tool/_tool.py +43 -14
- inspect_ai/tool/_tool_call.py +6 -2
- inspect_ai/tool/_tool_choice.py +3 -1
- inspect_ai/tool/_tool_def.py +10 -8
- inspect_ai/tool/_tool_params.py +24 -0
- inspect_ai/tool/_tool_with.py +7 -7
- inspect_ai/tool/_tools/__init__.py +0 -0
- inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
- inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
- inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
- inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/tool/_tools/_execute.py +23 -11
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
- inspect_ai/tool/_tools/_web_search.py +7 -5
- inspect_ai/tool/beta.py +3 -0
- inspect_ai/util/_concurrency.py +3 -3
- inspect_ai/util/_panel.py +2 -0
- inspect_ai/util/_resource.py +12 -12
- inspect_ai/util/_sandbox/docker/compose.py +23 -20
- inspect_ai/util/_sandbox/docker/config.py +2 -1
- inspect_ai/util/_sandbox/docker/docker.py +42 -86
- inspect_ai/util/_sandbox/docker/service.py +100 -0
- inspect_ai/util/_sandbox/environment.py +99 -96
- inspect_ai/util/_sandbox/self_check.py +124 -16
- inspect_ai/util/_subprocess.py +5 -3
- inspect_ai/util/_subtask.py +15 -16
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
- inspect_ai-0.3.64.dist-info/RECORD +625 -0
- inspect_ai/_view/www/src/Register.mjs +0 -3
- inspect_ai/_view/www/src/Types.mjs +0 -38
- inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
- inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
- inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
- inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
- inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
- inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
- inspect_ai/_view/www/src/components/Card.mjs +0 -126
- inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
- inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
- inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
- inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
- inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
- inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
- inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
- inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
- inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
- inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
- inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
- inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
- inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
- inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
- inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
- inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
- inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
- inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
- inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
- inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
- inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
- inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
- inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
- inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
- inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
- inspect_ai/_view/www/src/components/Tools.mjs +0 -376
- inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
- inspect_ai/_view/www/src/components/ansi-output.js +0 -932
- inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
- inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
- inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
- inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
- inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
- inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
- inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
- inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
- inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
- inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
- inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
- inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
- inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
- inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
- inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
- inspect_ai/_view/www/src/utils/Format.mjs +0 -260
- inspect_ai/_view/www/src/utils/Git.mjs +0 -12
- inspect_ai/_view/www/src/utils/Html.mjs +0 -21
- inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
- inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
- inspect_ai/_view/www/src/utils/http.mjs +0 -18
- inspect_ai/_view/www/src/utils/queue.mjs +0 -67
- inspect_ai/_view/www/src/utils/sync.mjs +0 -101
- inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
- inspect_ai/tool/beta/__init__.py +0 -5
- inspect_ai-0.3.62.dist-info/RECORD +0 -481
- /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
- /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
- /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
@@ -65,91 +65,6 @@ class SandboxEnvironment(abc.ABC):
|
|
65
65
|
filesystem context to copy samples files into and resolve relative paths to.
|
66
66
|
"""
|
67
67
|
|
68
|
-
@classmethod
|
69
|
-
def config_files(cls) -> list[str]:
|
70
|
-
"""Standard config files for this provider (used for automatic discovery)"""
|
71
|
-
return []
|
72
|
-
|
73
|
-
@classmethod
|
74
|
-
def default_concurrency(cls) -> int | None:
|
75
|
-
"""Default max_sandboxes for this provider (`None` means no maximum)"""
|
76
|
-
return None
|
77
|
-
|
78
|
-
@classmethod
|
79
|
-
async def task_init(
|
80
|
-
cls, task_name: str, config: SandboxEnvironmentConfigType | None
|
81
|
-
) -> None:
|
82
|
-
"""Called at task startup initialize resources.
|
83
|
-
|
84
|
-
Args:
|
85
|
-
task_name (str): Name of task using the sandbox environment.
|
86
|
-
config (SandboxEnvironmentConfigType): Implementation defined configuration (optional).
|
87
|
-
"""
|
88
|
-
pass
|
89
|
-
|
90
|
-
@classmethod
|
91
|
-
async def sample_init(
|
92
|
-
cls,
|
93
|
-
task_name: str,
|
94
|
-
config: SandboxEnvironmentConfigType | None,
|
95
|
-
metadata: dict[str, str],
|
96
|
-
) -> dict[str, "SandboxEnvironment"]:
|
97
|
-
"""Initialize sandbox environments for a sample.
|
98
|
-
|
99
|
-
Args:
|
100
|
-
task_name (str): Name of task using the sandbox environment.
|
101
|
-
config (SandboxEnvironmentConfigType): Implementation defined configuration (optional).
|
102
|
-
metadata (dict[str,str]): Sample `metadata` field
|
103
|
-
|
104
|
-
Returns:
|
105
|
-
Dictionary of named sandbox environments. The environment which represents
|
106
|
-
the default environment (resolved by `sandbox("default")` or `sandbox()`) must
|
107
|
-
be the first key/value pair in the dictionary.
|
108
|
-
"""
|
109
|
-
return {}
|
110
|
-
|
111
|
-
@classmethod
|
112
|
-
@abc.abstractmethod
|
113
|
-
async def sample_cleanup(
|
114
|
-
cls,
|
115
|
-
task_name: str,
|
116
|
-
config: SandboxEnvironmentConfigType | None,
|
117
|
-
environments: dict[str, "SandboxEnvironment"],
|
118
|
-
interrupted: bool,
|
119
|
-
) -> None:
|
120
|
-
"""Cleanup sandbox environments.
|
121
|
-
|
122
|
-
Args:
|
123
|
-
task_name (str): Name of task using the sandbox environment.
|
124
|
-
config (SandboxEnvironmentConfigType): Implementation defined configuration (optional).
|
125
|
-
environments (dict[str,SandboxEnvironment]): Sandbox environments created for this sample.
|
126
|
-
interrupted (bool): Was the task interrupted by an error or cancellation
|
127
|
-
"""
|
128
|
-
...
|
129
|
-
|
130
|
-
@classmethod
|
131
|
-
async def task_cleanup(
|
132
|
-
cls, task_name: str, config: SandboxEnvironmentConfigType | None, cleanup: bool
|
133
|
-
) -> None:
|
134
|
-
"""Called at task exit as a last chance to cleanup resources.
|
135
|
-
|
136
|
-
Args:
|
137
|
-
task_name (str): Name of task using the sandbox environment.
|
138
|
-
config (SandboxEnvironmentConfigType): Implementation defined configuration (optional).
|
139
|
-
cleanup (bool): Whether to actually cleanup environment resources
|
140
|
-
(False if `--no-sandbox-cleanup` was specified)
|
141
|
-
"""
|
142
|
-
pass
|
143
|
-
|
144
|
-
@classmethod
|
145
|
-
async def cli_cleanup(cls, id: str | None) -> None:
|
146
|
-
"""Handle a cleanup invoked from the CLI (e.g. inspect sandbox cleanup).
|
147
|
-
|
148
|
-
Args:
|
149
|
-
id (str | None): Optional ID to limit scope of cleanup.
|
150
|
-
"""
|
151
|
-
pass
|
152
|
-
|
153
68
|
@abc.abstractmethod
|
154
69
|
async def exec(
|
155
70
|
self,
|
@@ -170,13 +85,13 @@ class SandboxEnvironment(abc.ABC):
|
|
170
85
|
`OutputLimitExceededError` will be raised.
|
171
86
|
|
172
87
|
Args:
|
173
|
-
cmd
|
174
|
-
input
|
175
|
-
cwd
|
176
|
-
env
|
177
|
-
user
|
178
|
-
timeout
|
179
|
-
timeout_retry
|
88
|
+
cmd: Command or command and arguments to execute.
|
89
|
+
input: Standard input (optional).
|
90
|
+
cwd: Current working dir (optional). If relative, will be relative to the per-sample filesystem context.
|
91
|
+
env: Environment variables for execution.
|
92
|
+
user: Optional username or UID to run the command as.
|
93
|
+
timeout: Optional execution timeout (seconds).
|
94
|
+
timeout_retry: Retry the command in the case that it times out.
|
180
95
|
Commands will be retried up to twice, with a timeout of no greater
|
181
96
|
than 60 seconds for the first retry and 30 for the second.
|
182
97
|
|
@@ -204,9 +119,9 @@ class SandboxEnvironment(abc.ABC):
|
|
204
119
|
should be automatically created.
|
205
120
|
|
206
121
|
Args:
|
207
|
-
file
|
122
|
+
file: Path to file (relative file paths will resolve to the
|
208
123
|
per-sample working directory).
|
209
|
-
contents
|
124
|
+
contents: Text or binary file contents.
|
210
125
|
|
211
126
|
Raises:
|
212
127
|
PermissionError: If the current user does not have permission to
|
@@ -233,9 +148,9 @@ class SandboxEnvironment(abc.ABC):
|
|
233
148
|
to specifying `newline=""` in a call to the Python `open()` function.
|
234
149
|
|
235
150
|
Args:
|
236
|
-
file
|
151
|
+
file: Path to file (relative file paths will resolve to the
|
237
152
|
per-sample working directory).
|
238
|
-
text
|
153
|
+
text: Read as a utf-8 encoded text file.
|
239
154
|
|
240
155
|
Returns:
|
241
156
|
Contents of file (as str or bytes for binary files)
|
@@ -265,6 +180,91 @@ class SandboxEnvironment(abc.ABC):
|
|
265
180
|
"""
|
266
181
|
raise NotImplementedError("connection not implemented")
|
267
182
|
|
183
|
+
@classmethod
|
184
|
+
def config_files(cls) -> list[str]:
|
185
|
+
"""Standard config files for this provider (used for automatic discovery)"""
|
186
|
+
return []
|
187
|
+
|
188
|
+
@classmethod
|
189
|
+
def default_concurrency(cls) -> int | None:
|
190
|
+
"""Default max_sandboxes for this provider (`None` means no maximum)"""
|
191
|
+
return None
|
192
|
+
|
193
|
+
@classmethod
|
194
|
+
async def task_init(
|
195
|
+
cls, task_name: str, config: SandboxEnvironmentConfigType | None
|
196
|
+
) -> None:
|
197
|
+
"""Called at task startup initialize resources.
|
198
|
+
|
199
|
+
Args:
|
200
|
+
task_name: Name of task using the sandbox environment.
|
201
|
+
config: Implementation defined configuration (optional).
|
202
|
+
"""
|
203
|
+
pass
|
204
|
+
|
205
|
+
@classmethod
|
206
|
+
async def sample_init(
|
207
|
+
cls,
|
208
|
+
task_name: str,
|
209
|
+
config: SandboxEnvironmentConfigType | None,
|
210
|
+
metadata: dict[str, str],
|
211
|
+
) -> dict[str, "SandboxEnvironment"]:
|
212
|
+
"""Initialize sandbox environments for a sample.
|
213
|
+
|
214
|
+
Args:
|
215
|
+
task_name: Name of task using the sandbox environment.
|
216
|
+
config: Implementation defined configuration (optional).
|
217
|
+
metadata: Sample `metadata` field
|
218
|
+
|
219
|
+
Returns:
|
220
|
+
Dictionary of named sandbox environments. The environment which represents
|
221
|
+
the default environment (resolved by `sandbox("default")` or `sandbox()`) must
|
222
|
+
be the first key/value pair in the dictionary.
|
223
|
+
"""
|
224
|
+
return {}
|
225
|
+
|
226
|
+
@classmethod
|
227
|
+
@abc.abstractmethod
|
228
|
+
async def sample_cleanup(
|
229
|
+
cls,
|
230
|
+
task_name: str,
|
231
|
+
config: SandboxEnvironmentConfigType | None,
|
232
|
+
environments: dict[str, "SandboxEnvironment"],
|
233
|
+
interrupted: bool,
|
234
|
+
) -> None:
|
235
|
+
"""Cleanup sandbox environments.
|
236
|
+
|
237
|
+
Args:
|
238
|
+
task_name: Name of task using the sandbox environment.
|
239
|
+
config: Implementation defined configuration (optional).
|
240
|
+
environments: Sandbox environments created for this sample.
|
241
|
+
interrupted: Was the task interrupted by an error or cancellation
|
242
|
+
"""
|
243
|
+
...
|
244
|
+
|
245
|
+
@classmethod
|
246
|
+
async def task_cleanup(
|
247
|
+
cls, task_name: str, config: SandboxEnvironmentConfigType | None, cleanup: bool
|
248
|
+
) -> None:
|
249
|
+
"""Called at task exit as a last chance to cleanup resources.
|
250
|
+
|
251
|
+
Args:
|
252
|
+
task_name: Name of task using the sandbox environment.
|
253
|
+
config: Implementation defined configuration (optional).
|
254
|
+
cleanup: Whether to actually cleanup environment resources
|
255
|
+
(False if `--no-sandbox-cleanup` was specified)
|
256
|
+
"""
|
257
|
+
pass
|
258
|
+
|
259
|
+
@classmethod
|
260
|
+
async def cli_cleanup(cls, id: str | None) -> None:
|
261
|
+
"""Handle a cleanup invoked from the CLI (e.g. inspect sandbox cleanup).
|
262
|
+
|
263
|
+
Args:
|
264
|
+
id: Optional ID to limit scope of cleanup.
|
265
|
+
"""
|
266
|
+
pass
|
267
|
+
|
268
268
|
|
269
269
|
@dataclass
|
270
270
|
class SandboxEnvironments:
|
@@ -284,7 +284,10 @@ class SandboxEnvironmentSpec(NamedTuple):
|
|
284
284
|
"""Specification of a SandboxEnvironment."""
|
285
285
|
|
286
286
|
type: str
|
287
|
+
"""Sandbox type (e.g. 'local', 'docker')"""
|
288
|
+
|
287
289
|
config: SandboxEnvironmentConfigType | None = None
|
290
|
+
"""Sandbox configuration (filename or config object)."""
|
288
291
|
|
289
292
|
|
290
293
|
SandboxEnvironmentConfigType = BaseModel | str
|
@@ -32,6 +32,7 @@ async def self_check(sandbox_env: SandboxEnvironment) -> dict[str, bool | str]:
|
|
32
32
|
for fn in [
|
33
33
|
test_read_and_write_file_text,
|
34
34
|
test_read_and_write_file_binary,
|
35
|
+
test_read_and_write_large_file_binary,
|
35
36
|
test_write_file_text_utf,
|
36
37
|
test_read_and_write_file_including_directory_absolute,
|
37
38
|
test_read_and_write_file_including_directory_relative,
|
@@ -41,12 +42,19 @@ async def self_check(sandbox_env: SandboxEnvironment) -> dict[str, bool | str]:
|
|
41
42
|
test_read_file_is_directory,
|
42
43
|
test_read_file_nonsense_name,
|
43
44
|
test_read_file_limit,
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
45
|
+
test_write_text_file_zero_length,
|
46
|
+
test_write_text_file_space,
|
47
|
+
test_write_text_file_is_directory,
|
48
|
+
test_write_text_file_without_permissions,
|
49
|
+
test_write_text_file_exists,
|
50
|
+
test_write_binary_file_zero_length,
|
51
|
+
test_write_binary_file_space,
|
52
|
+
test_write_binary_file_is_directory,
|
53
|
+
test_write_binary_file_without_permissions,
|
54
|
+
test_write_binary_file_exists,
|
49
55
|
test_exec_output,
|
56
|
+
test_exec_stderr,
|
57
|
+
test_exec_returncode,
|
50
58
|
test_exec_timeout,
|
51
59
|
test_exec_permission_error,
|
52
60
|
test_exec_as_user,
|
@@ -100,6 +108,17 @@ async def test_read_and_write_file_binary(sandbox_env: SandboxEnvironment) -> No
|
|
100
108
|
await _cleanup_file(sandbox_env, file_name)
|
101
109
|
|
102
110
|
|
111
|
+
async def test_read_and_write_large_file_binary(
|
112
|
+
sandbox_env: SandboxEnvironment,
|
113
|
+
) -> None:
|
114
|
+
file_name = "test_read_and_write_large_file_binary.file"
|
115
|
+
long_bytes = b"\xc3" * 5_000_000
|
116
|
+
await sandbox_env.write_file(file_name, long_bytes)
|
117
|
+
written_file_bytes = await sandbox_env.read_file(file_name, text=False)
|
118
|
+
assert long_bytes == written_file_bytes
|
119
|
+
await _cleanup_file(sandbox_env, file_name)
|
120
|
+
|
121
|
+
|
103
122
|
async def test_read_and_write_file_including_directory_absolute(
|
104
123
|
sandbox_env: SandboxEnvironment,
|
105
124
|
) -> None:
|
@@ -176,7 +195,7 @@ async def test_read_file_limit(sandbox_env: SandboxEnvironment) -> None:
|
|
176
195
|
await _cleanup_file(sandbox_env, file_name)
|
177
196
|
|
178
197
|
|
179
|
-
async def test_write_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
|
198
|
+
async def test_write_text_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
|
180
199
|
file_name = "zero_length_file.file"
|
181
200
|
await sandbox_env.write_file(file_name, "")
|
182
201
|
zero_length = await sandbox_env.read_file(file_name, text=True)
|
@@ -185,7 +204,7 @@ async def test_write_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
|
|
185
204
|
await _cleanup_file(sandbox_env, file_name)
|
186
205
|
|
187
206
|
|
188
|
-
async def test_write_file_space(sandbox_env: SandboxEnvironment) -> None:
|
207
|
+
async def test_write_text_file_space(sandbox_env: SandboxEnvironment) -> None:
|
189
208
|
space = "to the moon"
|
190
209
|
file_name = "file with space.file"
|
191
210
|
await sandbox_env.write_file(file_name, space)
|
@@ -195,28 +214,28 @@ async def test_write_file_space(sandbox_env: SandboxEnvironment) -> None:
|
|
195
214
|
await _cleanup_file(sandbox_env, file_name)
|
196
215
|
|
197
216
|
|
198
|
-
async def
|
217
|
+
async def test_write_text_file_is_directory(
|
199
218
|
sandbox_env: SandboxEnvironment,
|
200
219
|
) -> None:
|
201
220
|
# ensure /tmp/directory exists
|
202
221
|
await sandbox_env.write_file(
|
203
|
-
"/tmp/
|
222
|
+
"/tmp/inspect_ai_test_write_text_file_is_directory/file", "unused content"
|
204
223
|
)
|
205
224
|
with Raises(IsADirectoryError) as e_info:
|
206
225
|
await sandbox_env.write_file(
|
207
|
-
"/tmp/
|
226
|
+
"/tmp/inspect_ai_test_write_text_file_is_directory",
|
208
227
|
"content cannot go in a directory, dummy",
|
209
228
|
)
|
210
229
|
assert "directory" in str(e_info.value)
|
211
230
|
await sandbox_env.exec(
|
212
|
-
["rm", "-rf", "/tmp/
|
231
|
+
["rm", "-rf", "/tmp/inspect_ai_test_write_text_file_is_directory"]
|
213
232
|
)
|
214
233
|
|
215
234
|
|
216
|
-
async def test_write_file_without_permissions(
|
235
|
+
async def test_write_text_file_without_permissions(
|
217
236
|
sandbox_env: SandboxEnvironment,
|
218
237
|
) -> None:
|
219
|
-
file_name = "
|
238
|
+
file_name = "test_write_text_file_without_permissions.file"
|
220
239
|
await sandbox_env.write_file(file_name, "impervious #content")
|
221
240
|
await sandbox_env.exec(["chmod", "-w", file_name])
|
222
241
|
with Raises(PermissionError) as e_info:
|
@@ -226,7 +245,7 @@ async def test_write_file_without_permissions(
|
|
226
245
|
await _cleanup_file(sandbox_env, file_name)
|
227
246
|
|
228
247
|
|
229
|
-
async def test_write_file_exists(
|
248
|
+
async def test_write_text_file_exists(
|
230
249
|
sandbox_env: SandboxEnvironment,
|
231
250
|
) -> None:
|
232
251
|
file_name = "file_exists.file"
|
@@ -237,6 +256,67 @@ async def test_write_file_exists(
|
|
237
256
|
await _cleanup_file(sandbox_env, file_name)
|
238
257
|
|
239
258
|
|
259
|
+
async def test_write_binary_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
|
260
|
+
file_name = "zero_length_file.file"
|
261
|
+
await sandbox_env.write_file(file_name, b"")
|
262
|
+
zero_length = await sandbox_env.read_file(file_name, text=False)
|
263
|
+
assert isinstance(zero_length, bytes)
|
264
|
+
assert zero_length == b""
|
265
|
+
await _cleanup_file(sandbox_env, file_name)
|
266
|
+
|
267
|
+
|
268
|
+
async def test_write_binary_file_space(sandbox_env: SandboxEnvironment) -> None:
|
269
|
+
binary_content = b"\xc3\x28"
|
270
|
+
file_name = "file with space.file"
|
271
|
+
await sandbox_env.write_file(file_name, binary_content)
|
272
|
+
file_with_space = await sandbox_env.read_file(file_name, text=False)
|
273
|
+
assert isinstance(file_with_space, bytes)
|
274
|
+
assert file_with_space == binary_content
|
275
|
+
await _cleanup_file(sandbox_env, file_name)
|
276
|
+
|
277
|
+
|
278
|
+
async def test_write_binary_file_is_directory(
|
279
|
+
sandbox_env: SandboxEnvironment,
|
280
|
+
) -> None:
|
281
|
+
# ensure /tmp/directory exists
|
282
|
+
await sandbox_env.write_file(
|
283
|
+
"/tmp/inspect_ai_test_write_binary_file_is_directory/file", "unused content"
|
284
|
+
)
|
285
|
+
with Raises(IsADirectoryError) as e_info:
|
286
|
+
await sandbox_env.write_file(
|
287
|
+
"/tmp/inspect_ai_test_write_binary_file_is_directory",
|
288
|
+
b"\xc3\x28",
|
289
|
+
)
|
290
|
+
assert "directory" in str(e_info.value)
|
291
|
+
await sandbox_env.exec(
|
292
|
+
["rm", "-rf", "/tmp/inspect_ai_test_write_binary_file_is_directory"]
|
293
|
+
)
|
294
|
+
|
295
|
+
|
296
|
+
async def test_write_binary_file_without_permissions(
|
297
|
+
sandbox_env: SandboxEnvironment,
|
298
|
+
) -> None:
|
299
|
+
file_name = "test_write_binary_file_without_permissions.file"
|
300
|
+
await sandbox_env.write_file(file_name, "impervious #content")
|
301
|
+
await sandbox_env.exec(["chmod", "-w", file_name])
|
302
|
+
with Raises(PermissionError) as e_info:
|
303
|
+
await sandbox_env.write_file(file_name, b"\xc3\x28")
|
304
|
+
assert file_name in str(e_info.value)
|
305
|
+
await sandbox_env.exec(["chmod", "+w", file_name])
|
306
|
+
await _cleanup_file(sandbox_env, file_name)
|
307
|
+
|
308
|
+
|
309
|
+
async def test_write_binary_file_exists(
|
310
|
+
sandbox_env: SandboxEnvironment,
|
311
|
+
) -> None:
|
312
|
+
file_name = "file_exists.file"
|
313
|
+
await sandbox_env.write_file(file_name, b"\xc3\x28")
|
314
|
+
await sandbox_env.write_file(file_name, b"\xc3\x29")
|
315
|
+
altered_content = await sandbox_env.read_file(file_name, text=False)
|
316
|
+
assert altered_content == b"\xc3\x29"
|
317
|
+
await _cleanup_file(sandbox_env, file_name)
|
318
|
+
|
319
|
+
|
240
320
|
async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:
|
241
321
|
exec_result = await sandbox_env.exec(["sh", "-c", "echo foo; echo bar"])
|
242
322
|
expected = "foo\nbar\n"
|
@@ -246,9 +326,19 @@ async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:
|
|
246
326
|
)
|
247
327
|
|
248
328
|
|
329
|
+
async def test_exec_stderr(sandbox_env: SandboxEnvironment) -> None:
|
330
|
+
exec_result = await sandbox_env.exec(["sh", "-c", "echo boof; echo baz >&2"])
|
331
|
+
assert exec_result.stderr == "baz\n"
|
332
|
+
|
333
|
+
|
334
|
+
async def test_exec_returncode(sandbox_env: SandboxEnvironment) -> None:
|
335
|
+
exec_result = await sandbox_env.exec(["sh", "-c", "echo foo; exit 70"])
|
336
|
+
assert exec_result.returncode == 70
|
337
|
+
|
338
|
+
|
249
339
|
async def test_exec_timeout(sandbox_env: SandboxEnvironment) -> None:
|
250
340
|
with Raises(TimeoutError):
|
251
|
-
await sandbox_env.exec(["sleep", "
|
341
|
+
await sandbox_env.exec(["sleep", "4"], timeout=2)
|
252
342
|
|
253
343
|
|
254
344
|
async def test_exec_permission_error(sandbox_env: SandboxEnvironment) -> None:
|
@@ -259,10 +349,28 @@ async def test_exec_permission_error(sandbox_env: SandboxEnvironment) -> None:
|
|
259
349
|
|
260
350
|
async def test_exec_as_user(sandbox_env: SandboxEnvironment) -> None:
|
261
351
|
username = "inspect-ai-test-exec-as-user"
|
352
|
+
|
353
|
+
# Neither adduser nor useradd are part of POSIX, so we need some brittle logic here
|
354
|
+
adduser_help_exec_result = await sandbox_env.exec(["adduser", "--help"])
|
355
|
+
adduser_help_text = (
|
356
|
+
adduser_help_exec_result.stdout + adduser_help_exec_result.stderr
|
357
|
+
)
|
358
|
+
|
359
|
+
if "BusyBox" in adduser_help_text:
|
360
|
+
adduser_command = ["adduser", "-D", username]
|
361
|
+
else:
|
362
|
+
adduser_command = [
|
363
|
+
"adduser",
|
364
|
+
"--comment",
|
365
|
+
"self_check.py",
|
366
|
+
"--disabled-password",
|
367
|
+
username,
|
368
|
+
]
|
369
|
+
|
262
370
|
try:
|
263
371
|
# Create a new user
|
264
372
|
add_user_result = await sandbox_env.exec(
|
265
|
-
|
373
|
+
adduser_command,
|
266
374
|
user="root",
|
267
375
|
timeout=10, # in one case adduser decided to ask for input which caused the test to hang indefinitely
|
268
376
|
)
|
inspect_ai/util/_subprocess.py
CHANGED
@@ -20,6 +20,8 @@ T = TypeVar("T", str, bytes)
|
|
20
20
|
|
21
21
|
@dataclass
|
22
22
|
class ExecResult(Generic[T]):
|
23
|
+
"""Execution result from call to `subprocess()`."""
|
24
|
+
|
23
25
|
success: bool
|
24
26
|
"""Did the process exit with success."""
|
25
27
|
|
@@ -85,11 +87,11 @@ async def subprocess(
|
|
85
87
|
cwd (str | Path | None): Switch to directory for execution.
|
86
88
|
env (dict[str, str]): Additional environment variables.
|
87
89
|
capture_output (bool): Capture stderr and stdout into ExecResult
|
88
|
-
|
90
|
+
(if False, then output is redirected to parent stderr/stdout)
|
89
91
|
output_limit (int | None): Stop reading output if it exceeds
|
90
|
-
|
92
|
+
the specified limit (in bytes).
|
91
93
|
timeout (int | None): Timeout. If the timeout expires then
|
92
|
-
|
94
|
+
a `TimeoutError` will be raised.
|
93
95
|
|
94
96
|
Returns:
|
95
97
|
Subprocess result (text or binary depending on `text` param)
|
inspect_ai/util/_subtask.py
CHANGED
@@ -27,21 +27,21 @@ logger = getLogger(__name__)
|
|
27
27
|
|
28
28
|
@runtime_checkable
|
29
29
|
class Subtask(Protocol):
|
30
|
-
"""Subtask with distinct `Store` and `Transcript`.
|
31
|
-
|
32
|
-
Args:
|
33
|
-
*args (Any): Arguments for the subtask.
|
34
|
-
**kwargs (Any): Keyword arguments for the subtask.
|
35
|
-
|
36
|
-
Returns:
|
37
|
-
Result of subtask.
|
38
|
-
"""
|
39
|
-
|
40
30
|
async def __call__(
|
41
31
|
self,
|
42
32
|
*args: Any,
|
43
33
|
**kwargs: Any,
|
44
|
-
) -> Any:
|
34
|
+
) -> Any:
|
35
|
+
"""Subtask with distinct `Store` and `Transcript`.
|
36
|
+
|
37
|
+
Args:
|
38
|
+
*args (Any): Arguments for the subtask.
|
39
|
+
**kwargs (Any): Keyword arguments for the subtask.
|
40
|
+
|
41
|
+
Returns:
|
42
|
+
Result of subtask.
|
43
|
+
"""
|
44
|
+
...
|
45
45
|
|
46
46
|
|
47
47
|
@overload
|
@@ -71,11 +71,10 @@ def subtask(
|
|
71
71
|
r"""Decorator for subtasks.
|
72
72
|
|
73
73
|
Args:
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
input (dict[str, Any] | None): Input to log for subtask
|
74
|
+
name: Name for subtask (defaults to function name)
|
75
|
+
store: Store to use for subtask
|
76
|
+
type: Type to use for subtask
|
77
|
+
input: Input to log for subtask
|
79
78
|
|
80
79
|
Returns:
|
81
80
|
Function which runs the Subtask, providing an isolated
|
@@ -1,8 +1,8 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: inspect_ai
|
3
|
-
Version: 0.3.62
|
3
|
+
Version: 0.3.64
|
4
4
|
Summary: Framework for large language model evaluations
|
5
|
-
Author: UK AI
|
5
|
+
Author: UK AI Security Institute
|
6
6
|
License: MIT License
|
7
7
|
Project-URL: Documentation, https://inspect.ai-safety-institute.org.uk/
|
8
8
|
Project-URL: Source Code, https://github.com/UKGovernmentBEIS/inspect_ai
|
@@ -55,6 +55,7 @@ Requires-Dist: azure-ai-inference; extra == "dev"
|
|
55
55
|
Requires-Dist: google-cloud-aiplatform; extra == "dev"
|
56
56
|
Requires-Dist: google-generativeai; extra == "dev"
|
57
57
|
Requires-Dist: goodfire; extra == "dev"
|
58
|
+
Requires-Dist: griffe; extra == "dev"
|
58
59
|
Requires-Dist: groq; extra == "dev"
|
59
60
|
Requires-Dist: ipython; extra == "dev"
|
60
61
|
Requires-Dist: mistralai; extra == "dev"
|
@@ -63,13 +64,15 @@ Requires-Dist: mypy; extra == "dev"
|
|
63
64
|
Requires-Dist: nbformat; extra == "dev"
|
64
65
|
Requires-Dist: openai; extra == "dev"
|
65
66
|
Requires-Dist: pre-commit; extra == "dev"
|
67
|
+
Requires-Dist: pylint; extra == "dev"
|
66
68
|
Requires-Dist: pytest; extra == "dev"
|
67
69
|
Requires-Dist: pytest-asyncio; extra == "dev"
|
68
70
|
Requires-Dist: pytest-cov; extra == "dev"
|
69
71
|
Requires-Dist: pytest-dotenv; extra == "dev"
|
70
72
|
Requires-Dist: pytest-xdist; extra == "dev"
|
71
|
-
Requires-Dist: ruff==0.9.
|
73
|
+
Requires-Dist: ruff==0.9.5; extra == "dev"
|
72
74
|
Requires-Dist: textual-dev>=0.86.2; extra == "dev"
|
75
|
+
Requires-Dist: types-Markdown; extra == "dev"
|
73
76
|
Requires-Dist: types-PyYAML; extra == "dev"
|
74
77
|
Requires-Dist: types-beautifulsoup4; extra == "dev"
|
75
78
|
Requires-Dist: types-aioboto3; extra == "dev"
|
@@ -81,15 +84,17 @@ Requires-Dist: types-protobuf; extra == "dev"
|
|
81
84
|
Requires-Dist: types-psutil; extra == "dev"
|
82
85
|
Requires-Dist: types-python-dateutil; extra == "dev"
|
83
86
|
Provides-Extra: doc
|
84
|
-
Requires-Dist: quarto-cli; extra == "doc"
|
87
|
+
Requires-Dist: quarto-cli==1.5.57; extra == "doc"
|
85
88
|
Requires-Dist: jupyter; extra == "doc"
|
89
|
+
Requires-Dist: panflute; extra == "doc"
|
90
|
+
Requires-Dist: markdown; extra == "doc"
|
86
91
|
Provides-Extra: dist
|
87
92
|
Requires-Dist: twine; extra == "dist"
|
88
93
|
Requires-Dist: build; extra == "dist"
|
89
94
|
|
90
|
-
[<img width="295" src="https://inspect.ai-safety-institute.org.uk/images/aisi-logo.
|
95
|
+
[<img width="295" src="https://inspect.ai-safety-institute.org.uk/images/aisi-logo.svg" />](https://aisi.gov.uk/)
|
91
96
|
|
92
|
-
Welcome to Inspect, a framework for large language model evaluations created by the [UK AI
|
97
|
+
Welcome to Inspect, a framework for large language model evaluations created by the [UK AI Security Institute](https://aisi.gov.uk/).
|
93
98
|
|
94
99
|
Inspect provides many built-in components, including facilities for prompt engineering, tool usage, multi-turn dialog, and model graded evaluations. Extensions to Inspect (e.g. to support new elicitation and scoring techniques) can be provided by other Python packages.
|
95
100
|
|