inspect-ai 0.3.62__py3-none-any.whl → 0.3.63__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/main.py +1 -1
- inspect_ai/_cli/trace.py +8 -0
- inspect_ai/_display/core/active.py +2 -3
- inspect_ai/_eval/eval.py +4 -4
- inspect_ai/_eval/evalset.py +6 -6
- inspect_ai/_eval/task/run.py +3 -0
- inspect_ai/_util/hash.py +1 -1
- inspect_ai/_view/www/.vscode/extensions.json +3 -0
- inspect_ai/_view/www/.vscode/settings.json +8 -0
- inspect_ai/_view/www/App.css +92 -29
- inspect_ai/_view/www/dist/assets/index.css +16637 -14676
- inspect_ai/_view/www/dist/assets/index.js +58897 -51440
- inspect_ai/_view/www/dist/index.html +1 -1
- inspect_ai/_view/www/index.html +2 -2
- inspect_ai/_view/www/log-schema.json +1 -0
- inspect_ai/_view/www/package.json +22 -4
- inspect_ai/_view/www/postcss.config.cjs +8 -9
- inspect_ai/_view/www/src/{App.mjs → App.tsx} +355 -365
- inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
- inspect_ai/_view/www/src/api/api-browser.ts +2 -2
- inspect_ai/_view/www/src/api/api-http.ts +3 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
- inspect_ai/_view/www/src/api/client-api.ts +4 -4
- inspect_ai/_view/www/src/api/index.ts +4 -4
- inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
- inspect_ai/_view/www/src/appearance/colors.ts +9 -0
- inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
- inspect_ai/_view/www/src/appearance/icons.ts +100 -0
- inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
- inspect_ai/_view/www/src/components/Card.css +60 -0
- inspect_ai/_view/www/src/components/Card.tsx +109 -0
- inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
- inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
- inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
- inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
- inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
- inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
- inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
- inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
- inspect_ai/_view/www/src/components/FindBand.css +49 -0
- inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
- inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
- inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
- inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
- inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
- inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
- inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
- inspect_ai/_view/www/src/components/LargeModal.tsx +199 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
- inspect_ai/_view/www/src/components/MessageBand.css +43 -0
- inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
- inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
- inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
- inspect_ai/_view/www/src/components/NavPills.tsx +99 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
- inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
- inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +200 -0
- inspect_ai/_view/www/src/components/ToolButton.css +3 -0
- inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
- inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
- inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
- inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
- inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -7
- inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
- inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
- inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
- inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
- inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
- inspect_ai/_view/www/src/metadata/types.ts +18 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
- inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
- inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
- inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +309 -0
- inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
- inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +326 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +175 -0
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +60 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
- inspect_ai/_view/www/src/samples/chat/ChatView.tsx +46 -0
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +143 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +131 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +145 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +86 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +53 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
- inspect_ai/_view/www/src/samples/chat/tools/tool.ts +107 -0
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +363 -0
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
- inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
- inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
- inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
- inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
- inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
- inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
- inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
- inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
- inspect_ai/_view/www/src/samples/error/error.ts +15 -0
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
- inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
- inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
- inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
- inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +173 -0
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +182 -0
- inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
- inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
- inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +108 -0
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +91 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +38 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +190 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +274 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
- inspect_ai/_view/www/src/samples/transcript/state/{StateEventView.mjs → StateEventView.tsx} +148 -110
- inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
- inspect_ai/_view/www/src/types/log.d.ts +1 -0
- inspect_ai/_view/www/src/types/prism.d.ts +11 -0
- inspect_ai/_view/www/src/types.ts +71 -0
- inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +22 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +95 -0
- inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
- inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
- inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
- inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
- inspect_ai/_view/www/src/utils/attachments.ts +42 -0
- inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
- inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
- inspect_ai/_view/www/src/utils/debugging.ts +28 -0
- inspect_ai/_view/www/src/utils/dom.ts +30 -0
- inspect_ai/_view/www/src/utils/format.ts +194 -0
- inspect_ai/_view/www/src/utils/git.ts +7 -0
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/_view/www/src/utils/http.ts +14 -0
- inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
- inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
- inspect_ai/_view/www/src/utils/queue.ts +51 -0
- inspect_ai/_view/www/src/utils/sync.ts +114 -0
- inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
- inspect_ai/_view/www/src/utils/vscode.ts +13 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
- inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +160 -0
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
- inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +113 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +67 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +156 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +222 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +41 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +61 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +80 -0
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
- inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
- inspect_ai/_view/www/src/workspace/types.ts +10 -0
- inspect_ai/_view/www/tsconfig.json +23 -9
- inspect_ai/_view/www/vite.config.js +8 -17
- inspect_ai/_view/www/yarn.lock +627 -556
- inspect_ai/dataset/_dataset.py +36 -0
- inspect_ai/dataset/_sources/csv.py +8 -0
- inspect_ai/dataset/_sources/file.py +4 -0
- inspect_ai/dataset/_sources/hf.py +11 -1
- inspect_ai/dataset/_sources/json.py +8 -0
- inspect_ai/log/_log.py +3 -6
- inspect_ai/log/_message.py +1 -1
- inspect_ai/log/_recorders/json.py +5 -7
- inspect_ai/model/_call_tools.py +2 -1
- inspect_ai/model/_providers/anthropic.py +3 -3
- inspect_ai/model/_providers/openai_o1.py +3 -5
- inspect_ai/model/_providers/openrouter.py +86 -0
- inspect_ai/model/_providers/providers.py +11 -0
- inspect_ai/scorer/_answer.py +7 -7
- inspect_ai/scorer/_classification.py +34 -18
- inspect_ai/scorer/_common.py +2 -8
- inspect_ai/solver/_multiple_choice.py +24 -9
- inspect_ai/tool/__init__.py +2 -0
- inspect_ai/tool/{beta → _tools}/_computer/_computer.py +2 -5
- inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
- inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/tool/_tools/_execute.py +8 -2
- inspect_ai/tool/beta.py +3 -0
- inspect_ai/util/_sandbox/docker/docker.py +32 -85
- inspect_ai/util/_sandbox/self_check.py +124 -16
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.63.dist-info}/METADATA +2 -1
- inspect_ai-0.3.63.dist-info/RECORD +618 -0
- inspect_ai/_view/www/src/Register.mjs +0 -3
- inspect_ai/_view/www/src/Types.mjs +0 -38
- inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
- inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
- inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
- inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
- inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
- inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
- inspect_ai/_view/www/src/components/Card.mjs +0 -126
- inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
- inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
- inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
- inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
- inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
- inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
- inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
- inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
- inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
- inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
- inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
- inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
- inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
- inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
- inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
- inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
- inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
- inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
- inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
- inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
- inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
- inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
- inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
- inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
- inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
- inspect_ai/_view/www/src/components/Tools.mjs +0 -376
- inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
- inspect_ai/_view/www/src/components/ansi-output.js +0 -932
- inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
- inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
- inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
- inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
- inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
- inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
- inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
- inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
- inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
- inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
- inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
- inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
- inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
- inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
- inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
- inspect_ai/_view/www/src/utils/Format.mjs +0 -260
- inspect_ai/_view/www/src/utils/Git.mjs +0 -12
- inspect_ai/_view/www/src/utils/Html.mjs +0 -21
- inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
- inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
- inspect_ai/_view/www/src/utils/http.mjs +0 -18
- inspect_ai/_view/www/src/utils/queue.mjs +0 -67
- inspect_ai/_view/www/src/utils/sync.mjs +0 -101
- inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
- inspect_ai/tool/beta/__init__.py +0 -5
- inspect_ai-0.3.62.dist-info/RECORD +0 -481
- /inspect_ai/{tool/beta/_computer/_resources/tool/__init__.py → _view/www/src/components/MorePopOver.css} +0 -0
- /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
- /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _view/www/src/workspace/tabs/InfoTab.module.css} +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_common.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.63.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.63.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.63.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.63.dist-info}/top_level.txt +0 -0
@@ -2,10 +2,7 @@ from typing import Awaitable, Callable
|
|
2
2
|
|
3
3
|
from inspect_ai._util.content import Content, ContentImage, ContentText
|
4
4
|
from inspect_ai.tool import Tool, ToolResult, tool
|
5
|
-
from inspect_ai.tool._tool import
|
6
|
-
TOOL_INIT_MODEL_INPUT,
|
7
|
-
ToolParsingError,
|
8
|
-
)
|
5
|
+
from inspect_ai.tool._tool import TOOL_INIT_MODEL_INPUT, ToolParsingError
|
9
6
|
from inspect_ai.tool._tool_call import ToolCallModelInput
|
10
7
|
|
11
8
|
from . import _common as common
|
@@ -84,7 +81,7 @@ def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool
|
|
84
81
|
if coordinate is not None:
|
85
82
|
raise ToolParsingError(f"coordinate is not accepted for {action}")
|
86
83
|
if not isinstance(text, str):
|
87
|
-
raise ToolParsingError(
|
84
|
+
raise ToolParsingError(f"{text} must be a string")
|
88
85
|
|
89
86
|
if action == "key":
|
90
87
|
return await common.press_key(text, timeout=timeout)
|
File without changes
|
@@ -138,7 +138,7 @@ class X11Client:
|
|
138
138
|
if coordinate is not None:
|
139
139
|
raise ToolError(f"coordinate is not accepted for {action}")
|
140
140
|
if not isinstance(text, str):
|
141
|
-
raise ToolError(
|
141
|
+
raise ToolError(f"{text} must be a string")
|
142
142
|
|
143
143
|
if action == "key":
|
144
144
|
return await self.shell(
|
File without changes
|
@@ -74,8 +74,14 @@ def python(timeout: int | None = None, user: str | None = None) -> Tool:
|
|
74
74
|
"""
|
75
75
|
Use the python function to execute Python code.
|
76
76
|
|
77
|
-
The
|
78
|
-
|
77
|
+
The Python tool executes single-run Python scripts. Important notes:
|
78
|
+
1. Each execution is independent - no state is preserved between runs
|
79
|
+
2. You must explicitly use print() statements to see any output
|
80
|
+
3. Simply writing expressions (like in notebooks) will not display results
|
81
|
+
4. The script cannot accept interactive input during execution
|
82
|
+
5. Return statements alone won't produce visible output
|
83
|
+
6. All variables and imports are cleared between executions
|
84
|
+
7. Standard output (via print()) is the only way to see results
|
79
85
|
|
80
86
|
Args:
|
81
87
|
code (str): The python code to execute.
|
inspect_ai/tool/beta.py
ADDED
@@ -1,3 +1,4 @@
|
|
1
|
+
import base64
|
1
2
|
import errno
|
2
3
|
import json
|
3
4
|
import os
|
@@ -34,7 +35,6 @@ from .compose import (
|
|
34
35
|
compose_build,
|
35
36
|
compose_check_running,
|
36
37
|
compose_cleanup_images,
|
37
|
-
compose_command,
|
38
38
|
compose_cp,
|
39
39
|
compose_exec,
|
40
40
|
compose_ps,
|
@@ -270,103 +270,50 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
270
270
|
|
271
271
|
@override
|
272
272
|
async def write_file(self, file: str, contents: str | bytes) -> None:
|
273
|
-
# exec function w/ timeout
|
274
|
-
async def exec(cmd: list[str]) -> ExecResult[str]:
|
275
|
-
return await self.exec(cmd, timeout=60)
|
276
|
-
|
277
273
|
# resolve relative file paths
|
278
274
|
file = self.container_file(file)
|
279
275
|
|
280
|
-
#
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
276
|
+
# ensure that the directory exists
|
277
|
+
parent = Path(file).parent.as_posix()
|
278
|
+
if parent != ".":
|
279
|
+
result = await self.exec(["mkdir", "-p", parent])
|
280
|
+
if not result.success:
|
281
|
+
msg = f"Failed to create container directory {parent}: {result.stderr}"
|
282
|
+
raise RuntimeError(msg)
|
287
283
|
|
288
|
-
# write
|
284
|
+
# write the file
|
289
285
|
if isinstance(contents, str):
|
290
|
-
|
286
|
+
result = await self.exec(
|
287
|
+
["sh", "-e", "-c", 'tee -- "$1"', "write_file_script", file],
|
288
|
+
input=contents,
|
289
|
+
)
|
291
290
|
else:
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
container_tmpfile = (
|
309
|
-
f".tmp_inspect_sandbox_{os.path.basename(local_tmpfile.name)}"
|
310
|
-
)
|
311
|
-
|
312
|
-
# compose cp will leave the file owned by root
|
313
|
-
await compose_cp(
|
314
|
-
src=local_tmpfile.name,
|
315
|
-
dest=f"{self._service}:{self.container_file(container_tmpfile)}",
|
316
|
-
project=self._project,
|
317
|
-
)
|
318
|
-
|
319
|
-
local_tmpfile.close() # this will also delete the file
|
320
|
-
|
321
|
-
if not hasattr(self, "_docker_user"):
|
322
|
-
uid = (await exec(["id", "-u"])).stdout.strip()
|
323
|
-
gid = (await exec(["id", "-g"])).stdout.strip()
|
324
|
-
self._docker_user = (uid, gid)
|
325
|
-
|
326
|
-
await compose_command(
|
327
|
-
[
|
328
|
-
"exec",
|
329
|
-
"--user",
|
330
|
-
"root",
|
331
|
-
self._service,
|
332
|
-
"chown",
|
333
|
-
f"{self._docker_user[0]}:{self._docker_user[1]}",
|
334
|
-
container_tmpfile,
|
335
|
-
],
|
336
|
-
project=self._project,
|
337
|
-
timeout=60,
|
338
|
-
)
|
339
|
-
|
340
|
-
parent = PurePosixPath(file).parent
|
341
|
-
|
342
|
-
# We do these steps in a shell script for efficiency to avoid round-trips to docker.
|
343
|
-
res_cp = await exec(
|
344
|
-
[
|
345
|
-
"sh",
|
346
|
-
"-e",
|
347
|
-
"-c",
|
348
|
-
'mkdir -p -- "$1"; cp -T -- "$2" "$3"; rm -- "$2"',
|
349
|
-
"copy_script",
|
350
|
-
str(parent),
|
351
|
-
container_tmpfile,
|
352
|
-
file,
|
353
|
-
]
|
354
|
-
)
|
355
|
-
|
356
|
-
if res_cp.returncode != 0:
|
357
|
-
if "Permission denied" in res_cp.stderr:
|
358
|
-
ls_result = await exec(["ls", "-la", "."])
|
359
|
-
error_string = f"Permission was denied. Error details: {res_cp.stderr}; ls -la: {ls_result.stdout}; {self._docker_user=}"
|
291
|
+
base64_contents = base64.b64encode(contents).decode("US-ASCII")
|
292
|
+
result = await self.exec(
|
293
|
+
[
|
294
|
+
"sh",
|
295
|
+
"-e",
|
296
|
+
"-c",
|
297
|
+
'base64 -d | tee -- "$1" > /dev/null',
|
298
|
+
"write_file_script",
|
299
|
+
file,
|
300
|
+
],
|
301
|
+
input=base64_contents,
|
302
|
+
)
|
303
|
+
if result.returncode != 0:
|
304
|
+
if "permission denied" in result.stderr.casefold():
|
305
|
+
ls_result = await self.exec(["ls", "-la", "."])
|
306
|
+
error_string = f"Permission was denied. Error details: {result.stderr}; ls -la: {ls_result.stdout}"
|
360
307
|
raise PermissionError(error_string)
|
361
308
|
elif (
|
362
|
-
"cannot overwrite directory" in
|
363
|
-
or "is a directory" in
|
309
|
+
"cannot overwrite directory" in result.stderr.casefold()
|
310
|
+
or "is a directory" in result.stderr.casefold()
|
364
311
|
):
|
365
312
|
raise IsADirectoryError(
|
366
313
|
f"Failed to write file: {file} because it is a directory already"
|
367
314
|
)
|
368
315
|
else:
|
369
|
-
raise RuntimeError(f"failed to copy during write_file: {
|
316
|
+
raise RuntimeError(f"failed to copy during write_file: {result}")
|
370
317
|
|
371
318
|
@overload
|
372
319
|
async def read_file(self, file: str, text: Literal[True] = True) -> str: ...
|
@@ -32,6 +32,7 @@ async def self_check(sandbox_env: SandboxEnvironment) -> dict[str, bool | str]:
|
|
32
32
|
for fn in [
|
33
33
|
test_read_and_write_file_text,
|
34
34
|
test_read_and_write_file_binary,
|
35
|
+
test_read_and_write_large_file_binary,
|
35
36
|
test_write_file_text_utf,
|
36
37
|
test_read_and_write_file_including_directory_absolute,
|
37
38
|
test_read_and_write_file_including_directory_relative,
|
@@ -41,12 +42,19 @@ async def self_check(sandbox_env: SandboxEnvironment) -> dict[str, bool | str]:
|
|
41
42
|
test_read_file_is_directory,
|
42
43
|
test_read_file_nonsense_name,
|
43
44
|
test_read_file_limit,
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
45
|
+
test_write_text_file_zero_length,
|
46
|
+
test_write_text_file_space,
|
47
|
+
test_write_text_file_is_directory,
|
48
|
+
test_write_text_file_without_permissions,
|
49
|
+
test_write_text_file_exists,
|
50
|
+
test_write_binary_file_zero_length,
|
51
|
+
test_write_binary_file_space,
|
52
|
+
test_write_binary_file_is_directory,
|
53
|
+
test_write_binary_file_without_permissions,
|
54
|
+
test_write_binary_file_exists,
|
49
55
|
test_exec_output,
|
56
|
+
test_exec_stderr,
|
57
|
+
test_exec_returncode,
|
50
58
|
test_exec_timeout,
|
51
59
|
test_exec_permission_error,
|
52
60
|
test_exec_as_user,
|
@@ -100,6 +108,17 @@ async def test_read_and_write_file_binary(sandbox_env: SandboxEnvironment) -> No
|
|
100
108
|
await _cleanup_file(sandbox_env, file_name)
|
101
109
|
|
102
110
|
|
111
|
+
async def test_read_and_write_large_file_binary(
|
112
|
+
sandbox_env: SandboxEnvironment,
|
113
|
+
) -> None:
|
114
|
+
file_name = "test_read_and_write_large_file_binary.file"
|
115
|
+
long_bytes = b"\xc3" * 5_000_000
|
116
|
+
await sandbox_env.write_file(file_name, long_bytes)
|
117
|
+
written_file_bytes = await sandbox_env.read_file(file_name, text=False)
|
118
|
+
assert long_bytes == written_file_bytes
|
119
|
+
await _cleanup_file(sandbox_env, file_name)
|
120
|
+
|
121
|
+
|
103
122
|
async def test_read_and_write_file_including_directory_absolute(
|
104
123
|
sandbox_env: SandboxEnvironment,
|
105
124
|
) -> None:
|
@@ -176,7 +195,7 @@ async def test_read_file_limit(sandbox_env: SandboxEnvironment) -> None:
|
|
176
195
|
await _cleanup_file(sandbox_env, file_name)
|
177
196
|
|
178
197
|
|
179
|
-
async def test_write_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
|
198
|
+
async def test_write_text_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
|
180
199
|
file_name = "zero_length_file.file"
|
181
200
|
await sandbox_env.write_file(file_name, "")
|
182
201
|
zero_length = await sandbox_env.read_file(file_name, text=True)
|
@@ -185,7 +204,7 @@ async def test_write_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
|
|
185
204
|
await _cleanup_file(sandbox_env, file_name)
|
186
205
|
|
187
206
|
|
188
|
-
async def test_write_file_space(sandbox_env: SandboxEnvironment) -> None:
|
207
|
+
async def test_write_text_file_space(sandbox_env: SandboxEnvironment) -> None:
|
189
208
|
space = "to the moon"
|
190
209
|
file_name = "file with space.file"
|
191
210
|
await sandbox_env.write_file(file_name, space)
|
@@ -195,28 +214,28 @@ async def test_write_file_space(sandbox_env: SandboxEnvironment) -> None:
|
|
195
214
|
await _cleanup_file(sandbox_env, file_name)
|
196
215
|
|
197
216
|
|
198
|
-
async def test_write_file_is_directory(
|
217
|
+
async def test_write_text_file_is_directory(
|
199
218
|
sandbox_env: SandboxEnvironment,
|
200
219
|
) -> None:
|
201
220
|
# ensure /tmp/directory exists
|
202
221
|
await sandbox_env.write_file(
|
203
|
-
"/tmp/
|
222
|
+
"/tmp/inspect_ai_test_write_text_file_is_directory/file", "unused content"
|
204
223
|
)
|
205
224
|
with Raises(IsADirectoryError) as e_info:
|
206
225
|
await sandbox_env.write_file(
|
207
|
-
"/tmp/
|
226
|
+
"/tmp/inspect_ai_test_write_text_file_is_directory",
|
208
227
|
"content cannot go in a directory, dummy",
|
209
228
|
)
|
210
229
|
assert "directory" in str(e_info.value)
|
211
230
|
await sandbox_env.exec(
|
212
|
-
["rm", "-rf", "/tmp/
|
231
|
+
["rm", "-rf", "/tmp/inspect_ai_test_write_text_file_is_directory"]
|
213
232
|
)
|
214
233
|
|
215
234
|
|
216
|
-
async def test_write_file_without_permissions(
|
235
|
+
async def test_write_text_file_without_permissions(
|
217
236
|
sandbox_env: SandboxEnvironment,
|
218
237
|
) -> None:
|
219
|
-
file_name = "
|
238
|
+
file_name = "test_write_text_file_without_permissions.file"
|
220
239
|
await sandbox_env.write_file(file_name, "impervious #content")
|
221
240
|
await sandbox_env.exec(["chmod", "-w", file_name])
|
222
241
|
with Raises(PermissionError) as e_info:
|
@@ -226,7 +245,7 @@ async def test_write_file_without_permissions(
|
|
226
245
|
await _cleanup_file(sandbox_env, file_name)
|
227
246
|
|
228
247
|
|
229
|
-
async def test_write_file_exists(
|
248
|
+
async def test_write_text_file_exists(
|
230
249
|
sandbox_env: SandboxEnvironment,
|
231
250
|
) -> None:
|
232
251
|
file_name = "file_exists.file"
|
@@ -237,6 +256,67 @@ async def test_write_file_exists(
|
|
237
256
|
await _cleanup_file(sandbox_env, file_name)
|
238
257
|
|
239
258
|
|
259
|
+
async def test_write_binary_file_zero_length(sandbox_env: SandboxEnvironment) -> None:
|
260
|
+
file_name = "zero_length_file.file"
|
261
|
+
await sandbox_env.write_file(file_name, b"")
|
262
|
+
zero_length = await sandbox_env.read_file(file_name, text=False)
|
263
|
+
assert isinstance(zero_length, bytes)
|
264
|
+
assert zero_length == b""
|
265
|
+
await _cleanup_file(sandbox_env, file_name)
|
266
|
+
|
267
|
+
|
268
|
+
async def test_write_binary_file_space(sandbox_env: SandboxEnvironment) -> None:
|
269
|
+
binary_content = b"\xc3\x28"
|
270
|
+
file_name = "file with space.file"
|
271
|
+
await sandbox_env.write_file(file_name, binary_content)
|
272
|
+
file_with_space = await sandbox_env.read_file(file_name, text=False)
|
273
|
+
assert isinstance(file_with_space, bytes)
|
274
|
+
assert file_with_space == binary_content
|
275
|
+
await _cleanup_file(sandbox_env, file_name)
|
276
|
+
|
277
|
+
|
278
|
+
async def test_write_binary_file_is_directory(
|
279
|
+
sandbox_env: SandboxEnvironment,
|
280
|
+
) -> None:
|
281
|
+
# ensure /tmp/directory exists
|
282
|
+
await sandbox_env.write_file(
|
283
|
+
"/tmp/inspect_ai_test_write_binary_file_is_directory/file", "unused content"
|
284
|
+
)
|
285
|
+
with Raises(IsADirectoryError) as e_info:
|
286
|
+
await sandbox_env.write_file(
|
287
|
+
"/tmp/inspect_ai_test_write_binary_file_is_directory",
|
288
|
+
b"\xc3\x28",
|
289
|
+
)
|
290
|
+
assert "directory" in str(e_info.value)
|
291
|
+
await sandbox_env.exec(
|
292
|
+
["rm", "-rf", "/tmp/inspect_ai_test_write_binary_file_is_directory"]
|
293
|
+
)
|
294
|
+
|
295
|
+
|
296
|
+
async def test_write_binary_file_without_permissions(
|
297
|
+
sandbox_env: SandboxEnvironment,
|
298
|
+
) -> None:
|
299
|
+
file_name = "test_write_binary_file_without_permissions.file"
|
300
|
+
await sandbox_env.write_file(file_name, "impervious #content")
|
301
|
+
await sandbox_env.exec(["chmod", "-w", file_name])
|
302
|
+
with Raises(PermissionError) as e_info:
|
303
|
+
await sandbox_env.write_file(file_name, b"\xc3\x28")
|
304
|
+
assert file_name in str(e_info.value)
|
305
|
+
await sandbox_env.exec(["chmod", "+w", file_name])
|
306
|
+
await _cleanup_file(sandbox_env, file_name)
|
307
|
+
|
308
|
+
|
309
|
+
async def test_write_binary_file_exists(
|
310
|
+
sandbox_env: SandboxEnvironment,
|
311
|
+
) -> None:
|
312
|
+
file_name = "file_exists.file"
|
313
|
+
await sandbox_env.write_file(file_name, b"\xc3\x28")
|
314
|
+
await sandbox_env.write_file(file_name, b"\xc3\x29")
|
315
|
+
altered_content = await sandbox_env.read_file(file_name, text=False)
|
316
|
+
assert altered_content == b"\xc3\x29"
|
317
|
+
await _cleanup_file(sandbox_env, file_name)
|
318
|
+
|
319
|
+
|
240
320
|
async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:
|
241
321
|
exec_result = await sandbox_env.exec(["sh", "-c", "echo foo; echo bar"])
|
242
322
|
expected = "foo\nbar\n"
|
@@ -246,9 +326,19 @@ async def test_exec_output(sandbox_env: SandboxEnvironment) -> None:
|
|
246
326
|
)
|
247
327
|
|
248
328
|
|
329
|
+
async def test_exec_stderr(sandbox_env: SandboxEnvironment) -> None:
|
330
|
+
exec_result = await sandbox_env.exec(["sh", "-c", "echo boof; echo baz >&2"])
|
331
|
+
assert exec_result.stderr == "baz\n"
|
332
|
+
|
333
|
+
|
334
|
+
async def test_exec_returncode(sandbox_env: SandboxEnvironment) -> None:
|
335
|
+
exec_result = await sandbox_env.exec(["sh", "-c", "echo foo; exit 70"])
|
336
|
+
assert exec_result.returncode == 70
|
337
|
+
|
338
|
+
|
249
339
|
async def test_exec_timeout(sandbox_env: SandboxEnvironment) -> None:
|
250
340
|
with Raises(TimeoutError):
|
251
|
-
await sandbox_env.exec(["sleep", "
|
341
|
+
await sandbox_env.exec(["sleep", "4"], timeout=2)
|
252
342
|
|
253
343
|
|
254
344
|
async def test_exec_permission_error(sandbox_env: SandboxEnvironment) -> None:
|
@@ -259,10 +349,28 @@ async def test_exec_permission_error(sandbox_env: SandboxEnvironment) -> None:
|
|
259
349
|
|
260
350
|
async def test_exec_as_user(sandbox_env: SandboxEnvironment) -> None:
|
261
351
|
username = "inspect-ai-test-exec-as-user"
|
352
|
+
|
353
|
+
# Neither adduser nor useradd are part of POSIX, so we need some brittle logic here
|
354
|
+
adduser_help_exec_result = await sandbox_env.exec(["adduser", "--help"])
|
355
|
+
adduser_help_text = (
|
356
|
+
adduser_help_exec_result.stdout + adduser_help_exec_result.stderr
|
357
|
+
)
|
358
|
+
|
359
|
+
if "BusyBox" in adduser_help_text:
|
360
|
+
adduser_command = ["adduser", "-D", username]
|
361
|
+
else:
|
362
|
+
adduser_command = [
|
363
|
+
"adduser",
|
364
|
+
"--comment",
|
365
|
+
"self_check.py",
|
366
|
+
"--disabled-password",
|
367
|
+
username,
|
368
|
+
]
|
369
|
+
|
262
370
|
try:
|
263
371
|
# Create a new user
|
264
372
|
add_user_result = await sandbox_env.exec(
|
265
|
-
|
373
|
+
adduser_command,
|
266
374
|
user="root",
|
267
375
|
timeout=10, # in one case adduser decided to ask for input which caused the test to hang indefinitely
|
268
376
|
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: inspect_ai
|
3
|
-
Version: 0.3.62
|
3
|
+
Version: 0.3.63
|
4
4
|
Summary: Framework for large language model evaluations
|
5
5
|
Author: UK AI Safety Institute
|
6
6
|
License: MIT License
|
@@ -63,6 +63,7 @@ Requires-Dist: mypy; extra == "dev"
|
|
63
63
|
Requires-Dist: nbformat; extra == "dev"
|
64
64
|
Requires-Dist: openai; extra == "dev"
|
65
65
|
Requires-Dist: pre-commit; extra == "dev"
|
66
|
+
Requires-Dist: pylint; extra == "dev"
|
66
67
|
Requires-Dist: pytest; extra == "dev"
|
67
68
|
Requires-Dist: pytest-asyncio; extra == "dev"
|
68
69
|
Requires-Dist: pytest-cov; extra == "dev"
|