inspect-ai 0.3.62__py3-none-any.whl → 0.3.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/cache.py +8 -7
- inspect_ai/_cli/common.py +0 -12
- inspect_ai/_cli/eval.py +32 -4
- inspect_ai/_cli/info.py +1 -0
- inspect_ai/_cli/list.py +1 -1
- inspect_ai/_cli/log.py +2 -0
- inspect_ai/_cli/main.py +1 -1
- inspect_ai/_cli/sandbox.py +4 -1
- inspect_ai/_cli/score.py +181 -32
- inspect_ai/_cli/trace.py +10 -0
- inspect_ai/_cli/view.py +4 -2
- inspect_ai/_display/core/active.py +2 -3
- inspect_ai/_display/core/config.py +7 -1
- inspect_ai/_display/textual/widgets/samples.py +4 -3
- inspect_ai/_display/textual/widgets/sandbox.py +6 -0
- inspect_ai/_eval/eval.py +104 -101
- inspect_ai/_eval/evalset.py +75 -75
- inspect_ai/_eval/loader.py +122 -12
- inspect_ai/_eval/registry.py +1 -1
- inspect_ai/_eval/run.py +14 -0
- inspect_ai/_eval/score.py +125 -36
- inspect_ai/_eval/task/log.py +105 -4
- inspect_ai/_eval/task/results.py +92 -38
- inspect_ai/_eval/task/run.py +9 -2
- inspect_ai/_eval/task/sandbox.py +35 -2
- inspect_ai/_eval/task/task.py +49 -46
- inspect_ai/_util/constants.py +1 -1
- inspect_ai/_util/content.py +8 -0
- inspect_ai/_util/error.py +2 -0
- inspect_ai/_util/file.py +15 -1
- inspect_ai/_util/hash.py +1 -1
- inspect_ai/_util/logger.py +4 -2
- inspect_ai/_util/registry.py +7 -1
- inspect_ai/_view/view.py +1 -2
- inspect_ai/_view/www/.vscode/extensions.json +3 -0
- inspect_ai/_view/www/.vscode/settings.json +8 -0
- inspect_ai/_view/www/App.css +97 -29
- inspect_ai/_view/www/README.md +1 -1
- inspect_ai/_view/www/dist/assets/index.css +16663 -14674
- inspect_ai/_view/www/dist/assets/index.js +58808 -51348
- inspect_ai/_view/www/dist/index.html +1 -1
- inspect_ai/_view/www/index.html +2 -2
- inspect_ai/_view/www/log-schema.json +87 -73
- inspect_ai/_view/www/package.json +22 -4
- inspect_ai/_view/www/postcss.config.cjs +8 -9
- inspect_ai/_view/www/src/{App.mjs → App.tsx} +356 -365
- inspect_ai/_view/www/src/AppErrorBoundary.tsx +47 -0
- inspect_ai/_view/www/src/api/api-browser.ts +2 -2
- inspect_ai/_view/www/src/api/api-http.ts +3 -5
- inspect_ai/_view/www/src/api/api-vscode.ts +6 -6
- inspect_ai/_view/www/src/api/client-api.ts +4 -4
- inspect_ai/_view/www/src/api/index.ts +4 -4
- inspect_ai/_view/www/src/api/{Types.ts → types.ts} +25 -9
- inspect_ai/_view/www/src/appearance/colors.ts +9 -0
- inspect_ai/_view/www/src/appearance/fonts.ts +39 -0
- inspect_ai/_view/www/src/appearance/icons.ts +100 -0
- inspect_ai/_view/www/src/appearance/{Styles.mjs → styles.ts} +2 -32
- inspect_ai/_view/www/src/components/AnsiDisplay.tsx +198 -0
- inspect_ai/_view/www/src/components/AsciinemaPlayer.tsx +86 -0
- inspect_ai/_view/www/src/components/Card.css +60 -0
- inspect_ai/_view/www/src/components/Card.tsx +109 -0
- inspect_ai/_view/www/src/components/CopyButton.module.css +11 -0
- inspect_ai/_view/www/src/components/CopyButton.tsx +58 -0
- inspect_ai/_view/www/src/components/DownloadButton.css +4 -0
- inspect_ai/_view/www/src/components/DownloadButton.tsx +25 -0
- inspect_ai/_view/www/src/components/DownloadPanel.css +10 -0
- inspect_ai/_view/www/src/components/DownloadPanel.tsx +30 -0
- inspect_ai/_view/www/src/components/EmptyPanel.css +12 -0
- inspect_ai/_view/www/src/components/EmptyPanel.tsx +15 -0
- inspect_ai/_view/www/src/components/ErrorPanel.css +37 -0
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +39 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.css +40 -0
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +115 -0
- inspect_ai/_view/www/src/components/FindBand.css +49 -0
- inspect_ai/_view/www/src/components/FindBand.tsx +130 -0
- inspect_ai/_view/www/src/components/HumanBaselineView.css +41 -0
- inspect_ai/_view/www/src/components/HumanBaselineView.tsx +162 -0
- inspect_ai/_view/www/src/components/JsonPanel.css +20 -0
- inspect_ai/_view/www/src/components/JsonPanel.tsx +82 -0
- inspect_ai/_view/www/src/components/LabeledValue.css +20 -0
- inspect_ai/_view/www/src/components/LabeledValue.tsx +41 -0
- inspect_ai/_view/www/src/components/LargeModal.module.css +54 -0
- inspect_ai/_view/www/src/components/LargeModal.tsx +189 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.css +95 -0
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +132 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.css +3 -0
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +133 -0
- inspect_ai/_view/www/src/components/MessageBand.css +43 -0
- inspect_ai/_view/www/src/components/MessageBand.tsx +39 -0
- inspect_ai/_view/www/src/components/MorePopOver.css +0 -0
- inspect_ai/_view/www/src/components/MorePopOver.tsx +67 -0
- inspect_ai/_view/www/src/components/NavPills.module.css +18 -0
- inspect_ai/_view/www/src/components/NavPills.tsx +101 -0
- inspect_ai/_view/www/src/components/ProgressBar.module.css +37 -0
- inspect_ai/_view/www/src/components/ProgressBar.tsx +22 -0
- inspect_ai/_view/www/src/components/TabSet.module.css +40 -0
- inspect_ai/_view/www/src/components/TabSet.tsx +215 -0
- inspect_ai/_view/www/src/components/ToolButton.css +3 -0
- inspect_ai/_view/www/src/components/ToolButton.tsx +27 -0
- inspect_ai/_view/www/src/components/VirtualList.module.css +19 -0
- inspect_ai/_view/www/src/components/VirtualList.tsx +292 -0
- inspect_ai/_view/www/src/{index.js → index.tsx} +45 -19
- inspect_ai/_view/www/src/{log → logfile}/remoteLogFile.ts +3 -8
- inspect_ai/_view/www/src/{utils/remoteZipFile.mjs → logfile/remoteZipFile.ts} +86 -80
- inspect_ai/_view/www/src/metadata/MetaDataGrid.tsx +83 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.module.css +35 -0
- inspect_ai/_view/www/src/metadata/MetaDataView.tsx +95 -0
- inspect_ai/_view/www/src/metadata/MetadataGrid.module.css +15 -0
- inspect_ai/_view/www/src/metadata/RenderedContent.module.css +12 -0
- inspect_ai/_view/www/src/{components/RenderedContent/RenderedContent.mjs → metadata/RenderedContent.tsx} +92 -73
- inspect_ai/_view/www/src/metadata/types.ts +18 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.module.css +3 -0
- inspect_ai/_view/www/src/plan/DatasetDetailView.tsx +37 -0
- inspect_ai/_view/www/src/plan/DetailStep.module.css +9 -0
- inspect_ai/_view/www/src/plan/DetailStep.tsx +31 -0
- inspect_ai/_view/www/src/plan/PlanCard.tsx +28 -0
- inspect_ai/_view/www/src/plan/PlanDetailView.module.css +48 -0
- inspect_ai/_view/www/src/plan/PlanDetailView.tsx +324 -0
- inspect_ai/_view/www/src/plan/ScorerDetailView.module.css +3 -0
- inspect_ai/_view/www/src/plan/ScorerDetailView.tsx +30 -0
- inspect_ai/_view/www/src/plan/SolverDetailView.module.css +15 -0
- inspect_ai/_view/www/src/plan/SolverDetailView.tsx +32 -0
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.module.css +8 -0
- inspect_ai/_view/www/src/samples/InlineSampleDisplay.tsx +53 -0
- inspect_ai/_view/www/src/samples/SampleDialog.tsx +122 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.module.css +29 -0
- inspect_ai/_view/www/src/samples/SampleDisplay.tsx +331 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.module.css +24 -0
- inspect_ai/_view/www/src/samples/SampleSummaryView.tsx +177 -0
- inspect_ai/_view/www/src/samples/SamplesTools.tsx +52 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.module.css +29 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessage.tsx +76 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRenderer.tsx +60 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.module.css +9 -0
- inspect_ai/_view/www/src/samples/chat/ChatMessageRow.tsx +57 -0
- inspect_ai/_view/www/src/samples/chat/ChatView.tsx +47 -0
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/ChatViewVirtualList.tsx +58 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/MessageContent.tsx +157 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +3 -0
- inspect_ai/_view/www/src/samples/chat/MessageContents.tsx +133 -0
- inspect_ai/_view/www/src/samples/chat/messages.ts +112 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.tsx +147 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +14 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.tsx +76 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.module.css +19 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolOutput.tsx +60 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.module.css +4 -0
- inspect_ai/_view/www/src/samples/chat/tools/ToolTitle.tsx +18 -0
- inspect_ai/_view/www/src/samples/chat/tools/tool.ts +92 -0
- inspect_ai/_view/www/src/samples/descriptor/samplesDescriptor.tsx +365 -0
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.module.css +22 -0
- inspect_ai/_view/www/src/samples/descriptor/score/BooleanScoreDescriptor.tsx +26 -0
- inspect_ai/_view/www/src/samples/descriptor/score/CategoricalScoreDescriptor.tsx +18 -0
- inspect_ai/_view/www/src/samples/descriptor/score/NumericScoreDescriptor.tsx +27 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.module.css +18 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ObjectScoreDescriptor.tsx +71 -0
- inspect_ai/_view/www/src/samples/descriptor/score/OtherScoreDescriptor.tsx +20 -0
- inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.module.css +28 -0
- inspect_ai/_view/www/src/samples/descriptor/score/PassFailScoreDescriptor.tsx +81 -0
- inspect_ai/_view/www/src/samples/descriptor/score/ScoreDescriptor.tsx +99 -0
- inspect_ai/_view/www/src/samples/descriptor/types.ts +55 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.module.css +19 -0
- inspect_ai/_view/www/src/samples/error/FlatSampleErrorView.tsx +22 -0
- inspect_ai/_view/www/src/samples/error/SampleErrorView.module.css +17 -0
- inspect_ai/_view/www/src/samples/error/SampleErrorView.tsx +31 -0
- inspect_ai/_view/www/src/samples/error/error.ts +15 -0
- inspect_ai/_view/www/src/samples/list/SampleFooter.module.css +9 -0
- inspect_ai/_view/www/src/samples/list/SampleFooter.tsx +14 -0
- inspect_ai/_view/www/src/samples/list/SampleHeader.module.css +13 -0
- inspect_ai/_view/www/src/samples/list/SampleHeader.tsx +36 -0
- inspect_ai/_view/www/src/samples/list/SampleList.module.css +11 -0
- inspect_ai/_view/www/src/samples/list/SampleList.tsx +247 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.module.css +33 -0
- inspect_ai/_view/www/src/samples/list/SampleRow.tsx +98 -0
- inspect_ai/_view/www/src/samples/list/SampleSeparator.module.css +6 -0
- inspect_ai/_view/www/src/samples/list/SampleSeparator.tsx +24 -0
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.module.css +9 -0
- inspect_ai/_view/www/src/samples/sample-tools/EpochFilter.tsx +51 -0
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.module.css +16 -0
- inspect_ai/_view/www/src/samples/sample-tools/SelectScorer.tsx +175 -0
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.module.css +9 -0
- inspect_ai/_view/www/src/samples/sample-tools/SortFilter.tsx +186 -0
- inspect_ai/_view/www/src/samples/{tools/filters.mjs → sample-tools/filters.ts} +86 -81
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.module.css +16 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/SampleFilter.tsx +288 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/completions.ts +346 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/language.ts +19 -0
- inspect_ai/_view/www/src/samples/sample-tools/sample-filter/tokenize.ts +97 -0
- inspect_ai/_view/www/src/samples/{SampleLimit.mjs → sampleLimit.ts} +3 -6
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.module.css +53 -0
- inspect_ai/_view/www/src/samples/scores/SampleScoreView.tsx +168 -0
- inspect_ai/_view/www/src/samples/scores/SampleScores.module.css +5 -0
- inspect_ai/_view/www/src/samples/scores/SampleScores.tsx +37 -0
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.tsx +66 -0
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.tsx +51 -0
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.module.css +3 -0
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.tsx +54 -0
- inspect_ai/_view/www/src/samples/transcript/InputEventView.tsx +48 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.module.css +6 -0
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.tsx +36 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.module.css +43 -0
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.tsx +223 -0
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.module.css +23 -0
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.tsx +112 -0
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.tsx +75 -0
- inspect_ai/_view/www/src/samples/transcript/SampleTranscript.tsx +22 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.module.css +15 -0
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.tsx +100 -0
- inspect_ai/_view/www/src/samples/transcript/StepEventView.tsx +171 -0
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.module.css +19 -0
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.tsx +133 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.module.css +10 -0
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.tsx +92 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.module.css +49 -0
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.tsx +449 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.module.css +5 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNav.tsx +43 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.module.css +3 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventNavs.tsx +39 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.module.css +25 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventPanel.tsx +191 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.module.css +13 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventRow.tsx +32 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.module.css +8 -0
- inspect_ai/_view/www/src/samples/transcript/event/EventSection.tsx +29 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.tsx +67 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.tsx +285 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenders.module.css +10 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.module.css +9 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.tsx +346 -0
- inspect_ai/_view/www/src/samples/transcript/types.ts +58 -0
- inspect_ai/_view/www/src/types/log.d.ts +108 -19
- inspect_ai/_view/www/src/types/prism.d.ts +11 -0
- inspect_ai/_view/www/src/types.ts +71 -0
- inspect_ai/_view/www/src/usage/ModelTokenTable.tsx +28 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.module.css +24 -0
- inspect_ai/_view/www/src/usage/ModelUsagePanel.tsx +97 -0
- inspect_ai/_view/www/src/usage/TokenTable.module.css +17 -0
- inspect_ai/_view/www/src/usage/TokenTable.tsx +91 -0
- inspect_ai/_view/www/src/usage/UsageCard.module.css +15 -0
- inspect_ai/_view/www/src/usage/UsageCard.tsx +67 -0
- inspect_ai/_view/www/src/utils/attachments.ts +42 -0
- inspect_ai/_view/www/src/utils/{Base64.mjs → base64.ts} +1 -6
- inspect_ai/_view/www/src/{components/Browser.mjs → utils/browser.ts} +0 -1
- inspect_ai/_view/www/src/utils/debugging.ts +28 -0
- inspect_ai/_view/www/src/utils/dom.ts +30 -0
- inspect_ai/_view/www/src/utils/format.ts +194 -0
- inspect_ai/_view/www/src/utils/git.ts +7 -0
- inspect_ai/_view/www/src/utils/html.ts +6 -0
- inspect_ai/_view/www/src/utils/http.ts +14 -0
- inspect_ai/_view/www/src/utils/{Path.mjs → path.ts} +2 -9
- inspect_ai/_view/www/src/utils/{Print.mjs → print.ts} +34 -26
- inspect_ai/_view/www/src/utils/queue.ts +51 -0
- inspect_ai/_view/www/src/utils/sync.ts +114 -0
- inspect_ai/_view/www/src/utils/{Type.mjs → type.ts} +3 -6
- inspect_ai/_view/www/src/utils/vscode.ts +13 -0
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +324 -0
- inspect_ai/_view/www/src/workspace/WorkSpaceView.module.css +33 -0
- inspect_ai/_view/www/src/workspace/WorkSpaceView.tsx +158 -0
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.module.css +3 -0
- inspect_ai/_view/www/src/workspace/error/TaskErrorPanel.tsx +28 -0
- inspect_ai/_view/www/src/workspace/navbar/Navbar.module.css +54 -0
- inspect_ai/_view/www/src/workspace/navbar/Navbar.tsx +68 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.module.css +52 -0
- inspect_ai/_view/www/src/workspace/navbar/PrimaryBar.tsx +114 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.module.css +90 -0
- inspect_ai/_view/www/src/workspace/navbar/ResultsPanel.tsx +180 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.module.css +28 -0
- inspect_ai/_view/www/src/workspace/navbar/SecondaryBar.tsx +226 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.module.css +14 -0
- inspect_ai/_view/www/src/workspace/navbar/StatusPanel.tsx +61 -0
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.module.css +15 -0
- inspect_ai/_view/www/src/workspace/sidebar/EvalStatus.tsx +71 -0
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +5 -0
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +56 -0
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.module.css +68 -0
- inspect_ai/_view/www/src/workspace/sidebar/Sidebar.tsx +85 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.module.css +29 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarLogEntry.tsx +95 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.module.css +23 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoreView.tsx +43 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.module.css +35 -0
- inspect_ai/_view/www/src/workspace/sidebar/SidebarScoresView.tsx +63 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.module.css +0 -0
- inspect_ai/_view/www/src/workspace/tabs/InfoTab.tsx +70 -0
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.module.css +5 -0
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +46 -0
- inspect_ai/_view/www/src/workspace/tabs/SamplesTab.tsx +204 -0
- inspect_ai/_view/www/src/workspace/tabs/grouping.ts +195 -0
- inspect_ai/_view/www/src/workspace/tabs/types.ts +19 -0
- inspect_ai/_view/www/src/workspace/types.ts +10 -0
- inspect_ai/_view/www/src/workspace/utils.ts +34 -0
- inspect_ai/_view/www/tsconfig.json +23 -9
- inspect_ai/_view/www/vite.config.js +8 -17
- inspect_ai/_view/www/yarn.lock +627 -556
- inspect_ai/approval/_approval.py +2 -0
- inspect_ai/approval/_approver.py +4 -4
- inspect_ai/approval/_auto.py +1 -1
- inspect_ai/approval/_human/approver.py +3 -0
- inspect_ai/approval/_policy.py +5 -0
- inspect_ai/approval/_registry.py +2 -2
- inspect_ai/dataset/_dataset.py +64 -37
- inspect_ai/dataset/_sources/__init__.py +0 -0
- inspect_ai/dataset/_sources/csv.py +20 -12
- inspect_ai/dataset/_sources/file.py +4 -0
- inspect_ai/dataset/_sources/hf.py +39 -29
- inspect_ai/dataset/_sources/json.py +17 -9
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_convert.py +3 -3
- inspect_ai/log/_file.py +24 -9
- inspect_ai/log/_log.py +101 -13
- inspect_ai/log/_message.py +4 -2
- inspect_ai/log/_recorders/file.py +4 -0
- inspect_ai/log/_recorders/json.py +5 -7
- inspect_ai/log/_recorders/recorder.py +3 -0
- inspect_ai/log/_transcript.py +19 -8
- inspect_ai/model/__init__.py +2 -0
- inspect_ai/model/_cache.py +39 -21
- inspect_ai/model/_call_tools.py +4 -3
- inspect_ai/model/_chat_message.py +14 -4
- inspect_ai/model/_generate_config.py +1 -1
- inspect_ai/model/_model.py +31 -24
- inspect_ai/model/_model_output.py +14 -1
- inspect_ai/model/_openai.py +10 -18
- inspect_ai/model/_providers/anthropic.py +3 -3
- inspect_ai/model/_providers/google.py +9 -5
- inspect_ai/model/_providers/openai.py +5 -9
- inspect_ai/model/_providers/openai_o1.py +3 -5
- inspect_ai/model/_providers/openrouter.py +86 -0
- inspect_ai/model/_providers/providers.py +11 -0
- inspect_ai/scorer/__init__.py +6 -1
- inspect_ai/scorer/_answer.py +7 -7
- inspect_ai/scorer/_classification.py +38 -18
- inspect_ai/scorer/_common.py +2 -8
- inspect_ai/scorer/_match.py +4 -5
- inspect_ai/scorer/_metric.py +87 -28
- inspect_ai/scorer/_metrics/__init__.py +3 -3
- inspect_ai/scorer/_metrics/accuracy.py +8 -10
- inspect_ai/scorer/_metrics/mean.py +3 -17
- inspect_ai/scorer/_metrics/std.py +111 -30
- inspect_ai/scorer/_model.py +12 -12
- inspect_ai/scorer/_pattern.py +3 -3
- inspect_ai/scorer/_reducer/reducer.py +36 -21
- inspect_ai/scorer/_reducer/registry.py +2 -2
- inspect_ai/scorer/_reducer/types.py +7 -1
- inspect_ai/scorer/_score.py +11 -1
- inspect_ai/scorer/_scorer.py +110 -16
- inspect_ai/solver/__init__.py +1 -1
- inspect_ai/solver/_basic_agent.py +19 -22
- inspect_ai/solver/_bridge/__init__.py +0 -3
- inspect_ai/solver/_bridge/bridge.py +3 -3
- inspect_ai/solver/_chain.py +1 -2
- inspect_ai/solver/_critique.py +3 -3
- inspect_ai/solver/_fork.py +2 -2
- inspect_ai/solver/_human_agent/__init__.py +0 -0
- inspect_ai/solver/_human_agent/agent.py +5 -8
- inspect_ai/solver/_human_agent/commands/clock.py +14 -10
- inspect_ai/solver/_human_agent/commands/note.py +1 -1
- inspect_ai/solver/_human_agent/commands/score.py +0 -11
- inspect_ai/solver/_multiple_choice.py +38 -26
- inspect_ai/solver/_prompt.py +7 -7
- inspect_ai/solver/_solver.py +53 -52
- inspect_ai/solver/_task_state.py +80 -69
- inspect_ai/solver/_use_tools.py +9 -9
- inspect_ai/tool/__init__.py +4 -1
- inspect_ai/tool/_tool.py +43 -14
- inspect_ai/tool/_tool_call.py +6 -2
- inspect_ai/tool/_tool_choice.py +3 -1
- inspect_ai/tool/_tool_def.py +10 -8
- inspect_ai/tool/_tool_params.py +24 -0
- inspect_ai/tool/_tool_with.py +7 -7
- inspect_ai/tool/_tools/__init__.py +0 -0
- inspect_ai/tool/{beta → _tools}/_computer/_common.py +2 -2
- inspect_ai/tool/{beta → _tools}/_computer/_computer.py +13 -5
- inspect_ai/tool/_tools/_computer/_resources/tool/__init__.py +0 -0
- inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_x11_client.py +1 -1
- inspect_ai/tool/_tools/_computer/_resources/tool/requirements.txt +0 -0
- inspect_ai/tool/_tools/_execute.py +23 -11
- inspect_ai/tool/_tools/_web_browser/_resources/README.md +2 -2
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +5 -3
- inspect_ai/tool/_tools/_web_search.py +7 -5
- inspect_ai/tool/beta.py +3 -0
- inspect_ai/util/_concurrency.py +3 -3
- inspect_ai/util/_panel.py +2 -0
- inspect_ai/util/_resource.py +12 -12
- inspect_ai/util/_sandbox/docker/compose.py +23 -20
- inspect_ai/util/_sandbox/docker/config.py +2 -1
- inspect_ai/util/_sandbox/docker/docker.py +42 -86
- inspect_ai/util/_sandbox/docker/service.py +100 -0
- inspect_ai/util/_sandbox/environment.py +99 -96
- inspect_ai/util/_sandbox/self_check.py +124 -16
- inspect_ai/util/_subprocess.py +5 -3
- inspect_ai/util/_subtask.py +15 -16
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/LICENSE +1 -1
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/METADATA +11 -6
- inspect_ai-0.3.64.dist-info/RECORD +625 -0
- inspect_ai/_view/www/src/Register.mjs +0 -3
- inspect_ai/_view/www/src/Types.mjs +0 -38
- inspect_ai/_view/www/src/appearance/Colors.mjs +0 -27
- inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -66
- inspect_ai/_view/www/src/appearance/Icons.mjs +0 -240
- inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -184
- inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -34
- inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +0 -74
- inspect_ai/_view/www/src/components/Card.mjs +0 -126
- inspect_ai/_view/www/src/components/ChatView.mjs +0 -441
- inspect_ai/_view/www/src/components/CopyButton.mjs +0 -48
- inspect_ai/_view/www/src/components/Dialog.mjs +0 -61
- inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -15
- inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -29
- inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -23
- inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -66
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -136
- inspect_ai/_view/www/src/components/FindBand.mjs +0 -157
- inspect_ai/_view/www/src/components/HumanBaselineView.mjs +0 -168
- inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -61
- inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -32
- inspect_ai/_view/www/src/components/LargeModal.mjs +0 -190
- inspect_ai/_view/www/src/components/LightboxCarousel.mjs +0 -217
- inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -118
- inspect_ai/_view/www/src/components/MessageBand.mjs +0 -48
- inspect_ai/_view/www/src/components/MessageContent.mjs +0 -111
- inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -92
- inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -109
- inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -50
- inspect_ai/_view/www/src/components/NavPills.mjs +0 -63
- inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -51
- inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -54
- inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -19
- inspect_ai/_view/www/src/components/TabSet.mjs +0 -184
- inspect_ai/_view/www/src/components/ToolButton.mjs +0 -16
- inspect_ai/_view/www/src/components/Tools.mjs +0 -376
- inspect_ai/_view/www/src/components/VirtualList.mjs +0 -280
- inspect_ai/_view/www/src/components/ansi-output.js +0 -932
- inspect_ai/_view/www/src/json/JsonTab.mjs +0 -48
- inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -25
- inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -13
- inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -263
- inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -418
- inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -175
- inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -418
- inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -123
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -516
- inspect_ai/_view/www/src/samples/SampleError.mjs +0 -99
- inspect_ai/_view/www/src/samples/SampleList.mjs +0 -427
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -172
- inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -34
- inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -20
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -771
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -399
- inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -64
- inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -38
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -756
- inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -141
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -151
- inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -71
- inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -271
- inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -46
- inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -33
- inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -59
- inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -32
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +0 -216
- inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -107
- inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -74
- inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -100
- inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -187
- inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -133
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -88
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +0 -459
- inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -44
- inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -53
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +0 -254
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +0 -313
- inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -418
- inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -72
- inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -159
- inspect_ai/_view/www/src/utils/Format.mjs +0 -260
- inspect_ai/_view/www/src/utils/Git.mjs +0 -12
- inspect_ai/_view/www/src/utils/Html.mjs +0 -21
- inspect_ai/_view/www/src/utils/attachments.mjs +0 -31
- inspect_ai/_view/www/src/utils/debugging.mjs +0 -23
- inspect_ai/_view/www/src/utils/http.mjs +0 -18
- inspect_ai/_view/www/src/utils/queue.mjs +0 -67
- inspect_ai/_view/www/src/utils/sync.mjs +0 -101
- inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -17
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -516
- inspect_ai/tool/beta/__init__.py +0 -5
- inspect_ai-0.3.62.dist-info/RECORD +0 -481
- /inspect_ai/{tool/beta/_computer/_resources/tool → _eval}/__init__.py +0 -0
- /inspect_ai/{tool/beta/_computer/_resources/tool/requirements.txt → _util/__init__.py} +0 -0
- /inspect_ai/_view/www/src/{constants.mjs → constants.ts} +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/__init__.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_computer_split.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/Dockerfile +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/README.md +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/entrypoint.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/novnc_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/x11vnc_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xfce_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/entrypoint/xvfb_startup.sh +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/globalStorage/state.vscdb +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/Code/User/settings.json +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-panel.xml +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/.config/xfce4/xfconf/xfce-perchannel-xml/xfce4-screensaver.xml +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Terminal.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_logger.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_run.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/_tool_result.py +0 -0
- /inspect_ai/tool/{beta → _tools}/_computer/_resources/tool/computer_tool.py +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/WHEEL +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.62.dist-info → inspect_ai-0.3.64.dist-info}/top_level.txt +0 -0
@@ -123,11 +123,11 @@ async def computer_sandbox() -> SandboxEnvironment:
|
|
123
123
|
else:
|
124
124
|
raise PrerequisiteError(
|
125
125
|
dedent("""
|
126
|
-
The computer tool service was not found in any of the sandboxes for this sample. Please add the computer tool service to your configuration. For example, the following Docker compose file uses the aisiuk/inspect-computer-tool:latest
|
126
|
+
The computer tool service was not found in any of the sandboxes for this sample. Please add the computer tool service to your configuration. For example, the following Docker compose file uses the aisiuk/inspect-computer-tool:latest image as its default sandbox:
|
127
127
|
|
128
128
|
services:
|
129
129
|
default:
|
130
|
-
image: "aisiuk/inspect-computer-tool:latest
|
130
|
+
image: "aisiuk/inspect-computer-tool:latest"
|
131
131
|
init: true
|
132
132
|
""").strip()
|
133
133
|
)
|
@@ -2,10 +2,7 @@ from typing import Awaitable, Callable
|
|
2
2
|
|
3
3
|
from inspect_ai._util.content import Content, ContentImage, ContentText
|
4
4
|
from inspect_ai.tool import Tool, ToolResult, tool
|
5
|
-
from inspect_ai.tool._tool import
|
6
|
-
TOOL_INIT_MODEL_INPUT,
|
7
|
-
ToolParsingError,
|
8
|
-
)
|
5
|
+
from inspect_ai.tool._tool import TOOL_INIT_MODEL_INPUT, ToolParsingError
|
9
6
|
from inspect_ai.tool._tool_call import ToolCallModelInput
|
10
7
|
|
11
8
|
from . import _common as common
|
@@ -16,6 +13,17 @@ ActionFunction = Callable[[str], ToolResult | Awaitable[ToolResult]]
|
|
16
13
|
|
17
14
|
@tool
|
18
15
|
def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool:
|
16
|
+
"""Desktop computer tool.
|
17
|
+
|
18
|
+
See documentation at <https://inspect.ai-safety-institute.org.uk/tools.html#sec-computer>.
|
19
|
+
|
20
|
+
Args:
|
21
|
+
max_screenshots: The maximum number of screenshots to play
|
22
|
+
back to the model as input. Defaults to 1 (set to `None` to have no limit).
|
23
|
+
timeout: Timeout in seconds for computer tool actions.
|
24
|
+
Defaults to 180 (set to `None` for no timeout).
|
25
|
+
"""
|
26
|
+
|
19
27
|
async def execute(
|
20
28
|
action: Action,
|
21
29
|
text: str | None = None,
|
@@ -84,7 +92,7 @@ def computer(max_screenshots: int | None = 1, timeout: int | None = 180) -> Tool
|
|
84
92
|
if coordinate is not None:
|
85
93
|
raise ToolParsingError(f"coordinate is not accepted for {action}")
|
86
94
|
if not isinstance(text, str):
|
87
|
-
raise ToolParsingError(
|
95
|
+
raise ToolParsingError(f"{text} must be a string")
|
88
96
|
|
89
97
|
if action == "key":
|
90
98
|
return await common.press_key(text, timeout=timeout)
|
File without changes
|
@@ -138,7 +138,7 @@ class X11Client:
|
|
138
138
|
if coordinate is not None:
|
139
139
|
raise ToolError(f"coordinate is not accepted for {action}")
|
140
140
|
if not isinstance(text, str):
|
141
|
-
raise ToolError(
|
141
|
+
raise ToolError(f"{text} must be a string")
|
142
142
|
|
143
143
|
if action == "key":
|
144
144
|
return await self.shell(
|
File without changes
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from inspect_ai.util import sandbox
|
1
|
+
from inspect_ai.util import sandbox as sandbox_env
|
2
2
|
|
3
3
|
from .._tool import Tool, tool
|
4
4
|
from .._tool_call import ToolCall, ToolCallContent, ToolCallView, ToolCallViewer
|
@@ -20,14 +20,17 @@ def code_viewer(language: str, code_param: str) -> ToolCallViewer:
|
|
20
20
|
|
21
21
|
|
22
22
|
@tool(viewer=code_viewer("bash", "cmd"))
|
23
|
-
def bash(
|
23
|
+
def bash(
|
24
|
+
timeout: int | None = None, user: str | None = None, sandbox: str | None = None
|
25
|
+
) -> Tool:
|
24
26
|
"""Bash shell command execution tool.
|
25
27
|
|
26
28
|
Execute bash shell commands using a sandbox environment (e.g. "docker").
|
27
29
|
|
28
30
|
Args:
|
29
|
-
timeout
|
30
|
-
user
|
31
|
+
timeout: Timeout (in seconds) for command.
|
32
|
+
user: User to execute commands as.
|
33
|
+
sandbox: Optional sandbox environment name.
|
31
34
|
|
32
35
|
Returns:
|
33
36
|
String with command output (stdout) or command error (stderr).
|
@@ -44,7 +47,7 @@ def bash(timeout: int | None = None, user: str | None = None) -> Tool:
|
|
44
47
|
The output of the command.
|
45
48
|
"""
|
46
49
|
# execute the command
|
47
|
-
result = await sandbox
|
50
|
+
result = await sandbox_env(sandbox).exec(
|
48
51
|
cmd=["bash", "--login", "-c", cmd], timeout=timeout, user=user
|
49
52
|
)
|
50
53
|
# return output (including stderr if any)
|
@@ -57,14 +60,17 @@ def bash(timeout: int | None = None, user: str | None = None) -> Tool:
|
|
57
60
|
|
58
61
|
|
59
62
|
@tool(viewer=code_viewer("python", "code"))
|
60
|
-
def python(
|
63
|
+
def python(
|
64
|
+
timeout: int | None = None, user: str | None = None, sandbox: str | None = None
|
65
|
+
) -> Tool:
|
61
66
|
"""Python code execution tool.
|
62
67
|
|
63
68
|
Execute Python code using a sandbox environment (e.g. "docker").
|
64
69
|
|
65
70
|
Args:
|
66
|
-
timeout
|
67
|
-
user
|
71
|
+
timeout: Timeout (in seconds) for command.
|
72
|
+
user: User to execute commands as.
|
73
|
+
sandbox: Optional sandbox environment name.
|
68
74
|
|
69
75
|
Returns:
|
70
76
|
String with command output (stdout) or command error (stderr).
|
@@ -74,8 +80,14 @@ def python(timeout: int | None = None, user: str | None = None) -> Tool:
|
|
74
80
|
"""
|
75
81
|
Use the python function to execute Python code.
|
76
82
|
|
77
|
-
The
|
78
|
-
|
83
|
+
The Python tool executes single-run Python scripts. Important notes:
|
84
|
+
1. Each execution is independent - no state is preserved between runs
|
85
|
+
2. You must explicitly use print() statements to see any output
|
86
|
+
3. Simply writing expressions (like in notebooks) will not display results
|
87
|
+
4. The script cannot accept interactive input during execution
|
88
|
+
5. Return statements alone won't produce visible output
|
89
|
+
6. All variables and imports are cleared between executions
|
90
|
+
7. Standard output (via print()) is the only way to see results
|
79
91
|
|
80
92
|
Args:
|
81
93
|
code (str): The python code to execute.
|
@@ -83,7 +95,7 @@ def python(timeout: int | None = None, user: str | None = None) -> Tool:
|
|
83
95
|
Returns:
|
84
96
|
The output of the Python code.
|
85
97
|
"""
|
86
|
-
result = await sandbox
|
98
|
+
result = await sandbox_env(sandbox).exec(
|
87
99
|
cmd=["python3"], input=code, timeout=timeout, user=user
|
88
100
|
)
|
89
101
|
# return output (including stderr if any)
|
@@ -40,7 +40,7 @@ The result will be printed out in _stdout_ in the following format:
|
|
40
40
|
error: <an ERROR message if one occurred>
|
41
41
|
info: <general info about the container>
|
42
42
|
web_url: <the URL of the page the browser is currently at>
|
43
|
-
|
43
|
+
web_at: <accessibility tree of the visible elements of the page>
|
44
44
|
```
|
45
45
|
|
46
46
|
|
@@ -57,7 +57,7 @@ The tool consists of the following components:
|
|
57
57
|
* _web_environment.py_ - an environment which gets instantiated by the servicer and which launches the browser, stores its state and maps client commands to Playwright API.
|
58
58
|
* _playwright_crawler.py_ - a wrapper over the sync Playwright API.
|
59
59
|
|
60
|
-
* [WebClient](web_client.py) - a simple stateless client to
|
60
|
+
* [WebClient](web_client.py) - a simple stateless client to interact with the server. When launched, the client:
|
61
61
|
1. creates a connection with the server;
|
62
62
|
2. sends user command to the server;
|
63
63
|
3. receives the response in the form of observations and prints them to stdout;
|
@@ -16,10 +16,12 @@ from inspect_ai.util._store_model import StoreModel, store_as
|
|
16
16
|
def web_browser(interactive: bool = True) -> list[Tool]:
|
17
17
|
"""Tools used for web browser navigation.
|
18
18
|
|
19
|
+
See documentation at <https://inspect.ai-safety-institute.org.uk/tools.html#sec-web-browser>.
|
20
|
+
|
19
21
|
Args:
|
20
|
-
interactive
|
21
|
-
|
22
|
-
|
22
|
+
interactive: Provide interactive tools (enable
|
23
|
+
clicking, typing, and submitting forms). Defaults
|
24
|
+
to True.
|
23
25
|
|
24
26
|
Returns:
|
25
27
|
List of tools used for web browser navigation.
|
@@ -41,14 +41,16 @@ def web_search(
|
|
41
41
|
A web search is conducted using the specified provider, the results are parsed for relevance
|
42
42
|
using the specified model, and the top 'num_results' relevant pages are returned.
|
43
43
|
|
44
|
+
See further documentation at <https://inspect.ai-safety-institute.org.uk/tools.html#sec-web-search>.
|
45
|
+
|
44
46
|
Args:
|
45
|
-
provider
|
47
|
+
provider: Search provider (defaults to "google", currently
|
46
48
|
the only provider). Possible future providers include "brave" and "bing".
|
47
|
-
num_results
|
48
|
-
max_provider_calls
|
49
|
-
max_connections
|
49
|
+
num_results: Number of web search result pages to return to the model.
|
50
|
+
max_provider_calls: Maximum number of search calls to make to the search provider.
|
51
|
+
max_connections: Maximum number of concurrent connections to API
|
50
52
|
endpoint of search provider.
|
51
|
-
model
|
53
|
+
model: Model used to parse web pages for relevance.
|
52
54
|
|
53
55
|
Returns:
|
54
56
|
A tool that can be registered for use by models to search the web.
|
inspect_ai/tool/beta.py
ADDED
inspect_ai/util/_concurrency.py
CHANGED
@@ -23,12 +23,12 @@ def concurrency(
|
|
23
23
|
for launching subprocesses is handled via the `subprocess` function.
|
24
24
|
|
25
25
|
Args:
|
26
|
-
name
|
26
|
+
name: Name for concurrency context. This serves as the
|
27
27
|
display name for the context, and also the unique context
|
28
28
|
key (if the `key` parameter is omitted)
|
29
|
-
concurrency
|
29
|
+
concurrency: Maximum number of coroutines that can
|
30
30
|
enter the context.
|
31
|
-
key
|
31
|
+
key: Unique context key for this context. Optional.
|
32
32
|
Used if the unique key isn't human readable -- e.g. includes
|
33
33
|
api tokens or account ids so that the more readable `name`
|
34
34
|
can be presented to users, e.g. in console UI.
|
inspect_ai/util/_panel.py
CHANGED
inspect_ai/util/_resource.py
CHANGED
@@ -33,18 +33,18 @@ def resource(
|
|
33
33
|
`resource("templates/prompt.txt", type="file")`
|
34
34
|
|
35
35
|
Args:
|
36
|
-
resource
|
37
|
-
|
38
|
-
|
39
|
-
type
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
fs_options
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
36
|
+
resource: Path to local or remote (e.g. s3://)
|
37
|
+
resource, or for `type="auto"` (the default),
|
38
|
+
a string containing the literal resource value.
|
39
|
+
type: For "auto" (the default),
|
40
|
+
interpret the resource as a literal string if it's not
|
41
|
+
a valid path. For "file", always interpret it as
|
42
|
+
a file path.
|
43
|
+
fs_options: Optional. Additional
|
44
|
+
arguments to pass through to the `fsspec` filesystem
|
45
|
+
provider (e.g. `S3FileSystem`). Use `{"anon": True }`
|
46
|
+
if you are accessing a public S3 bucket with no
|
47
|
+
credentials.
|
48
48
|
|
49
49
|
Returns:
|
50
50
|
Text content of resource.
|
@@ -3,12 +3,13 @@ import os
|
|
3
3
|
import shlex
|
4
4
|
from logging import getLogger
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import Any, Literal,
|
6
|
+
from typing import Any, Literal, cast
|
7
7
|
|
8
8
|
import yaml
|
9
9
|
from pydantic import BaseModel
|
10
10
|
|
11
11
|
from inspect_ai._util.error import PrerequisiteError
|
12
|
+
from inspect_ai._util.trace import trace_message
|
12
13
|
from inspect_ai.util._display import display_type
|
13
14
|
from inspect_ai.util._subprocess import ExecResult, subprocess
|
14
15
|
|
@@ -16,26 +17,39 @@ from .prereqs import (
|
|
16
17
|
DOCKER_COMPOSE_REQUIRED_VERSION_PULL_POLICY,
|
17
18
|
validate_docker_compose,
|
18
19
|
)
|
20
|
+
from .service import ComposeService, services_healthcheck_time
|
19
21
|
from .util import ComposeProject, is_inspect_project
|
20
22
|
|
21
23
|
logger = getLogger(__name__)
|
22
24
|
|
23
25
|
# How long to wait for compose environment to pass a health check
|
24
|
-
COMPOSE_WAIT =
|
26
|
+
COMPOSE_WAIT = 120
|
25
27
|
|
26
28
|
|
27
|
-
async def compose_up(
|
29
|
+
async def compose_up(
    project: ComposeProject, services: dict[str, ComposeService]
) -> None:
    """Start the services of a compose project and wait for them to come up.

    Args:
        project: Compose project to start.
        services: Service definitions of the project. Their healthcheck
            settings (if any) determine how long we wait for startup.
    """
    # compute the maximum amount of time we will wait for the services
    up_command = ["up", "--detach", "--wait"]

    # are there healthchecks in the service definitions? if so then peg our timeout
    # at the maximum total wait time. otherwise, pick a reasonable default
    healthcheck_time = services_healthcheck_time(services)
    if healthcheck_time > 0:
        timeout: int = healthcheck_time
        # f-string so that the computed value (not the literal placeholder)
        # ends up in the trace log
        trace_message(
            logger, "Docker", f"Docker services heathcheck timeout: {timeout}"
        )
    else:
        timeout = COMPOSE_WAIT

    # align global wait timeout to maximum healthcheck timeout
    up_command.extend(["--wait-timeout", str(timeout + 1)])

    # Start the environment. Note that we don't check the result because docker will
    # return a non-zero exit code for services that exit (even successfully) when
    # passing the --wait flag (see https://github.com/docker/compose/issues/10596).
    # In practice, we will catch any errors when calling compose_check_running()
    # immediately after we call compose_up().
    # NOTE(review): the timeout handed to compose_command is one second shorter
    # than --wait-timeout; confirm this ordering is intended.
    await compose_command(up_command, project=project, timeout=timeout)
|
39
53
|
|
40
54
|
|
41
55
|
async def compose_down(project: ComposeProject, quiet: bool = True) -> None:
|
@@ -191,17 +205,6 @@ async def compose_exec(
|
|
191
205
|
)
|
192
206
|
|
193
207
|
|
194
|
-
ComposeService = TypedDict(
|
195
|
-
"ComposeService",
|
196
|
-
{
|
197
|
-
"image": str | None,
|
198
|
-
"build": str | None,
|
199
|
-
"x-default": bool | None,
|
200
|
-
"x-local": bool | None,
|
201
|
-
},
|
202
|
-
)
|
203
|
-
|
204
|
-
|
205
208
|
async def compose_services(project: ComposeProject) -> dict[str, ComposeService]:
|
206
209
|
result = await compose_command(["config"], project=project, timeout=60)
|
207
210
|
if not result.success:
|
@@ -42,7 +42,8 @@ def find_compose_file(parent: str = "") -> str | None:
|
|
42
42
|
|
43
43
|
|
44
44
|
def is_dockerfile(file: str) -> bool:
    """Tell whether `file` is named like a Dockerfile.

    A path qualifies when its final component equals `DOCKERFILE`, or when
    its extension is `DOCKERFILE` preceded by a dot.
    """
    candidate = Path(file)
    if candidate.name == DOCKERFILE:
        return True
    return candidate.suffix == f".{DOCKERFILE}"
|
46
47
|
|
47
48
|
|
48
49
|
def has_dockerfile(parent: str = "") -> bool:
|
@@ -1,3 +1,4 @@
|
|
1
|
+
import base64
|
1
2
|
import errno
|
2
3
|
import json
|
3
4
|
import os
|
@@ -8,6 +9,7 @@ from typing import Literal, Union, cast, overload
|
|
8
9
|
|
9
10
|
from typing_extensions import override
|
10
11
|
|
12
|
+
from inspect_ai._util.error import PrerequisiteError
|
11
13
|
from inspect_ai.util._subprocess import ExecResult, subprocess
|
12
14
|
|
13
15
|
from ..environment import (
|
@@ -34,7 +36,6 @@ from .compose import (
|
|
34
36
|
compose_build,
|
35
37
|
compose_check_running,
|
36
38
|
compose_cleanup_images,
|
37
|
-
compose_command,
|
38
39
|
compose_cp,
|
39
40
|
compose_exec,
|
40
41
|
compose_ps,
|
@@ -85,6 +86,14 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
85
86
|
|
86
87
|
services = await compose_services(project)
|
87
88
|
for name, service in services.items():
|
89
|
+
# if the service has an explicit container_name then
|
90
|
+
# error (as this won't work w/ epochs > 1)
|
91
|
+
container_name = service.get("container_name", None)
|
92
|
+
if container_name:
|
93
|
+
raise PrerequisiteError(
|
94
|
+
f"ERROR: Docker service '{name}' includes an explicitly configured container_name ('{container_name}'). This is not permitted, as container names should be provisioned by Docker compose and an explicit container_name will not work with epochs > 1."
|
95
|
+
)
|
96
|
+
|
88
97
|
# build internal images
|
89
98
|
image = service.get("image", None)
|
90
99
|
if image and is_internal_image(image):
|
@@ -139,7 +148,7 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
139
148
|
services = await compose_services(project)
|
140
149
|
|
141
150
|
# start the services
|
142
|
-
await compose_up(project)
|
151
|
+
await compose_up(project, services)
|
143
152
|
|
144
153
|
# check to ensure that the services are running
|
145
154
|
running_services = await compose_check_running(
|
@@ -270,103 +279,50 @@ class DockerSandboxEnvironment(SandboxEnvironment):
|
|
270
279
|
|
271
280
|
@override
async def write_file(self, file: str, contents: str | bytes) -> None:
    """Write text or binary contents to a file inside the container.

    The parent directory is created first (`mkdir -p`). The contents are
    then streamed to `tee` via stdin; binary contents are sent base64
    encoded and decoded inside the container.

    Args:
        file: Destination path (resolved with `container_file()`).
        contents: Text or bytes to write.

    Raises:
        PermissionError: If the write fails with a "permission denied" error.
        IsADirectoryError: If the destination is an existing directory.
        RuntimeError: If the parent directory cannot be created or the
            write fails for any other reason.
    """
    # resolve relative file paths
    file = self.container_file(file)

    # ensure that the directory exists
    parent = Path(file).parent.as_posix()
    if parent != ".":
        result = await self.exec(["mkdir", "-p", parent])
        if not result.success:
            msg = f"Failed to create container directory {parent}: {result.stderr}"
            raise RuntimeError(msg)

    # write the file. tee's stdout is discarded in both branches so that the
    # file contents are not echoed back through the captured command output
    if isinstance(contents, str):
        result = await self.exec(
            [
                "sh",
                "-e",
                "-c",
                'tee -- "$1" > /dev/null',
                "write_file_script",
                file,
            ],
            input=contents,
        )
    else:
        base64_contents = base64.b64encode(contents).decode("US-ASCII")
        result = await self.exec(
            [
                "sh",
                "-e",
                "-c",
                'base64 -d | tee -- "$1" > /dev/null',
                "write_file_script",
                file,
            ],
            input=base64_contents,
        )

    if result.returncode != 0:
        # compare case-insensitively: message casing differs between tools
        stderr = result.stderr.casefold()
        if "permission denied" in stderr:
            ls_result = await self.exec(["ls", "-la", "."])
            error_string = f"Permission was denied. Error details: {result.stderr}; ls -la: {ls_result.stdout}"
            raise PermissionError(error_string)
        elif "cannot overwrite directory" in stderr or "is a directory" in stderr:
            raise IsADirectoryError(
                f"Failed to write file: {file} because it is a directory already"
            )
        else:
            raise RuntimeError(f"failed to copy during write_file: {result}")
|
370
326
|
|
371
327
|
@overload
|
372
328
|
async def read_file(self, file: str, text: Literal[True] = True) -> str: ...
|
@@ -0,0 +1,100 @@
|
|
1
|
+
import re
|
2
|
+
from dataclasses import dataclass
|
3
|
+
from typing import TypedDict
|
4
|
+
|
5
|
+
|
6
|
+
class ComposeServiceHealthcheck(TypedDict, total=False):
    """Healthcheck settings of a compose service.

    Declared with `total=False`, so every key is optional.
    """

    # presumably a duration string like the other time fields -- TODO confirm
    start_period: str
    # duration string; service_healthcheck_time() parses it with parse_duration()
    interval: str
    # number of retries; used as a multiplier by service_healthcheck_time()
    retries: int
    # duration string; service_healthcheck_time() parses it with parse_duration()
    timeout: str
|
11
|
+
|
12
|
+
|
13
|
+
# Typed view of a compose service definition. The functional TypedDict syntax
# is required because keys such as "x-default" are not valid Python
# identifiers. total=False makes every key optional.
ComposeService = TypedDict(
    "ComposeService",
    {
        "image": str,
        "build": str,
        "container_name": str,
        "x-default": bool,
        "x-local": bool,
        "healthcheck": ComposeServiceHealthcheck,
    },
    total=False,
)
|
25
|
+
|
26
|
+
|
27
|
+
def services_healthcheck_time(services: dict[str, ComposeService]) -> int:
    """Return the longest healthcheck time across all services.

    Args:
        services: Mapping of service name to service definition.

    Returns:
        The largest value reported by `service_healthcheck_time()` for any
        service, or 0 when there are no services.
    """
    # seed with 0 so that an empty mapping yields 0
    candidates = [0]
    candidates.extend(
        service_healthcheck_time(definition) for definition in services.values()
    )
    return max(candidates)
|
35
|
+
|
36
|
+
|
37
|
+
def service_healthcheck_time(service: ComposeService) -> int:
    """
    Calculate the maximum time a single service's healthcheck could take.

    The total time is:
    start_period + (retries * (interval + timeout))

    Probe failures during the start period are not counted towards the
    maximum number of retries, so the start period has to be added to the
    time taken by the retries themselves.

    Default values (from Docker documentation):
    - start_period: 0s
    - retries: 3
    - interval: 30s
    - timeout: 30s

    Args:
        service: Compose service definition.

    Returns:
        Maximum healthcheck time in whole seconds (truncated), or 0 if the
        service defines no healthcheck.
    """
    healthcheck = service.get("healthcheck", None)
    if healthcheck is None:
        return 0

    # Parse duration strings with defaults
    retries = healthcheck.get("retries", 3)
    start_period = parse_duration(healthcheck.get("start_period", "0s"))
    interval = parse_duration(healthcheck.get("interval", "30s"))
    timeout = parse_duration(healthcheck.get("timeout", "30s"))

    # Calculate total time in seconds
    total_time = start_period.seconds + retries * (
        interval.seconds + timeout.seconds
    )

    return int(total_time)
|
62
|
+
|
63
|
+
|
64
|
+
@dataclass
class Duration:
    """A span of time stored as a whole number of nanoseconds."""

    nanoseconds: int

    @property
    def seconds(self) -> float:
        """The duration in (possibly fractional) seconds."""
        return self.nanoseconds / 1_000_000_000


def parse_duration(duration_str: str) -> Duration:
    """Parse a Docker compose style duration string.

    A duration is a sequence of `<number><unit>` terms (e.g. "30s", "1m30s",
    "1.5s", "500ms"). Numbers may carry a decimal fraction. Supported units
    are ns, us (also spelled µs), ms, s, m and h. Whitespace is ignored.

    Args:
        duration_str: Duration string. An empty string yields a zero duration.

    Returns:
        The parsed duration (sub-nanosecond remainders are truncated).

    Raises:
        ValueError: If no `<number><unit>` term is found or a unit is unknown.
    """
    # local import: only needed for exact handling of fractional values
    from decimal import Decimal

    if not duration_str:
        return Duration(0)

    units = {
        "ns": 1,
        "us": 1_000,
        "\u00b5s": 1_000,  # micro sign, as printed by Go's duration formatting
        "\u03bcs": 1_000,  # greek small letter mu
        "ms": 1_000_000,
        "s": 1_000_000_000,
        "m": 60_000_000_000,
        "h": 3_600_000_000_000,
    }

    duration_str = "".join(duration_str.split())
    # the number may have a fractional part ("1.5s"); matching only the
    # digits after the dot would silently turn "1.5s" into 5 seconds
    pattern = re.compile(r"(\d+(?:\.\d+)?|\.\d+)([a-z\u00b5\u03bc]+)")
    matches = pattern.findall(duration_str)

    if not matches:
        raise ValueError(f"Invalid duration format: {duration_str}")

    total_nanoseconds = 0
    for number, unit in matches:
        if unit not in units:
            raise ValueError(f"Invalid unit: {unit}")
        # Decimal keeps fractional values exact before truncating to whole ns
        total_nanoseconds += int(Decimal(number) * units[unit])

    return Duration(total_nanoseconds)
|