inspect-ai 0.3.90__py3-none-any.whl → 0.3.92__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +13 -0
- inspect_ai/_cli/eval.py +44 -0
- inspect_ai/_display/textual/widgets/samples.py +49 -4
- inspect_ai/_display/textual/widgets/vscode.py +4 -2
- inspect_ai/_eval/eval.py +41 -28
- inspect_ai/_eval/evalset.py +4 -0
- inspect_ai/_eval/loader.py +4 -5
- inspect_ai/_eval/registry.py +1 -1
- inspect_ai/_eval/run.py +6 -3
- inspect_ai/_eval/task/log.py +6 -0
- inspect_ai/_eval/task/run.py +108 -41
- inspect_ai/_eval/task/sandbox.py +19 -5
- inspect_ai/_util/_async.py +1 -1
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/environ.py +32 -0
- inspect_ai/_util/file.py +8 -1
- inspect_ai/_util/httpx.py +105 -22
- inspect_ai/_util/registry.py +83 -9
- inspect_ai/_util/text.py +81 -17
- inspect_ai/_util/transcript.py +9 -6
- inspect_ai/_util/vscode.py +7 -2
- inspect_ai/_view/schema.py +1 -1
- inspect_ai/_view/www/babel.config.js +11 -0
- inspect_ai/_view/www/dist/assets/index.css +3640 -3563
- inspect_ai/_view/www/dist/assets/index.js +59204 -52519
- inspect_ai/_view/www/eslint.config.mjs +10 -1
- inspect_ai/_view/www/jest.config.mjs +21 -0
- inspect_ai/_view/www/log-schema.json +111 -2
- inspect_ai/_view/www/package.json +19 -5
- inspect_ai/_view/www/src/{types → @types}/log.d.ts +95 -32
- inspect_ai/_view/www/{App.css → src/app/App.css} +22 -14
- inspect_ai/_view/www/src/app/App.tsx +168 -0
- inspect_ai/_view/www/src/{AppErrorBoundary.tsx → app/AppErrorBoundary.tsx} +1 -1
- inspect_ai/_view/www/src/{appearance → app/appearance}/icons.ts +1 -0
- inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.tsx +5 -5
- inspect_ai/_view/www/src/{workspace/WorkSpaceView.tsx → app/log-view/LogView.tsx} +59 -40
- inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +159 -0
- inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +109 -0
- inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.tsx +3 -3
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.tsx +4 -4
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.tsx +8 -8
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.tsx +6 -6
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.tsx +8 -8
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.tsx +35 -6
- inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +136 -0
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/SamplesTab.tsx +82 -73
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/grouping.ts +3 -3
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/types.ts +1 -1
- inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.tsx +2 -2
- inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.tsx +1 -1
- inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.tsx +4 -4
- inspect_ai/_view/www/src/{plan → app/plan}/PlanCard.tsx +2 -2
- inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.tsx +5 -5
- inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/AppRouter.tsx +58 -0
- inspect_ai/_view/www/src/app/routing/navigationHooks.ts +182 -0
- inspect_ai/_view/www/src/app/routing/url.ts +43 -0
- inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.tsx +11 -27
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDialog.tsx +36 -40
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.tsx +116 -49
- inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.module.css +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.tsx +29 -26
- inspect_ai/_view/www/src/{samples → app/samples}/SamplesTools.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.module.css +5 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.tsx +12 -4
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRenderer.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.tsx +6 -1
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatView.tsx +4 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.tsx +5 -3
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +12 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContent.tsx +11 -10
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContents.tsx +14 -8
- inspect_ai/_view/www/src/{samples → app/samples}/chat/messages.ts +2 -2
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolCallView.tsx +26 -27
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +19 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolInput.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.module.css +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.module.css +4 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/tool.ts +1 -1
- inspect_ai/_view/www/src/app/samples/chat/types.ts +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/samplesDescriptor.tsx +38 -15
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/CategoricalScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/NumericScoreDescriptor.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/OtherScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/types.ts +4 -3
- inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.module.css +2 -1
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.tsx +3 -0
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.tsx +47 -33
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.module.css +16 -0
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.tsx +47 -20
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/filters.ts +8 -6
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.tsx +4 -3
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/completions.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/language.ts +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/sampleDataAdapter.ts +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/sampleLimit.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.tsx +12 -11
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.tsx +6 -6
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ApprovalEventView.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ErrorEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/InputEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.module.css +13 -7
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.tsx +49 -21
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.tsx +11 -9
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleLimitEventView.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.tsx +8 -6
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/StepEventView.tsx +11 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.module.css +8 -7
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.tsx +32 -114
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.module.css +6 -5
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.tsx +14 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/utils.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenderers.tsx +23 -21
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenders.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.tsx +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +142 -0
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +39 -0
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/types.ts +1 -1
- inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.tsx +1 -1
- inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +16 -0
- inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +70 -0
- inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.module.css +8 -0
- inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.tsx +35 -17
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.tsx +2 -2
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.tsx +2 -2
- inspect_ai/_view/www/src/{types.ts → app/types.ts} +18 -11
- inspect_ai/_view/www/src/{usage → app/usage}/ModelTokenTable.tsx +1 -1
- inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.tsx +2 -2
- inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.tsx +1 -1
- inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.tsx +6 -6
- inspect_ai/_view/www/src/{api → client/api}/api-browser.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/api-http.ts +3 -3
- inspect_ai/_view/www/src/{api → client/api}/api-vscode.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/client-api.ts +6 -5
- inspect_ai/_view/www/src/{api → client/api}/index.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/types.ts +4 -1
- inspect_ai/_view/www/src/{logfile → client/remote}/remoteLogFile.ts +3 -3
- inspect_ai/_view/www/src/{storage → client/storage}/index.ts +11 -5
- inspect_ai/_view/www/src/components/Card.tsx +1 -1
- inspect_ai/_view/www/src/components/CopyButton.tsx +1 -1
- inspect_ai/_view/www/src/components/DownloadButton.tsx +1 -1
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +1 -1
- inspect_ai/_view/www/src/components/{ExpandablePanel.css → ExpandablePanel.module.css} +14 -11
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +16 -10
- inspect_ai/_view/www/src/components/FindBand.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.css +2 -2
- inspect_ai/_view/www/src/components/LargeModal.tsx +12 -1
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -1
- inspect_ai/_view/www/src/components/MessageBand.tsx +1 -1
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +1 -1
- inspect_ai/_view/www/src/constants.ts +10 -9
- inspect_ai/_view/www/src/index.tsx +27 -11
- inspect_ai/_view/www/src/state/appSlice.ts +44 -5
- inspect_ai/_view/www/src/state/hooks.ts +30 -7
- inspect_ai/_view/www/src/state/logSlice.ts +7 -5
- inspect_ai/_view/www/src/state/logsPolling.ts +1 -1
- inspect_ai/_view/www/src/state/logsSlice.ts +18 -13
- inspect_ai/_view/www/src/state/samplePolling.ts +12 -12
- inspect_ai/_view/www/src/state/sampleSlice.ts +3 -5
- inspect_ai/_view/www/src/state/sampleUtils.ts +1 -1
- inspect_ai/_view/www/src/{scoring/utils.ts → state/scoring.ts} +2 -2
- inspect_ai/_view/www/src/state/store.ts +9 -7
- inspect_ai/_view/www/src/state/utils.ts +1 -1
- inspect_ai/_view/www/src/tests/README.md +49 -0
- inspect_ai/_view/www/src/tests/__mocks__/fileMock.js +1 -0
- inspect_ai/_view/www/src/tests/__mocks__/styleMock.js +1 -0
- inspect_ai/_view/www/src/tests/setupTests.mjs +1 -0
- inspect_ai/_view/www/src/tests/utils/base64.test.ts +23 -0
- inspect_ai/_view/www/src/tests/utils/format.test.ts +127 -0
- inspect_ai/_view/www/src/tests/utils/path.test.ts +54 -0
- inspect_ai/_view/www/src/utils/format.ts +8 -2
- inspect_ai/_view/www/src/utils/path.ts +14 -2
- inspect_ai/_view/www/src/utils/polling.ts +1 -2
- inspect_ai/_view/www/src/utils/uri.ts +32 -0
- inspect_ai/_view/www/yarn.lock +3310 -382
- inspect_ai/agent/_handoff.py +6 -3
- inspect_ai/agent/_human/agent.py +5 -3
- inspect_ai/agent/_human/install.py +16 -7
- inspect_ai/agent/_human/panel.py +14 -1
- inspect_ai/agent/_human/service.py +5 -1
- inspect_ai/agent/_react.py +161 -128
- inspect_ai/agent/_types.py +15 -4
- inspect_ai/approval/_policy.py +2 -2
- inspect_ai/log/_file.py +30 -11
- inspect_ai/log/_log.py +7 -1
- inspect_ai/log/_recorders/eval.py +3 -0
- inspect_ai/log/_recorders/types.py +1 -0
- inspect_ai/log/_samples.py +4 -0
- inspect_ai/model/_call_tools.py +33 -17
- inspect_ai/model/_generate_config.py +10 -2
- inspect_ai/model/_model.py +41 -21
- inspect_ai/model/_model_output.py +2 -1
- inspect_ai/model/_openai.py +10 -8
- inspect_ai/model/_openai_responses.py +95 -42
- inspect_ai/model/_providers/anthropic.py +14 -12
- inspect_ai/model/_providers/google.py +191 -95
- inspect_ai/model/_providers/hf.py +1 -1
- inspect_ai/model/_providers/mistral.py +2 -3
- inspect_ai/model/_providers/openai.py +54 -17
- inspect_ai/model/_providers/openai_o1.py +1 -1
- inspect_ai/model/_providers/openai_responses.py +28 -16
- inspect_ai/model/_providers/openrouter.py +14 -0
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/util/chatapi.py +17 -7
- inspect_ai/model/_providers/vllm.py +1 -1
- inspect_ai/scorer/_metric.py +17 -1
- inspect_ai/scorer/_model.py +51 -6
- inspect_ai/scorer/_scorer.py +1 -1
- inspect_ai/solver/_human_agent.py +3 -0
- inspect_ai/solver/_plan.py +1 -1
- inspect_ai/solver/_solver.py +1 -1
- inspect_ai/solver/_use_tools.py +14 -8
- inspect_ai/tool/__init__.py +16 -1
- inspect_ai/tool/_json_rpc_helpers.py +285 -0
- inspect_ai/tool/_mcp/__init__.py +13 -0
- inspect_ai/tool/_mcp/_context.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +293 -0
- inspect_ai/tool/_mcp/_sandbox.py +104 -0
- inspect_ai/tool/_mcp/_types.py +31 -0
- inspect_ai/tool/_mcp/connection.py +60 -0
- inspect_ai/tool/_mcp/sampling.py +118 -0
- inspect_ai/tool/_mcp/server.py +112 -0
- inspect_ai/tool/_mcp/tools.py +34 -0
- inspect_ai/tool/_tool.py +13 -0
- inspect_ai/tool/_tool_def.py +24 -7
- inspect_ai/tool/_tool_support_helpers.py +129 -153
- inspect_ai/tool/_tools/_bash_session.py +11 -11
- inspect_ai/tool/_tools/_text_editor.py +6 -6
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +8 -8
- inspect_ai/util/_anyio.py +31 -20
- inspect_ai/util/_json.py +20 -2
- inspect_ai/util/_sandbox/context.py +18 -7
- inspect_ai/util/_sandbox/docker/compose.py +1 -1
- inspect_ai/util/_sandbox/docker/docker.py +92 -21
- inspect_ai/util/_sandbox/environment.py +33 -2
- inspect_ai/util/_sandbox/events.py +2 -2
- inspect_ai/util/_sandbox/service.py +13 -3
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/METADATA +6 -2
- inspect_ai-0.3.92.dist-info/RECORD +732 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/src/App.tsx +0 -316
- inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +0 -4
- inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +0 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +0 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +0 -14
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +0 -292
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +0 -5
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +0 -57
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +0 -43
- inspect_ai-0.3.90.dist-info/RECORD +0 -705
- /inspect_ai/_view/www/src/{types → @types}/asciicinema-player.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/jsondiffpatch.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/markdown-it-katex.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/prism.d.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/colors.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/fonts.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/styles.ts +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataGrid.tsx +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.tsx +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetadataGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/types.ts +0 -0
- /inspect_ai/_view/www/src/{workspace/WorkSpaceView.module.css → app/log-view/LogView.module.css} +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.tsx +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/JsonTab.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.tsx +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/types.ts +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/utils.ts +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.tsx +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/error.ts +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/tokenize.ts +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateDiffView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.module.css +0 -0
- /inspect_ai/_view/www/src/{api → client/api}/api-shared.ts +0 -0
- /inspect_ai/_view/www/src/{api → client/api}/jsonrpc.ts +0 -0
- /inspect_ai/_view/www/src/{logfile → client/remote}/remoteZipFile.ts +0 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/top_level.txt +0 -0
@@ -4,197 +4,173 @@ This module provides helper code for handling JSON-RPC communication between the
|
|
4
4
|
It includes definitions for JSON-RPC request and response models, as well as functions to create and parse JSON-RPC requests and responses.
|
5
5
|
"""
|
6
6
|
|
7
|
-
import json
|
8
|
-
from itertools import count
|
9
7
|
from textwrap import dedent
|
10
|
-
from typing import
|
11
|
-
|
12
|
-
from pydantic import BaseModel, RootModel
|
8
|
+
from typing import Type
|
13
9
|
|
14
10
|
from inspect_ai._util.error import PrerequisiteError
|
15
|
-
from inspect_ai.tool._tool import ToolError, ToolParsingError
|
16
11
|
from inspect_ai.util import sandbox_with
|
17
12
|
from inspect_ai.util._sandbox.environment import SandboxEnvironment
|
18
13
|
|
14
|
+
from ._json_rpc_helpers import (
|
15
|
+
BaseModelT,
|
16
|
+
JSONRPCParamsType,
|
17
|
+
JSONRPCTransport,
|
18
|
+
ScalarT,
|
19
|
+
_rpc_call_description,
|
20
|
+
create_json_rpc_request,
|
21
|
+
)
|
22
|
+
from ._json_rpc_helpers import exec_model_request as model_request
|
23
|
+
from ._json_rpc_helpers import exec_notification as notification_helper
|
24
|
+
from ._json_rpc_helpers import exec_scalar_request as scalar_request
|
19
25
|
|
20
|
-
class JSONRPCResponseBase(BaseModel):
|
21
|
-
jsonrpc: Literal["2.0"]
|
22
|
-
id: int | float | str
|
23
|
-
|
24
|
-
|
25
|
-
class JSONRPCSuccessResponse(JSONRPCResponseBase):
|
26
|
-
result: object
|
27
|
-
|
28
|
-
|
29
|
-
class JSONRPCError(BaseModel):
|
30
|
-
"""See: https://www.jsonrpc.org/specification#error_object"""
|
31
|
-
|
32
|
-
code: int
|
33
|
-
message: str
|
34
|
-
data: object | None = None
|
35
|
-
|
36
|
-
|
37
|
-
class JSONRPCErrorResponse(JSONRPCResponseBase):
|
38
|
-
error: JSONRPCError
|
39
|
-
|
40
|
-
|
41
|
-
class JSONRPCResponse(RootModel[JSONRPCSuccessResponse | JSONRPCErrorResponse]):
|
42
|
-
pass
|
43
26
|
|
27
|
+
async def exec_scalar_request(
|
28
|
+
sandbox: SandboxEnvironment,
|
29
|
+
method: str,
|
30
|
+
params: JSONRPCParamsType,
|
31
|
+
result_type: Type[ScalarT],
|
32
|
+
timeout: int | None = None,
|
33
|
+
user: str | None = None,
|
34
|
+
) -> ScalarT:
|
35
|
+
return await scalar_request(
|
36
|
+
method,
|
37
|
+
params,
|
38
|
+
result_type,
|
39
|
+
transport=ToolSupportSandboxTransport(sandbox, timeout, user),
|
40
|
+
)
|
44
41
|
|
45
|
-
BaseModelT = TypeVar("BaseModelT", bound=BaseModel)
|
46
|
-
StrOrModelT = TypeVar("StrOrModelT", bound=str | BaseModel)
|
47
42
|
|
48
|
-
|
43
|
+
async def exec_model_request(
|
44
|
+
sandbox: SandboxEnvironment,
|
45
|
+
method: str,
|
46
|
+
params: JSONRPCParamsType,
|
47
|
+
result_type: Type[BaseModelT],
|
48
|
+
timeout: int | None = None,
|
49
|
+
user: str | None = None,
|
50
|
+
) -> BaseModelT:
|
51
|
+
return await model_request(
|
52
|
+
method,
|
53
|
+
params,
|
54
|
+
result_type,
|
55
|
+
transport=ToolSupportSandboxTransport(sandbox, timeout, user),
|
56
|
+
)
|
49
57
|
|
50
58
|
|
51
|
-
async def
|
59
|
+
async def exec_notification(
|
52
60
|
sandbox: SandboxEnvironment,
|
53
61
|
method: str,
|
54
|
-
params:
|
55
|
-
result_cls: Type[StrOrModelT],
|
62
|
+
params: JSONRPCParamsType,
|
56
63
|
timeout: int | None = None,
|
57
64
|
user: str | None = None,
|
58
|
-
) ->
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
Note that the JSON RPC request is sent to the exec'ed program via stdin.
|
65
|
+
) -> None:
|
66
|
+
return await notification_helper(
|
67
|
+
method, params, transport=ToolSupportSandboxTransport(sandbox, timeout, user)
|
68
|
+
)
|
63
69
|
|
64
|
-
Args:
|
65
|
-
sandbox (SandboxEnvironment): The sandbox environment to execute the command in.
|
66
|
-
method (str): The JSON-RPC method to call.
|
67
|
-
params (dict[str, object] | tuple[object, ...]): The parameters for the JSON-RPC method.
|
68
|
-
result_cls (Type[BaseModelT]): The class to use for parsing the result.
|
69
|
-
timeout (int | None, optional): The timeout for the execution. Defaults to None.
|
70
|
-
user: Optional username or UID to run the command as.
|
71
70
|
|
72
|
-
|
73
|
-
|
71
|
+
class ToolSupportSandboxTransport(JSONRPCTransport):
|
72
|
+
"""
|
73
|
+
A transport callable that uses a sandbox for RPC communication.
|
74
74
|
|
75
|
-
|
76
|
-
|
77
|
-
|
75
|
+
This class implements the JSONRPCTransport interface and encapsulates
|
76
|
+
the sandbox, timeout, and user parameters needed for sandbox-based
|
77
|
+
RPC communication.
|
78
78
|
"""
|
79
|
-
exec_result = await sandbox.exec(
|
80
|
-
[SANDBOX_CLI, "exec"],
|
81
|
-
input=_create_json_rpc_request(method, params),
|
82
|
-
timeout=timeout,
|
83
|
-
user=user,
|
84
|
-
)
|
85
79
|
|
86
|
-
|
87
|
-
|
88
|
-
|
80
|
+
def __init__(
|
81
|
+
self,
|
82
|
+
sandbox: SandboxEnvironment,
|
83
|
+
timeout: int | None = None,
|
84
|
+
user: str | None = None,
|
85
|
+
):
|
86
|
+
"""
|
87
|
+
Initialize a new ToolSupportSandboxTransport.
|
88
|
+
|
89
|
+
Args:
|
90
|
+
sandbox (SandboxEnvironment): The sandbox environment to use.
|
91
|
+
timeout (int | None, optional): The timeout for executions. Defaults to None.
|
92
|
+
user (str | None, optional): Username or UID to run commands as. Defaults to None.
|
93
|
+
"""
|
94
|
+
self.sandbox = sandbox
|
95
|
+
self.timeout = timeout
|
96
|
+
self.user = user
|
97
|
+
|
98
|
+
async def __call__(
|
99
|
+
self, method: str, params: JSONRPCParamsType, is_notification: bool
|
100
|
+
) -> str:
|
101
|
+
"""
|
102
|
+
Execute an RPC request using the sandbox transport.
|
103
|
+
|
104
|
+
Args:
|
105
|
+
method (str): The JSON-RPC method to call.
|
106
|
+
params (dict[str, object] | tuple[object, ...]): The parameters for the JSON-RPC method.
|
107
|
+
is_notification (bool): Whether this is a notification (no response expected).
|
108
|
+
|
109
|
+
Returns:
|
110
|
+
str: The response from the RPC call.
|
111
|
+
|
112
|
+
Raises:
|
113
|
+
RuntimeError: If the sandbox execution fails.
|
114
|
+
"""
|
115
|
+
exec_result = await self.sandbox.exec(
|
116
|
+
[SANDBOX_CLI, "exec"],
|
117
|
+
input=create_json_rpc_request(method, params, is_notification),
|
118
|
+
timeout=self.timeout,
|
119
|
+
user=self.user,
|
89
120
|
)
|
90
121
|
|
91
|
-
|
92
|
-
case JSONRPCError(code=-32601 | -32602, message=message):
|
93
|
-
raise ToolParsingError(message)
|
94
|
-
case JSONRPCError(code=-32000, message=message):
|
95
|
-
raise ToolError(message)
|
96
|
-
case JSONRPCError(code=code, message=message):
|
97
|
-
raise RuntimeError(
|
98
|
-
f"Error executing tool command {_rpc_call_description(method, params)}: {code=} {message}"
|
99
|
-
)
|
100
|
-
# case result_cls() as model: yields a mypy error since it has narrowed model down
|
101
|
-
# to BaseModel and not BaseModelT. ???
|
102
|
-
case model if isinstance(model, result_cls):
|
103
|
-
return model
|
104
|
-
case not_possible:
|
122
|
+
if not exec_result.success:
|
105
123
|
raise RuntimeError(
|
106
|
-
f"
|
124
|
+
f"Sandbox.exec failure executing {_rpc_call_description(method, params)}: {exec_result.stderr}"
|
107
125
|
)
|
126
|
+
return exec_result.stdout
|
108
127
|
|
109
128
|
|
110
129
|
# Name of the tool support executable: exec'ed inside the sandbox for RPC
# calls and used to locate a sandbox that has it on its path.
SANDBOX_CLI = "inspect-tool-support"
# Reference image named in the error message shown when no sandbox has the CLI.
INSPECT_TOOL_SUPPORT_IMAGE_DOCKERHUB = "aisiuk/inspect-tool-support"
|
112
131
|
|
113
132
|
|
114
|
-
async def tool_container_sandbox(
|
115
|
-
|
116
|
-
|
133
|
+
async def tool_container_sandbox(
    tool_name: str, *, sandbox_name: str | None = None
) -> SandboxEnvironment:
    """Return a sandbox for the current sample that has the tool support CLI.

    Args:
      tool_name (str): Name of the requesting tool; only used in the error message.
      sandbox_name (str | None): Optional sandbox environment name. When given,
        only that sandbox is considered.

    Returns:
      SandboxEnvironment: The sandbox in which `SANDBOX_CLI` was found.

    Raises:
      PrerequisiteError: If no matching sandbox was found. The message includes
        example Docker configuration.
    """
    # on_path=True: SANDBOX_CLI is a filename looked up on the sandbox's path.
    if sb := await sandbox_with(SANDBOX_CLI, True, name=sandbox_name):
        return sb

    # This sort of programmatic sentence building will not cut it if we ever
    # support other languages.
    # NOTE(review): confirm the whitespace inside the template below against
    # the original file - it determines the rendered YAML/Dockerfile layout.
    raise PrerequisiteError(
        dedent(f"""
            The {tool_name} service was not found in {"any of the sandboxes" if sandbox_name is None else f"the sandbox '{sandbox_name}'"} for this sample. Please add the {tool_name} to your configuration.

            For example, the following Docker compose file uses the {INSPECT_TOOL_SUPPORT_IMAGE_DOCKERHUB} reference image as its default sandbox:

            services:
              default:
                image: "{INSPECT_TOOL_SUPPORT_IMAGE_DOCKERHUB}"
                init: true

            Alternatively, you can include the service into your own Dockerfile:

            ENV PATH="$PATH:/opt/inspect_tool_support/bin"
            RUN python -m venv /opt/inspect_tool_support && \\
                /opt/inspect_tool_support/bin/pip install inspect-tool-support && \\
                /opt/inspect_tool_support/bin/inspect-tool-support post-install
            """).strip()
    )
|
150
160
|
|
151
161
|
|
152
|
-
def
|
153
|
-
|
154
|
-
) ->
|
162
|
+
def create_sandbox_transport(
    sandbox: SandboxEnvironment, timeout: int | None = None, user: str | None = None
) -> JSONRPCTransport:
    """
    Build a JSON-RPC transport that communicates through a sandbox.

    Args:
      sandbox (SandboxEnvironment): The sandbox environment to use.
      timeout (int | None, optional): The timeout for executions. Defaults to None.
      user (str | None, optional): Username or UID to run commands as. Defaults to None.

    Returns:
      JSONRPCTransport: A ToolSupportSandboxTransport bound to the given
        sandbox, timeout, and user.
    """
    transport = ToolSupportSandboxTransport(
        sandbox=sandbox,
        timeout=timeout,
        user=user,
    )
    return transport
|
@@ -3,7 +3,7 @@ from shortuuid import uuid
|
|
3
3
|
|
4
4
|
from inspect_ai.tool import ToolResult
|
5
5
|
from inspect_ai.tool._tool_support_helpers import (
|
6
|
-
|
6
|
+
exec_model_request,
|
7
7
|
tool_container_sandbox,
|
8
8
|
)
|
9
9
|
from inspect_ai.util import StoreModel, store_as
|
@@ -98,11 +98,11 @@ def bash_session(*, timeout: int | None = None, instance: str | None = uuid()) -
|
|
98
98
|
|
99
99
|
if not store.session_id:
|
100
100
|
store.session_id = (
|
101
|
-
await
|
102
|
-
sandbox,
|
103
|
-
"bash_session_new_session",
|
104
|
-
{},
|
105
|
-
NewSessionResult,
|
101
|
+
await exec_model_request(
|
102
|
+
sandbox=sandbox,
|
103
|
+
method="bash_session_new_session",
|
104
|
+
params={},
|
105
|
+
result_type=NewSessionResult,
|
106
106
|
timeout=timeout,
|
107
107
|
)
|
108
108
|
).session_name
|
@@ -110,11 +110,11 @@ def bash_session(*, timeout: int | None = None, instance: str | None = uuid()) -
|
|
110
110
|
params["session_name"] = store.session_id
|
111
111
|
|
112
112
|
result = (
|
113
|
-
await
|
114
|
-
sandbox,
|
115
|
-
"bash_session",
|
116
|
-
params,
|
117
|
-
BashResult,
|
113
|
+
await exec_model_request(
|
114
|
+
sandbox=sandbox,
|
115
|
+
method="bash_session",
|
116
|
+
params=params,
|
117
|
+
result_type=BashResult,
|
118
118
|
timeout=timeout,
|
119
119
|
)
|
120
120
|
).root
|
@@ -5,7 +5,7 @@ from pydantic import BaseModel, Discriminator, RootModel
|
|
5
5
|
|
6
6
|
from inspect_ai.tool import ToolResult
|
7
7
|
from inspect_ai.tool._tool_support_helpers import (
|
8
|
-
|
8
|
+
exec_scalar_request,
|
9
9
|
tool_container_sandbox,
|
10
10
|
)
|
11
11
|
|
@@ -110,11 +110,11 @@ def text_editor(timeout: int | None = None, user: str | None = None) -> Tool:
|
|
110
110
|
if k in inspect.signature(execute).parameters
|
111
111
|
}
|
112
112
|
|
113
|
-
return await
|
114
|
-
sandbox,
|
115
|
-
"text_editor",
|
116
|
-
params,
|
117
|
-
TextEditorResult,
|
113
|
+
return await exec_scalar_request(
|
114
|
+
sandbox=sandbox,
|
115
|
+
method="text_editor",
|
116
|
+
params=params,
|
117
|
+
result_type=TextEditorResult,
|
118
118
|
timeout=timeout,
|
119
119
|
)
|
120
120
|
|
@@ -9,7 +9,7 @@ from inspect_ai.tool._tool import Tool, ToolError, ToolResult, tool
|
|
9
9
|
from inspect_ai.tool._tool_call import ToolCall, ToolCallContent, ToolCallView
|
10
10
|
from inspect_ai.tool._tool_info import parse_tool_info
|
11
11
|
from inspect_ai.tool._tool_support_helpers import (
|
12
|
-
|
12
|
+
exec_model_request,
|
13
13
|
tool_container_sandbox,
|
14
14
|
)
|
15
15
|
from inspect_ai.tool._tool_with import tool_with
|
@@ -414,18 +414,18 @@ async def _web_browser_cmd(
|
|
414
414
|
|
415
415
|
if not store.session_id:
|
416
416
|
store.session_id = (
|
417
|
-
await
|
418
|
-
sandbox_env,
|
419
|
-
"web_new_session",
|
420
|
-
{"headful": False},
|
421
|
-
NewSessionResult,
|
417
|
+
await exec_model_request(
|
418
|
+
sandbox=sandbox_env,
|
419
|
+
method="web_new_session",
|
420
|
+
params={"headful": False},
|
421
|
+
result_type=NewSessionResult,
|
422
422
|
)
|
423
423
|
).session_name
|
424
424
|
|
425
425
|
params["session_name"] = store.session_id
|
426
426
|
|
427
|
-
crawler_result = await
|
428
|
-
sandbox_env, tool_name, params, CrawlerResult
|
427
|
+
crawler_result = await exec_model_request(
|
428
|
+
sandbox=sandbox_env, method=tool_name, params=params, result_type=CrawlerResult
|
429
429
|
)
|
430
430
|
if crawler_result.error and crawler_result.error.strip() != "":
|
431
431
|
raise ToolError(crawler_result.error)
|
inspect_ai/util/_anyio.py
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
import itertools
|
1
2
|
import sys
|
2
3
|
|
3
4
|
if sys.version_info < (3, 11):
|
@@ -5,23 +6,33 @@ if sys.version_info < (3, 11):
|
|
5
6
|
|
6
7
|
|
7
8
|
def inner_exception(exc: Exception) -> Exception:
    """Return the first exception from the flattened form of `exc`.

    The flattening (see `_flatten_exception`) discards exception groups and
    follows `__context__` links, so the result is never a group itself.
    """
    flattened = _flatten_exception(exc)
    return flattened[0]
|
10
|
+
|
11
|
+
|
12
|
+
def _flatten_exception(exc: Exception) -> list[Exception]:
|
13
|
+
"""Recursively flatten an exception to get all related (__context__) and contained (ExceptionGroup) exceptions."""
|
14
|
+
context_to_follow = (
|
15
|
+
[exc.__context__]
|
16
|
+
# conceptually, if __cause__ is present, it means that this exception
|
17
|
+
# wraps the cause - rather than cause being a separate error. We'll
|
18
|
+
# follow __context__ only if __cause__ is None
|
19
|
+
if exc.__cause__ is None and isinstance(exc.__context__, Exception)
|
20
|
+
else []
|
21
|
+
)
|
22
|
+
|
23
|
+
(maybe_this_exception, children_to_follow) = (
|
24
|
+
([], exc.exceptions)
|
25
|
+
# if it's a group, follow the children discarding the group
|
26
|
+
if isinstance(exc, ExceptionGroup)
|
27
|
+
else ([exc], [])
|
28
|
+
)
|
29
|
+
|
30
|
+
# We have to use a set since the same exception is likely to be included in
|
31
|
+
# both __context__ and .exceptions
|
32
|
+
other_exceptions = [
|
33
|
+
flattened_e
|
34
|
+
for e in set(itertools.chain(context_to_follow, children_to_follow))
|
35
|
+
for flattened_e in _flatten_exception(e)
|
36
|
+
]
|
37
|
+
|
38
|
+
return maybe_this_exception + other_exceptions
|
inspect_ai/util/_json.py
CHANGED
@@ -2,12 +2,14 @@ import types
|
|
2
2
|
import typing
|
3
3
|
from copy import deepcopy
|
4
4
|
from dataclasses import is_dataclass
|
5
|
+
from datetime import date, datetime, time
|
5
6
|
from typing import (
|
6
7
|
Any,
|
7
8
|
Dict,
|
8
9
|
List,
|
9
10
|
Literal,
|
10
11
|
Optional,
|
12
|
+
Set,
|
11
13
|
Tuple,
|
12
14
|
Type,
|
13
15
|
Union,
|
@@ -30,6 +32,9 @@ class JSONSchema(BaseModel):
|
|
30
32
|
type: JSONType | None = Field(default=None)
|
31
33
|
"""JSON type of tool parameter."""
|
32
34
|
|
35
|
+
format: str | None = Field(default=None)
|
36
|
+
"""Format of the parameter (e.g. date-time)."""
|
37
|
+
|
33
38
|
description: str | None = Field(default=None)
|
34
39
|
"""Parameter description."""
|
35
40
|
|
@@ -80,7 +85,13 @@ def json_schema(t: Type[Any]) -> JSONSchema:
|
|
80
85
|
return JSONSchema(type="string")
|
81
86
|
elif t is bool:
|
82
87
|
return JSONSchema(type="boolean")
|
83
|
-
elif t is
|
88
|
+
elif t is datetime:
|
89
|
+
return JSONSchema(type="string", format="date-time")
|
90
|
+
elif t is date:
|
91
|
+
return JSONSchema(type="string", format="date")
|
92
|
+
elif t is time:
|
93
|
+
return JSONSchema(type="string", format="time")
|
94
|
+
elif t is list or t is set:
|
84
95
|
return JSONSchema(type="array", items=JSONSchema())
|
85
96
|
elif t is dict:
|
86
97
|
return JSONSchema(type="object", additionalProperties=JSONSchema())
|
@@ -94,7 +105,14 @@ def json_schema(t: Type[Any]) -> JSONSchema:
|
|
94
105
|
return JSONSchema(type="null")
|
95
106
|
else:
|
96
107
|
return JSONSchema()
|
97
|
-
elif
|
108
|
+
elif (
|
109
|
+
origin is list
|
110
|
+
or origin is List
|
111
|
+
or origin is tuple
|
112
|
+
or origin is Tuple
|
113
|
+
or origin is set
|
114
|
+
or origin is Set
|
115
|
+
):
|
98
116
|
return JSONSchema(
|
99
117
|
type="array", items=json_schema(args[0]) if args else JSONSchema()
|
100
118
|
)
|
@@ -24,7 +24,7 @@ def sandbox(name: str | None = None) -> SandboxEnvironment:
|
|
24
24
|
"""Get the SandboxEnvironment for the current sample.
|
25
25
|
|
26
26
|
Args:
|
27
|
-
name (str | None): Optional sandbox
|
27
|
+
name (str | None): Optional sandbox environment name.
|
28
28
|
|
29
29
|
Return:
|
30
30
|
SandboxEnvironment instance.
|
@@ -45,12 +45,14 @@ def sandbox(name: str | None = None) -> SandboxEnvironment:
|
|
45
45
|
environment = environments.get(name, None)
|
46
46
|
if not environment:
|
47
47
|
raise ValueError(
|
48
|
-
f"SandboxEnvironment '{name}' is not a
|
48
|
+
f"SandboxEnvironment '{name}' is not a recognized environment name."
|
49
49
|
)
|
50
50
|
return environment
|
51
51
|
|
52
52
|
|
53
|
-
async def sandbox_with(
|
53
|
+
async def sandbox_with(
|
54
|
+
file: str, on_path: bool = False, *, name: str | None = None
|
55
|
+
) -> SandboxEnvironment | None:
|
54
56
|
"""Get the SandboxEnvironment for the current sample that has the specified file.
|
55
57
|
|
56
58
|
Args:
|
@@ -58,9 +60,12 @@ async def sandbox_with(file: str, on_path: bool = False) -> SandboxEnvironment |
|
|
58
60
|
True, file should be a filename that exists on the system path.
|
59
61
|
on_path (bool): If True, file is a filename to be verified using "which".
|
60
62
|
If False, file is a path to be checked within the sandbox environments.
|
63
|
+
name (str | None): Optional sandbox environment name.
|
64
|
+
|
61
65
|
|
62
66
|
Return:
|
63
|
-
SandboxEnvironment instance or None if
|
67
|
+
SandboxEnvironment instance or None if none of the sandboxes (or the named
|
68
|
+
sandbox) had the file.
|
64
69
|
"""
|
65
70
|
# get environments and with mapping
|
66
71
|
environments = sandbox_environments_context_var.get(None)
|
@@ -71,13 +76,19 @@ async def sandbox_with(file: str, on_path: bool = False) -> SandboxEnvironment |
|
|
71
76
|
raise_no_sandbox()
|
72
77
|
|
73
78
|
# if we've already discovered the sandbox for this file then return it
|
74
|
-
environment_with_key = f"{file}:{on_path}"
|
79
|
+
environment_with_key = f"{name or ''}:{file}:{on_path}"
|
75
80
|
environment = environments_with.get(environment_with_key, None)
|
76
81
|
if environment is not None:
|
77
82
|
return environment
|
78
83
|
|
79
|
-
# look in each sandbox
|
80
|
-
for
|
84
|
+
# look in each (or the named) sandbox
|
85
|
+
for environment in (
|
86
|
+
environments.values()
|
87
|
+
if name is None
|
88
|
+
else [named_env]
|
89
|
+
if (named_env := environments.get(name, None))
|
90
|
+
else []
|
91
|
+
):
|
81
92
|
try:
|
82
93
|
if on_path:
|
83
94
|
# can we find the file on the path?
|