inspect-ai 0.3.90__py3-none-any.whl → 0.3.91__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +13 -0
- inspect_ai/_cli/eval.py +40 -0
- inspect_ai/_display/textual/widgets/samples.py +49 -4
- inspect_ai/_display/textual/widgets/vscode.py +4 -2
- inspect_ai/_eval/eval.py +41 -28
- inspect_ai/_eval/evalset.py +4 -0
- inspect_ai/_eval/loader.py +4 -5
- inspect_ai/_eval/registry.py +1 -1
- inspect_ai/_eval/run.py +6 -3
- inspect_ai/_eval/task/log.py +6 -0
- inspect_ai/_eval/task/run.py +108 -41
- inspect_ai/_eval/task/sandbox.py +19 -5
- inspect_ai/_util/_async.py +1 -1
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/environ.py +32 -0
- inspect_ai/_util/file.py +8 -1
- inspect_ai/_util/httpx.py +105 -22
- inspect_ai/_util/registry.py +83 -9
- inspect_ai/_util/text.py +81 -17
- inspect_ai/_util/transcript.py +9 -6
- inspect_ai/_util/vscode.py +7 -2
- inspect_ai/_view/schema.py +1 -1
- inspect_ai/_view/www/babel.config.js +11 -0
- inspect_ai/_view/www/dist/assets/index.css +3640 -3563
- inspect_ai/_view/www/dist/assets/index.js +59204 -52519
- inspect_ai/_view/www/eslint.config.mjs +10 -1
- inspect_ai/_view/www/jest.config.mjs +21 -0
- inspect_ai/_view/www/log-schema.json +111 -2
- inspect_ai/_view/www/package.json +19 -5
- inspect_ai/_view/www/src/{types → @types}/log.d.ts +95 -32
- inspect_ai/_view/www/{App.css → src/app/App.css} +22 -14
- inspect_ai/_view/www/src/app/App.tsx +168 -0
- inspect_ai/_view/www/src/{AppErrorBoundary.tsx → app/AppErrorBoundary.tsx} +1 -1
- inspect_ai/_view/www/src/{appearance → app/appearance}/icons.ts +1 -0
- inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.tsx +5 -5
- inspect_ai/_view/www/src/{workspace/WorkSpaceView.tsx → app/log-view/LogView.tsx} +59 -40
- inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +159 -0
- inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +109 -0
- inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.tsx +3 -3
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.tsx +4 -4
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.tsx +8 -8
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.tsx +6 -6
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.tsx +8 -8
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.tsx +35 -6
- inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +136 -0
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/SamplesTab.tsx +82 -73
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/grouping.ts +3 -3
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/types.ts +1 -1
- inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.tsx +2 -2
- inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.tsx +1 -1
- inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.tsx +4 -4
- inspect_ai/_view/www/src/{plan → app/plan}/PlanCard.tsx +2 -2
- inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.tsx +5 -5
- inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/AppRouter.tsx +58 -0
- inspect_ai/_view/www/src/app/routing/navigationHooks.ts +182 -0
- inspect_ai/_view/www/src/app/routing/url.ts +43 -0
- inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.tsx +11 -27
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDialog.tsx +36 -40
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.tsx +116 -49
- inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.module.css +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.tsx +29 -26
- inspect_ai/_view/www/src/{samples → app/samples}/SamplesTools.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.module.css +5 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.tsx +12 -4
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRenderer.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.tsx +6 -1
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatView.tsx +4 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.tsx +5 -3
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +12 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContent.tsx +11 -10
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContents.tsx +14 -8
- inspect_ai/_view/www/src/{samples → app/samples}/chat/messages.ts +2 -2
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolCallView.tsx +26 -27
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +19 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolInput.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.module.css +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.module.css +4 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/tool.ts +1 -1
- inspect_ai/_view/www/src/app/samples/chat/types.ts +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/samplesDescriptor.tsx +38 -15
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/CategoricalScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/NumericScoreDescriptor.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/OtherScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/types.ts +4 -3
- inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.module.css +2 -1
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.tsx +3 -0
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.tsx +47 -33
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.module.css +16 -0
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.tsx +47 -20
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/filters.ts +8 -6
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.tsx +4 -3
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/completions.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/language.ts +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/sampleDataAdapter.ts +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/sampleLimit.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.tsx +12 -11
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.tsx +6 -6
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ApprovalEventView.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ErrorEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/InputEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.module.css +13 -7
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.tsx +49 -21
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.tsx +11 -9
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleLimitEventView.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.tsx +8 -6
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/StepEventView.tsx +11 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.module.css +8 -7
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.tsx +32 -114
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.module.css +6 -5
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.tsx +14 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/utils.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenderers.tsx +23 -21
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenders.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.tsx +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +142 -0
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +39 -0
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/types.ts +1 -1
- inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.tsx +1 -1
- inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +16 -0
- inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +70 -0
- inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.module.css +8 -0
- inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.tsx +35 -17
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.tsx +2 -2
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.tsx +2 -2
- inspect_ai/_view/www/src/{types.ts → app/types.ts} +18 -11
- inspect_ai/_view/www/src/{usage → app/usage}/ModelTokenTable.tsx +1 -1
- inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.tsx +2 -2
- inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.tsx +1 -1
- inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.tsx +6 -6
- inspect_ai/_view/www/src/{api → client/api}/api-browser.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/api-http.ts +3 -3
- inspect_ai/_view/www/src/{api → client/api}/api-vscode.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/client-api.ts +6 -5
- inspect_ai/_view/www/src/{api → client/api}/index.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/types.ts +4 -1
- inspect_ai/_view/www/src/{logfile → client/remote}/remoteLogFile.ts +3 -3
- inspect_ai/_view/www/src/{storage → client/storage}/index.ts +11 -5
- inspect_ai/_view/www/src/components/Card.tsx +1 -1
- inspect_ai/_view/www/src/components/CopyButton.tsx +1 -1
- inspect_ai/_view/www/src/components/DownloadButton.tsx +1 -1
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +1 -1
- inspect_ai/_view/www/src/components/{ExpandablePanel.css → ExpandablePanel.module.css} +14 -11
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +16 -10
- inspect_ai/_view/www/src/components/FindBand.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.css +2 -2
- inspect_ai/_view/www/src/components/LargeModal.tsx +12 -1
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -1
- inspect_ai/_view/www/src/components/MessageBand.tsx +1 -1
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +1 -1
- inspect_ai/_view/www/src/constants.ts +10 -9
- inspect_ai/_view/www/src/index.tsx +27 -11
- inspect_ai/_view/www/src/state/appSlice.ts +44 -5
- inspect_ai/_view/www/src/state/hooks.ts +30 -7
- inspect_ai/_view/www/src/state/logSlice.ts +7 -5
- inspect_ai/_view/www/src/state/logsPolling.ts +1 -1
- inspect_ai/_view/www/src/state/logsSlice.ts +18 -13
- inspect_ai/_view/www/src/state/samplePolling.ts +12 -12
- inspect_ai/_view/www/src/state/sampleSlice.ts +3 -5
- inspect_ai/_view/www/src/state/sampleUtils.ts +1 -1
- inspect_ai/_view/www/src/{scoring/utils.ts → state/scoring.ts} +2 -2
- inspect_ai/_view/www/src/state/store.ts +9 -7
- inspect_ai/_view/www/src/state/utils.ts +1 -1
- inspect_ai/_view/www/src/tests/README.md +49 -0
- inspect_ai/_view/www/src/tests/__mocks__/fileMock.js +1 -0
- inspect_ai/_view/www/src/tests/__mocks__/styleMock.js +1 -0
- inspect_ai/_view/www/src/tests/setupTests.mjs +1 -0
- inspect_ai/_view/www/src/tests/utils/base64.test.ts +23 -0
- inspect_ai/_view/www/src/tests/utils/format.test.ts +127 -0
- inspect_ai/_view/www/src/tests/utils/path.test.ts +54 -0
- inspect_ai/_view/www/src/utils/format.ts +8 -2
- inspect_ai/_view/www/src/utils/path.ts +14 -2
- inspect_ai/_view/www/src/utils/polling.ts +1 -2
- inspect_ai/_view/www/src/utils/uri.ts +32 -0
- inspect_ai/_view/www/yarn.lock +3310 -382
- inspect_ai/agent/_handoff.py +6 -3
- inspect_ai/agent/_human/agent.py +5 -3
- inspect_ai/agent/_human/install.py +16 -7
- inspect_ai/agent/_human/panel.py +14 -1
- inspect_ai/agent/_human/service.py +5 -1
- inspect_ai/agent/_react.py +161 -128
- inspect_ai/agent/_types.py +15 -4
- inspect_ai/approval/_policy.py +2 -2
- inspect_ai/log/_file.py +30 -11
- inspect_ai/log/_log.py +7 -1
- inspect_ai/log/_recorders/eval.py +3 -0
- inspect_ai/log/_recorders/types.py +1 -0
- inspect_ai/log/_samples.py +4 -0
- inspect_ai/model/_call_tools.py +33 -17
- inspect_ai/model/_generate_config.py +10 -2
- inspect_ai/model/_model.py +41 -21
- inspect_ai/model/_model_output.py +2 -1
- inspect_ai/model/_openai.py +10 -8
- inspect_ai/model/_openai_responses.py +83 -42
- inspect_ai/model/_providers/anthropic.py +14 -12
- inspect_ai/model/_providers/google.py +191 -95
- inspect_ai/model/_providers/hf.py +1 -1
- inspect_ai/model/_providers/mistral.py +2 -3
- inspect_ai/model/_providers/openai.py +54 -17
- inspect_ai/model/_providers/openai_o1.py +1 -1
- inspect_ai/model/_providers/openai_responses.py +28 -16
- inspect_ai/model/_providers/openrouter.py +14 -0
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/util/chatapi.py +17 -7
- inspect_ai/model/_providers/vllm.py +1 -1
- inspect_ai/scorer/_metric.py +17 -1
- inspect_ai/scorer/_model.py +51 -6
- inspect_ai/scorer/_scorer.py +1 -1
- inspect_ai/solver/_human_agent.py +3 -0
- inspect_ai/solver/_plan.py +1 -1
- inspect_ai/solver/_solver.py +1 -1
- inspect_ai/solver/_use_tools.py +14 -8
- inspect_ai/tool/__init__.py +16 -1
- inspect_ai/tool/_json_rpc_helpers.py +285 -0
- inspect_ai/tool/_mcp/__init__.py +13 -0
- inspect_ai/tool/_mcp/_context.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +293 -0
- inspect_ai/tool/_mcp/_sandbox.py +104 -0
- inspect_ai/tool/_mcp/_types.py +31 -0
- inspect_ai/tool/_mcp/connection.py +60 -0
- inspect_ai/tool/_mcp/sampling.py +118 -0
- inspect_ai/tool/_mcp/server.py +112 -0
- inspect_ai/tool/_mcp/tools.py +34 -0
- inspect_ai/tool/_tool.py +13 -0
- inspect_ai/tool/_tool_def.py +24 -7
- inspect_ai/tool/_tool_support_helpers.py +129 -153
- inspect_ai/tool/_tools/_bash_session.py +11 -11
- inspect_ai/tool/_tools/_text_editor.py +6 -6
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +8 -8
- inspect_ai/util/_anyio.py +31 -20
- inspect_ai/util/_json.py +20 -2
- inspect_ai/util/_sandbox/context.py +18 -7
- inspect_ai/util/_sandbox/docker/compose.py +1 -1
- inspect_ai/util/_sandbox/docker/docker.py +92 -21
- inspect_ai/util/_sandbox/environment.py +33 -2
- inspect_ai/util/_sandbox/events.py +2 -2
- inspect_ai/util/_sandbox/service.py +13 -3
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/METADATA +6 -2
- inspect_ai-0.3.91.dist-info/RECORD +732 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/src/App.tsx +0 -316
- inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +0 -4
- inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +0 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +0 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +0 -14
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +0 -292
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +0 -5
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +0 -57
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +0 -43
- inspect_ai-0.3.90.dist-info/RECORD +0 -705
- /inspect_ai/_view/www/src/{types → @types}/asciicinema-player.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/jsondiffpatch.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/markdown-it-katex.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/prism.d.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/colors.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/fonts.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/styles.ts +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataGrid.tsx +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.tsx +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetadataGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/types.ts +0 -0
- /inspect_ai/_view/www/src/{workspace/WorkSpaceView.module.css → app/log-view/LogView.module.css} +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.tsx +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/JsonTab.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.tsx +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/types.ts +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/utils.ts +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.tsx +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/error.ts +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/tokenize.ts +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateDiffView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.module.css +0 -0
- /inspect_ai/_view/www/src/{api → client/api}/api-shared.ts +0 -0
- /inspect_ai/_view/www/src/{api → client/api}/jsonrpc.ts +0 -0
- /inspect_ai/_view/www/src/{logfile → client/remote}/remoteZipFile.ts +0 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/top_level.txt +0 -0
inspect_ai/agent/_handoff.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Any
|
1
|
+
from typing import Any, Sequence
|
2
2
|
|
3
3
|
from inspect_ai._util.registry import (
|
4
4
|
RegistryInfo,
|
@@ -6,7 +6,8 @@ from inspect_ai._util.registry import (
|
|
6
6
|
registry_unqualified_name,
|
7
7
|
set_registry_info,
|
8
8
|
)
|
9
|
-
from inspect_ai.tool._tool import Tool, ToolResult
|
9
|
+
from inspect_ai.tool._tool import Tool, ToolResult, ToolSource
|
10
|
+
from inspect_ai.tool._tool_def import ToolDef
|
10
11
|
from inspect_ai.tool._tool_description import ToolDescription, set_tool_description
|
11
12
|
|
12
13
|
from ._agent import Agent
|
@@ -86,7 +87,9 @@ class AgentTool(Tool):
|
|
86
87
|
raise RuntimeError("AgentTool should not be called directly")
|
87
88
|
|
88
89
|
|
89
|
-
def has_handoff(
|
90
|
+
def has_handoff(
|
91
|
+
tools: Sequence[Tool | ToolDef | ToolSource] | None,
|
92
|
+
) -> bool:
|
90
93
|
if tools:
|
91
94
|
return any([isinstance(tool, AgentTool) for tool in tools])
|
92
95
|
else:
|
inspect_ai/agent/_human/agent.py
CHANGED
@@ -18,6 +18,7 @@ def human_cli(
|
|
18
18
|
answer: bool | str = True,
|
19
19
|
intermediate_scoring: bool = False,
|
20
20
|
record_session: bool = True,
|
21
|
+
user: str | None = None,
|
21
22
|
) -> Agent:
|
22
23
|
"""Human CLI agent for tasks that run in a sandbox.
|
23
24
|
|
@@ -37,6 +38,7 @@ def human_cli(
|
|
37
38
|
that the answer matches the expected format.
|
38
39
|
intermediate_scoring: Allow the human agent to check their score while working.
|
39
40
|
record_session: Record all user commands and outputs in the sandbox bash session.
|
41
|
+
user: User to login as. Defaults to the sandbox environment's default user.
|
40
42
|
|
41
43
|
Returns:
|
42
44
|
Agent: Human CLI agent.
|
@@ -48,7 +50,7 @@ def human_cli(
|
|
48
50
|
async with agent_lock:
|
49
51
|
# ensure that we have a sandbox to work with
|
50
52
|
try:
|
51
|
-
connection = await sandbox().connection()
|
53
|
+
connection = await sandbox().connection(user=user)
|
52
54
|
except ProcessLookupError:
|
53
55
|
raise RuntimeError("Human agent must run in a task with a sandbox.")
|
54
56
|
except NotImplementedError:
|
@@ -66,13 +68,13 @@ def human_cli(
|
|
66
68
|
)
|
67
69
|
|
68
70
|
# install agent tools
|
69
|
-
await install_human_agent(commands, record_session)
|
71
|
+
await install_human_agent(user, commands, record_session)
|
70
72
|
|
71
73
|
# hookup the view ui
|
72
74
|
view.connect(connection)
|
73
75
|
|
74
76
|
# run sandbox service
|
75
|
-
return await run_human_agent_service(state, commands, view)
|
77
|
+
return await run_human_agent_service(user, state, commands, view)
|
76
78
|
|
77
79
|
# support both fullscreen ui and fallback
|
78
80
|
if display_type() == "full":
|
inspect_ai/agent/_human/install.py
CHANGED
@@ -17,7 +17,9 @@ RECORD_SESSION_DIR = "/var/tmp/user-sessions"
|
|
17
17
|
|
18
18
|
|
19
19
|
async def install_human_agent(
|
20
|
-
|
20
|
+
user: str | None,
|
21
|
+
commands: list[HumanAgentCommand],
|
22
|
+
record_session: bool,
|
21
23
|
) -> None:
|
22
24
|
# see if we have already installed
|
23
25
|
if not (await sandbox().exec(["mkdir", HUMAN_AGENT_DIR])).success:
|
@@ -35,7 +37,7 @@ async def install_human_agent(
|
|
35
37
|
await checked_write_file(f"{INSTALL_DIR}/{BASHRC}", bash_rc, executable=True)
|
36
38
|
|
37
39
|
# write and run installation script
|
38
|
-
install_sh = human_agent_install_sh()
|
40
|
+
install_sh = human_agent_install_sh(user)
|
39
41
|
await checked_write_file(f"{INSTALL_DIR}/{INSTALL_SH}", install_sh, executable=True)
|
40
42
|
await checked_exec(["bash", f"./{INSTALL_SH}"], cwd=INSTALL_DIR)
|
41
43
|
await checked_exec(["rm", "-rf", INSTALL_DIR])
|
@@ -177,8 +179,8 @@ def human_agent_bashrc(commands: list[HumanAgentCommand], record_session: bool)
|
|
177
179
|
INSTRUCTIONS = dedent("""
|
178
180
|
if [ -z "$INSTRUCTIONS_SHOWN" ]; then
|
179
181
|
export INSTRUCTIONS_SHOWN=1
|
180
|
-
task instructions > instructions.txt
|
181
|
-
cat instructions.txt
|
182
|
+
task instructions > ~/instructions.txt
|
183
|
+
cat ~/instructions.txt
|
182
184
|
fi
|
183
185
|
""").lstrip()
|
184
186
|
|
@@ -190,7 +192,7 @@ def human_agent_bashrc(commands: list[HumanAgentCommand], record_session: bool)
|
|
190
192
|
return "\n".join([TERMINAL_CHECK, COMMANDS, RECORDING, INSTRUCTIONS, CLOCK])
|
191
193
|
|
192
194
|
|
193
|
-
def human_agent_install_sh() -> str:
|
195
|
+
def human_agent_install_sh(user: str | None) -> str:
|
194
196
|
return dedent(f"""
|
195
197
|
#!/usr/bin/env bash
|
196
198
|
|
@@ -201,8 +203,15 @@ def human_agent_install_sh() -> str:
|
|
201
203
|
# copy command script
|
202
204
|
cp {TASK_PY} $HUMAN_AGENT
|
203
205
|
|
204
|
-
#
|
205
|
-
|
206
|
+
# get user's home directory
|
207
|
+
USER="{user or ""}"
|
208
|
+
if [ -z "$USER" ]; then
|
209
|
+
USER=$(whoami)
|
210
|
+
fi
|
211
|
+
USER_HOME=$(getent passwd $USER | cut -d: -f6)
|
212
|
+
|
213
|
+
# append to user's .bashrc
|
214
|
+
cat {BASHRC} >> $USER_HOME/{BASHRC}
|
206
215
|
""")
|
207
216
|
|
208
217
|
|
inspect_ai/agent/_human/panel.py
CHANGED
@@ -35,6 +35,7 @@ class HumanAgentPanel(InputPanel):
|
|
35
35
|
VSCODE_LINKS_ID = "vscode-links"
|
36
36
|
LOGIN_VSCODE_TERMINAL_ID = "login-vscode-terminal"
|
37
37
|
LOGIN_VSCODE_WINDOW_ID = "login-vscode-window"
|
38
|
+
LOGIN_VSCODE_WINDOW_LABEL_ID = "login-vscode-window-label"
|
38
39
|
COMMAND_INSTRUCTIONS_ID = "command-instructions"
|
39
40
|
SANDBOX_COMMAND_ID = "sandbox-command"
|
40
41
|
|
@@ -88,7 +89,11 @@ class HumanAgentPanel(InputPanel):
|
|
88
89
|
markup=False,
|
89
90
|
)
|
90
91
|
with Horizontal(id=self.VSCODE_LINKS_ID):
|
91
|
-
yield Label(
|
92
|
+
yield Label(
|
93
|
+
"Login:",
|
94
|
+
classes=self.LINK_LABEL_CLASS,
|
95
|
+
id=self.LOGIN_VSCODE_WINDOW_LABEL_ID,
|
96
|
+
)
|
92
97
|
yield VSCodeLink(
|
93
98
|
"VS Code Window",
|
94
99
|
id=self.LOGIN_VSCODE_WINDOW_ID,
|
@@ -146,6 +151,14 @@ class HumanAgentPanel(InputPanel):
|
|
146
151
|
window_btn = cast(
|
147
152
|
VSCodeLink, self.query_one(f"#{self.LOGIN_VSCODE_WINDOW_ID}")
|
148
153
|
)
|
154
|
+
window_lbl = cast(
|
155
|
+
Label, self.query_one(f"#{self.LOGIN_VSCODE_WINDOW_LABEL_ID}")
|
156
|
+
)
|
157
|
+
window_btn_and_lbl_display = (
|
158
|
+
vscode and connection.vscode_command is not None
|
159
|
+
)
|
160
|
+
window_btn.display = window_btn_and_lbl_display
|
161
|
+
window_lbl.display = window_btn_and_lbl_display
|
149
162
|
if connection.vscode_command is not None:
|
150
163
|
window_btn.commands = [
|
151
164
|
VSCodeCommand(
|
@@ -10,7 +10,10 @@ from .view import HumanAgentView
|
|
10
10
|
|
11
11
|
|
12
12
|
async def run_human_agent_service(
|
13
|
-
|
13
|
+
user: str | None,
|
14
|
+
state: AgentState,
|
15
|
+
commands: list[HumanAgentCommand],
|
16
|
+
view: HumanAgentView | None,
|
14
17
|
) -> AgentState:
|
15
18
|
# initialise agent state
|
16
19
|
instructions = "\n\n".join([message.text for message in state.messages]).strip()
|
@@ -39,6 +42,7 @@ async def run_human_agent_service(
|
|
39
42
|
methods=methods,
|
40
43
|
until=task_is_completed,
|
41
44
|
sandbox=sandbox(),
|
45
|
+
user=user,
|
42
46
|
)
|
43
47
|
|
44
48
|
# set the answer if we have one
|
inspect_ai/agent/_react.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
from logging import getLogger
|
2
|
-
from typing import Literal, cast
|
2
|
+
from typing import Literal, Sequence, cast
|
3
3
|
|
4
4
|
from inspect_ai._util._async import is_callable_coroutine
|
5
5
|
from inspect_ai.model._call_tools import execute_tools
|
@@ -13,9 +13,10 @@ from inspect_ai.model._chat_message import (
|
|
13
13
|
from inspect_ai.model._model import Model, get_model
|
14
14
|
from inspect_ai.model._trim import trim_messages
|
15
15
|
from inspect_ai.scorer._score import score
|
16
|
-
from inspect_ai.tool.
|
16
|
+
from inspect_ai.tool._mcp.connection import mcp_connection
|
17
|
+
from inspect_ai.tool._tool import Tool, ToolResult, ToolSource, tool
|
18
|
+
from inspect_ai.tool._tool_def import ToolDef
|
17
19
|
from inspect_ai.tool._tool_info import parse_tool_info
|
18
|
-
from inspect_ai.tool._tool_with import tool_with
|
19
20
|
|
20
21
|
from ._agent import Agent, AgentState, agent, agent_with
|
21
22
|
from ._filter import MessageFilter
|
@@ -37,7 +38,7 @@ def react(
|
|
37
38
|
name: str | None = None,
|
38
39
|
description: str | None = None,
|
39
40
|
prompt: str | AgentPrompt | None = AgentPrompt(),
|
40
|
-
tools:
|
41
|
+
tools: Sequence[Tool | ToolDef | ToolSource] | None = None,
|
41
42
|
model: str | Model | Agent | None = None,
|
42
43
|
attempts: int | AgentAttempts = 1,
|
43
44
|
submit: AgentSubmit = AgentSubmit(),
|
@@ -88,6 +89,31 @@ def react(
|
|
88
89
|
Returns:
|
89
90
|
ReAct agent.
|
90
91
|
"""
|
92
|
+
|
93
|
+
# default submit tool
|
94
|
+
@tool(name="submit")
|
95
|
+
def default_submit_tool() -> Tool:
|
96
|
+
async def execute(answer: str) -> ToolResult:
|
97
|
+
"""Submit an answer for evaluation.
|
98
|
+
|
99
|
+
Args:
|
100
|
+
answer (str): Submitted answer
|
101
|
+
"""
|
102
|
+
return answer
|
103
|
+
|
104
|
+
return execute
|
105
|
+
|
106
|
+
# resolve tools
|
107
|
+
tools = list(tools) if tools is not None else []
|
108
|
+
|
109
|
+
# resolve submit tool
|
110
|
+
submit_tool = ToolDef(
|
111
|
+
submit.tool or default_submit_tool(),
|
112
|
+
name=submit.name,
|
113
|
+
description=submit.description,
|
114
|
+
)
|
115
|
+
tools.append(submit_tool)
|
116
|
+
|
91
117
|
# resolve prompt / system message
|
92
118
|
prompt = AgentPrompt(prompt) if isinstance(prompt, str) else prompt
|
93
119
|
if prompt:
|
@@ -98,7 +124,7 @@ def react(
|
|
98
124
|
prompt_lines.append(prompt.handoff_prompt)
|
99
125
|
if prompt.assistant_prompt:
|
100
126
|
prompt_lines.append(prompt.assistant_prompt)
|
101
|
-
prompt_content = "\n\n".join(prompt_lines).format(submit=
|
127
|
+
prompt_content = "\n\n".join(prompt_lines).format(submit=submit_tool.name)
|
102
128
|
system_message: ChatMessage | None = ChatMessageSystem(content=prompt_content)
|
103
129
|
else:
|
104
130
|
system_message = None
|
@@ -106,151 +132,146 @@ def react(
|
|
106
132
|
# resolve attempts
|
107
133
|
attempts = AgentAttempts(attempts) if isinstance(attempts, int) else attempts
|
108
134
|
|
109
|
-
# submission tool
|
110
|
-
@tool
|
111
|
-
def submit_tool() -> Tool:
|
112
|
-
async def execute(answer: str) -> ToolResult:
|
113
|
-
"""Submit an answer for evaluation.
|
114
|
-
|
115
|
-
Args:
|
116
|
-
answer (str): Submitted answer
|
117
|
-
"""
|
118
|
-
return answer
|
119
|
-
|
120
|
-
return execute
|
121
|
-
|
122
|
-
# helper to extract a submitted answer
|
123
135
|
def submission(tool_results: list[ChatMessage]) -> str | None:
|
124
136
|
return next(
|
125
137
|
(
|
126
138
|
result.text
|
127
139
|
for result in tool_results
|
128
140
|
if isinstance(result, ChatMessageTool)
|
129
|
-
and result.function ==
|
141
|
+
and result.function == submit_tool.name
|
130
142
|
),
|
131
143
|
None,
|
132
144
|
)
|
133
145
|
|
134
|
-
# resolve tools
|
135
|
-
tools = tools or []
|
136
|
-
tools.append(tool_with(submit_tool(), submit.name, submit.description))
|
137
|
-
|
138
146
|
async def execute(state: AgentState) -> AgentState:
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
147
|
+
async with mcp_connection(tools):
|
148
|
+
# prepend system message if we have one
|
149
|
+
if system_message:
|
150
|
+
state.messages.insert(0, system_message)
|
151
|
+
|
152
|
+
# resolve overflow handling
|
153
|
+
if truncation == "auto":
|
154
|
+
overflow = cast(MessageFilter | None, trim_messages)
|
155
|
+
elif truncation == "disabled":
|
156
|
+
overflow = None
|
157
|
+
else:
|
158
|
+
overflow = truncation
|
159
|
+
|
160
|
+
# track attempts
|
161
|
+
attempt_count = 0
|
162
|
+
|
163
|
+
# main loop = will terminate after submit (subject to max_attempts)
|
164
|
+
# or if a message or token limit is hit
|
165
|
+
while True:
|
166
|
+
# generate output and append assistant message
|
167
|
+
state = await _agent_generate(model, state, tools)
|
168
|
+
|
169
|
+
# check for context window overflow
|
170
|
+
if state.output.stop_reason == "model_length":
|
171
|
+
from inspect_ai.log._transcript import transcript
|
172
|
+
|
173
|
+
if overflow is not None:
|
174
|
+
previous_messages = state.messages[:-1]
|
175
|
+
state.messages = await overflow(previous_messages)
|
176
|
+
if len(state.messages) < len(previous_messages):
|
177
|
+
transcript().info(
|
178
|
+
"Agent exceeded model context window, truncating messages and continuing."
|
179
|
+
)
|
180
|
+
continue
|
181
|
+
|
182
|
+
# no overflow policy or overflow didn't reduce conversation length
|
183
|
+
transcript().info("Agent terminated: model context window exceeded")
|
184
|
+
break
|
185
|
+
|
186
|
+
# resolve tool calls (if any)
|
187
|
+
if state.output.message.tool_calls:
|
188
|
+
# call tool functions
|
189
|
+
messages, output = await execute_tools(state.messages, tools)
|
190
|
+
state.messages.extend(messages)
|
191
|
+
if output:
|
192
|
+
state.output = output
|
193
|
+
|
194
|
+
# check for a submission
|
195
|
+
answer = submission(messages)
|
196
|
+
if answer is not None:
|
197
|
+
# set the output to the answer for scoring
|
198
|
+
state.output.completion = (
|
199
|
+
f"{state.output.completion}\n\n{answer}".strip()
|
170
200
|
)
|
171
|
-
continue
|
172
|
-
|
173
|
-
# no overflow policy or overflow didn't reduce conversation length
|
174
|
-
transcript().info("Agent terminated: model context window exceeded")
|
175
|
-
break
|
176
|
-
|
177
|
-
# resolve tool calls (if any)
|
178
|
-
if state.output.message.tool_calls:
|
179
|
-
# call tool functions
|
180
|
-
messages, output = await execute_tools(state.messages, tools)
|
181
|
-
state.messages.extend(messages)
|
182
|
-
if output:
|
183
|
-
state.output = output
|
184
|
-
|
185
|
-
# check for a submission
|
186
|
-
answer = submission(messages)
|
187
|
-
if answer is not None:
|
188
|
-
# set the output to the answer for scoring
|
189
|
-
state.output.completion = (
|
190
|
-
f"{state.output.completion}\n\n{answer}".strip()
|
191
|
-
)
|
192
201
|
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
202
|
+
# exit if we are at max_attempts
|
203
|
+
attempt_count += 1
|
204
|
+
if attempt_count >= attempts.attempts:
|
205
|
+
break
|
197
206
|
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
207
|
+
# exit if the submission is successful
|
208
|
+
answer_scores = await score(state)
|
209
|
+
if attempts.score_value(answer_scores[0].value) == 1.0:
|
210
|
+
break
|
202
211
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
212
|
+
# otherwise notify the model that it was incorrect and continue
|
213
|
+
else:
|
214
|
+
if callable(attempts.incorrect_message):
|
215
|
+
if not is_callable_coroutine(
|
216
|
+
attempts.incorrect_message
|
217
|
+
):
|
218
|
+
raise ValueError(
|
219
|
+
"The incorrect_message function must be async."
|
220
|
+
)
|
221
|
+
response_message: str = (
|
222
|
+
await attempts.incorrect_message(
|
223
|
+
state, answer_scores
|
224
|
+
)
|
209
225
|
)
|
210
|
-
|
211
|
-
|
226
|
+
else:
|
227
|
+
response_message = attempts.incorrect_message
|
228
|
+
|
229
|
+
state.messages.append(
|
230
|
+
ChatMessageUser(content=response_message)
|
212
231
|
)
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
232
|
+
|
233
|
+
# call the on_continue hook (if any)
|
234
|
+
if callable(on_continue):
|
235
|
+
if not is_callable_coroutine(on_continue):
|
236
|
+
raise ValueError("The on_continue function must be async.")
|
237
|
+
do_continue = await cast(AgentContinue, on_continue)(state)
|
238
|
+
if do_continue is True:
|
239
|
+
# if there were no tool calls we need to send back a user message
|
240
|
+
if not state.output.message.tool_calls:
|
241
|
+
state.messages.append(
|
242
|
+
ChatMessageUser(
|
243
|
+
content=DEFAULT_CONTINUE_PROMPT.format(
|
244
|
+
submit=submit_tool.name
|
245
|
+
)
|
246
|
+
)
|
247
|
+
)
|
248
|
+
elif isinstance(do_continue, str):
|
226
249
|
state.messages.append(
|
227
250
|
ChatMessageUser(
|
228
|
-
content=
|
229
|
-
submit=submit.name
|
230
|
-
)
|
251
|
+
content=do_continue.format(submit=submit_tool.name)
|
231
252
|
)
|
232
253
|
)
|
233
|
-
|
254
|
+
else: # do_continue is False
|
255
|
+
break
|
256
|
+
|
257
|
+
# if there is no on_continue hook then add a user message if there were no tool calls
|
258
|
+
elif not state.output.message.tool_calls:
|
259
|
+
continue_msg = (
|
260
|
+
DEFAULT_CONTINUE_PROMPT
|
261
|
+
if on_continue is None
|
262
|
+
else str(on_continue)
|
263
|
+
)
|
234
264
|
state.messages.append(
|
235
|
-
ChatMessageUser(
|
265
|
+
ChatMessageUser(
|
266
|
+
content=continue_msg.format(submit=submit_tool.name)
|
267
|
+
)
|
236
268
|
)
|
237
|
-
else: # do_continue is False
|
238
|
-
break
|
239
269
|
|
240
|
-
#
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
state.messages.append(
|
246
|
-
ChatMessageUser(content=continue_msg.format(submit=submit.name))
|
247
|
-
)
|
248
|
-
|
249
|
-
# once we are complete, remove submit tool calls from the history
|
250
|
-
# (as they will potentially confuse parent agents who also have
|
251
|
-
# their own submit tools that they are 'watching' for)
|
252
|
-
state.messages = _remove_submit_tool(state.messages, submit.name)
|
253
|
-
return state
|
270
|
+
# once we are complete, remove submit tool calls from the history
|
271
|
+
# (as they will potentially confuse parent agents who also have
|
272
|
+
# their own submit tools that they are 'watching' for)
|
273
|
+
state.messages = _remove_submit_tool(state.messages, submit_tool.name)
|
274
|
+
return state
|
254
275
|
|
255
276
|
if name is not None or description is not None:
|
256
277
|
return agent_with(execute, name=name, description=description)
|
@@ -259,12 +280,24 @@ def react(
|
|
259
280
|
|
260
281
|
|
261
282
|
async def _agent_generate(
|
262
|
-
model: str | Model | Agent | None,
|
283
|
+
model: str | Model | Agent | None,
|
284
|
+
state: AgentState,
|
285
|
+
tools: Sequence[Tool | ToolDef | ToolSource],
|
263
286
|
) -> AgentState:
|
264
287
|
# convert model to agent
|
265
288
|
if isinstance(model, str | Model) or model is None:
|
266
289
|
model = _model_generate(model)
|
267
290
|
|
291
|
+
# resolve tools
|
292
|
+
resolved_tools: list[Tool] = []
|
293
|
+
for t in tools:
|
294
|
+
if isinstance(t, ToolSource):
|
295
|
+
resolved_tools.extend(await t.tools())
|
296
|
+
elif isinstance(t, ToolDef):
|
297
|
+
resolved_tools.append(t.as_tool())
|
298
|
+
else:
|
299
|
+
resolved_tools.append(t)
|
300
|
+
|
268
301
|
# confirm we have a tools param
|
269
302
|
agent_tool_info = parse_tool_info(model)
|
270
303
|
if "tools" not in agent_tool_info.parameters.properties:
|
@@ -273,7 +306,7 @@ async def _agent_generate(
|
|
273
306
|
)
|
274
307
|
|
275
308
|
# call the agent
|
276
|
-
return await model(state,
|
309
|
+
return await model(state, resolved_tools)
|
277
310
|
|
278
311
|
|
279
312
|
def _model_generate(model: str | Model | None) -> Agent:
|
inspect_ai/agent/_types.py
CHANGED
@@ -2,6 +2,7 @@ from typing import Awaitable, Callable, NamedTuple, TypeAlias
|
|
2
2
|
|
3
3
|
from inspect_ai.agent._agent import AgentState
|
4
4
|
from inspect_ai.scorer._metric import Score, ValueToFloat, value_to_float
|
5
|
+
from inspect_ai.tool._tool import Tool
|
5
6
|
|
6
7
|
DEFAULT_HANDOFF_PROMPT = """
|
7
8
|
You are part of a multi-agent system designed to make agent coordination and
|
@@ -80,8 +81,18 @@ class AgentAttempts(NamedTuple):
|
|
80
81
|
class AgentSubmit(NamedTuple):
|
81
82
|
"""Configure the submit tool of a react agent."""
|
82
83
|
|
83
|
-
name: str =
|
84
|
-
"""Name for submit tool."""
|
84
|
+
name: str | None = None
|
85
|
+
"""Name for submit tool (defaults to 'submit')."""
|
85
86
|
|
86
|
-
description: str
|
87
|
-
"""Description of submit tool."""
|
87
|
+
description: str | None = None
|
88
|
+
"""Description of submit tool (defaults to 'Submit an answer for evaluation')."""
|
89
|
+
|
90
|
+
tool: Tool | None = None
|
91
|
+
"""Alternate implementation for submit tool.
|
92
|
+
|
93
|
+
The tool can provide its `name` and `description` internally,
|
94
|
+
or these values can be overridden by the `name` and `description`
|
95
|
+
fields in `AgentSubmit`
|
96
|
+
|
97
|
+
The tool should return the `answer` provided to it for scoring.
|
98
|
+
"""
|
inspect_ai/approval/_policy.py
CHANGED
@@ -2,7 +2,7 @@ import fnmatch
|
|
2
2
|
import sys
|
3
3
|
from dataclasses import dataclass
|
4
4
|
from pathlib import Path
|
5
|
-
from typing import Any, Generator
|
5
|
+
from typing import Any, Generator
|
6
6
|
|
7
7
|
from pydantic import BaseModel, Field, model_validator
|
8
8
|
|
@@ -140,7 +140,7 @@ def approval_policies_from_config(
|
|
140
140
|
def create_approval_policy(
|
141
141
|
name: str, tools: str | list[str], params: dict[str, Any] = {}
|
142
142
|
) -> ApprovalPolicy:
|
143
|
-
approver =
|
143
|
+
approver = registry_create("approver", name, **params)
|
144
144
|
return ApprovalPolicy(approver, tools)
|
145
145
|
|
146
146
|
# map config -> policy
|