inspect-ai 0.3.90__py3-none-any.whl → 0.3.91__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +13 -0
- inspect_ai/_cli/eval.py +40 -0
- inspect_ai/_display/textual/widgets/samples.py +49 -4
- inspect_ai/_display/textual/widgets/vscode.py +4 -2
- inspect_ai/_eval/eval.py +41 -28
- inspect_ai/_eval/evalset.py +4 -0
- inspect_ai/_eval/loader.py +4 -5
- inspect_ai/_eval/registry.py +1 -1
- inspect_ai/_eval/run.py +6 -3
- inspect_ai/_eval/task/log.py +6 -0
- inspect_ai/_eval/task/run.py +108 -41
- inspect_ai/_eval/task/sandbox.py +19 -5
- inspect_ai/_util/_async.py +1 -1
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/environ.py +32 -0
- inspect_ai/_util/file.py +8 -1
- inspect_ai/_util/httpx.py +105 -22
- inspect_ai/_util/registry.py +83 -9
- inspect_ai/_util/text.py +81 -17
- inspect_ai/_util/transcript.py +9 -6
- inspect_ai/_util/vscode.py +7 -2
- inspect_ai/_view/schema.py +1 -1
- inspect_ai/_view/www/babel.config.js +11 -0
- inspect_ai/_view/www/dist/assets/index.css +3640 -3563
- inspect_ai/_view/www/dist/assets/index.js +59204 -52519
- inspect_ai/_view/www/eslint.config.mjs +10 -1
- inspect_ai/_view/www/jest.config.mjs +21 -0
- inspect_ai/_view/www/log-schema.json +111 -2
- inspect_ai/_view/www/package.json +19 -5
- inspect_ai/_view/www/src/{types → @types}/log.d.ts +95 -32
- inspect_ai/_view/www/{App.css → src/app/App.css} +22 -14
- inspect_ai/_view/www/src/app/App.tsx +168 -0
- inspect_ai/_view/www/src/{AppErrorBoundary.tsx → app/AppErrorBoundary.tsx} +1 -1
- inspect_ai/_view/www/src/{appearance → app/appearance}/icons.ts +1 -0
- inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.tsx +5 -5
- inspect_ai/_view/www/src/{workspace/WorkSpaceView.tsx → app/log-view/LogView.tsx} +59 -40
- inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +159 -0
- inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +109 -0
- inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.tsx +3 -3
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.tsx +4 -4
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.tsx +8 -8
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.tsx +6 -6
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.tsx +8 -8
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.tsx +35 -6
- inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +136 -0
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/SamplesTab.tsx +82 -73
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/grouping.ts +3 -3
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/types.ts +1 -1
- inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.tsx +2 -2
- inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.tsx +1 -1
- inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.tsx +4 -4
- inspect_ai/_view/www/src/{plan → app/plan}/PlanCard.tsx +2 -2
- inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.tsx +5 -5
- inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/AppRouter.tsx +58 -0
- inspect_ai/_view/www/src/app/routing/navigationHooks.ts +182 -0
- inspect_ai/_view/www/src/app/routing/url.ts +43 -0
- inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.tsx +11 -27
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDialog.tsx +36 -40
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.tsx +116 -49
- inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.module.css +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.tsx +29 -26
- inspect_ai/_view/www/src/{samples → app/samples}/SamplesTools.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.module.css +5 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.tsx +12 -4
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRenderer.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.tsx +6 -1
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatView.tsx +4 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.tsx +5 -3
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +12 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContent.tsx +11 -10
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContents.tsx +14 -8
- inspect_ai/_view/www/src/{samples → app/samples}/chat/messages.ts +2 -2
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolCallView.tsx +26 -27
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +19 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolInput.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.module.css +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.module.css +4 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/tool.ts +1 -1
- inspect_ai/_view/www/src/app/samples/chat/types.ts +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/samplesDescriptor.tsx +38 -15
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/CategoricalScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/NumericScoreDescriptor.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/OtherScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/types.ts +4 -3
- inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.module.css +2 -1
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.tsx +3 -0
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.tsx +47 -33
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.module.css +16 -0
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.tsx +47 -20
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/filters.ts +8 -6
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.tsx +4 -3
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/completions.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/language.ts +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/sampleDataAdapter.ts +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/sampleLimit.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.tsx +12 -11
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.tsx +6 -6
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ApprovalEventView.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ErrorEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/InputEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.module.css +13 -7
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.tsx +49 -21
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.tsx +11 -9
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleLimitEventView.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.tsx +8 -6
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/StepEventView.tsx +11 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.module.css +8 -7
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.tsx +32 -114
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.module.css +6 -5
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.tsx +14 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/utils.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenderers.tsx +23 -21
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenders.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.tsx +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +142 -0
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +39 -0
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/types.ts +1 -1
- inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.tsx +1 -1
- inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +16 -0
- inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +70 -0
- inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.module.css +8 -0
- inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.tsx +35 -17
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.tsx +2 -2
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.tsx +2 -2
- inspect_ai/_view/www/src/{types.ts → app/types.ts} +18 -11
- inspect_ai/_view/www/src/{usage → app/usage}/ModelTokenTable.tsx +1 -1
- inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.tsx +2 -2
- inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.tsx +1 -1
- inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.tsx +6 -6
- inspect_ai/_view/www/src/{api → client/api}/api-browser.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/api-http.ts +3 -3
- inspect_ai/_view/www/src/{api → client/api}/api-vscode.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/client-api.ts +6 -5
- inspect_ai/_view/www/src/{api → client/api}/index.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/types.ts +4 -1
- inspect_ai/_view/www/src/{logfile → client/remote}/remoteLogFile.ts +3 -3
- inspect_ai/_view/www/src/{storage → client/storage}/index.ts +11 -5
- inspect_ai/_view/www/src/components/Card.tsx +1 -1
- inspect_ai/_view/www/src/components/CopyButton.tsx +1 -1
- inspect_ai/_view/www/src/components/DownloadButton.tsx +1 -1
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +1 -1
- inspect_ai/_view/www/src/components/{ExpandablePanel.css → ExpandablePanel.module.css} +14 -11
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +16 -10
- inspect_ai/_view/www/src/components/FindBand.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.css +2 -2
- inspect_ai/_view/www/src/components/LargeModal.tsx +12 -1
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -1
- inspect_ai/_view/www/src/components/MessageBand.tsx +1 -1
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +1 -1
- inspect_ai/_view/www/src/constants.ts +10 -9
- inspect_ai/_view/www/src/index.tsx +27 -11
- inspect_ai/_view/www/src/state/appSlice.ts +44 -5
- inspect_ai/_view/www/src/state/hooks.ts +30 -7
- inspect_ai/_view/www/src/state/logSlice.ts +7 -5
- inspect_ai/_view/www/src/state/logsPolling.ts +1 -1
- inspect_ai/_view/www/src/state/logsSlice.ts +18 -13
- inspect_ai/_view/www/src/state/samplePolling.ts +12 -12
- inspect_ai/_view/www/src/state/sampleSlice.ts +3 -5
- inspect_ai/_view/www/src/state/sampleUtils.ts +1 -1
- inspect_ai/_view/www/src/{scoring/utils.ts → state/scoring.ts} +2 -2
- inspect_ai/_view/www/src/state/store.ts +9 -7
- inspect_ai/_view/www/src/state/utils.ts +1 -1
- inspect_ai/_view/www/src/tests/README.md +49 -0
- inspect_ai/_view/www/src/tests/__mocks__/fileMock.js +1 -0
- inspect_ai/_view/www/src/tests/__mocks__/styleMock.js +1 -0
- inspect_ai/_view/www/src/tests/setupTests.mjs +1 -0
- inspect_ai/_view/www/src/tests/utils/base64.test.ts +23 -0
- inspect_ai/_view/www/src/tests/utils/format.test.ts +127 -0
- inspect_ai/_view/www/src/tests/utils/path.test.ts +54 -0
- inspect_ai/_view/www/src/utils/format.ts +8 -2
- inspect_ai/_view/www/src/utils/path.ts +14 -2
- inspect_ai/_view/www/src/utils/polling.ts +1 -2
- inspect_ai/_view/www/src/utils/uri.ts +32 -0
- inspect_ai/_view/www/yarn.lock +3310 -382
- inspect_ai/agent/_handoff.py +6 -3
- inspect_ai/agent/_human/agent.py +5 -3
- inspect_ai/agent/_human/install.py +16 -7
- inspect_ai/agent/_human/panel.py +14 -1
- inspect_ai/agent/_human/service.py +5 -1
- inspect_ai/agent/_react.py +161 -128
- inspect_ai/agent/_types.py +15 -4
- inspect_ai/approval/_policy.py +2 -2
- inspect_ai/log/_file.py +30 -11
- inspect_ai/log/_log.py +7 -1
- inspect_ai/log/_recorders/eval.py +3 -0
- inspect_ai/log/_recorders/types.py +1 -0
- inspect_ai/log/_samples.py +4 -0
- inspect_ai/model/_call_tools.py +33 -17
- inspect_ai/model/_generate_config.py +10 -2
- inspect_ai/model/_model.py +41 -21
- inspect_ai/model/_model_output.py +2 -1
- inspect_ai/model/_openai.py +10 -8
- inspect_ai/model/_openai_responses.py +83 -42
- inspect_ai/model/_providers/anthropic.py +14 -12
- inspect_ai/model/_providers/google.py +191 -95
- inspect_ai/model/_providers/hf.py +1 -1
- inspect_ai/model/_providers/mistral.py +2 -3
- inspect_ai/model/_providers/openai.py +54 -17
- inspect_ai/model/_providers/openai_o1.py +1 -1
- inspect_ai/model/_providers/openai_responses.py +28 -16
- inspect_ai/model/_providers/openrouter.py +14 -0
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/util/chatapi.py +17 -7
- inspect_ai/model/_providers/vllm.py +1 -1
- inspect_ai/scorer/_metric.py +17 -1
- inspect_ai/scorer/_model.py +51 -6
- inspect_ai/scorer/_scorer.py +1 -1
- inspect_ai/solver/_human_agent.py +3 -0
- inspect_ai/solver/_plan.py +1 -1
- inspect_ai/solver/_solver.py +1 -1
- inspect_ai/solver/_use_tools.py +14 -8
- inspect_ai/tool/__init__.py +16 -1
- inspect_ai/tool/_json_rpc_helpers.py +285 -0
- inspect_ai/tool/_mcp/__init__.py +13 -0
- inspect_ai/tool/_mcp/_context.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +293 -0
- inspect_ai/tool/_mcp/_sandbox.py +104 -0
- inspect_ai/tool/_mcp/_types.py +31 -0
- inspect_ai/tool/_mcp/connection.py +60 -0
- inspect_ai/tool/_mcp/sampling.py +118 -0
- inspect_ai/tool/_mcp/server.py +112 -0
- inspect_ai/tool/_mcp/tools.py +34 -0
- inspect_ai/tool/_tool.py +13 -0
- inspect_ai/tool/_tool_def.py +24 -7
- inspect_ai/tool/_tool_support_helpers.py +129 -153
- inspect_ai/tool/_tools/_bash_session.py +11 -11
- inspect_ai/tool/_tools/_text_editor.py +6 -6
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +8 -8
- inspect_ai/util/_anyio.py +31 -20
- inspect_ai/util/_json.py +20 -2
- inspect_ai/util/_sandbox/context.py +18 -7
- inspect_ai/util/_sandbox/docker/compose.py +1 -1
- inspect_ai/util/_sandbox/docker/docker.py +92 -21
- inspect_ai/util/_sandbox/environment.py +33 -2
- inspect_ai/util/_sandbox/events.py +2 -2
- inspect_ai/util/_sandbox/service.py +13 -3
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/METADATA +6 -2
- inspect_ai-0.3.91.dist-info/RECORD +732 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/src/App.tsx +0 -316
- inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +0 -4
- inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +0 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +0 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +0 -14
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +0 -292
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +0 -5
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +0 -57
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +0 -43
- inspect_ai-0.3.90.dist-info/RECORD +0 -705
- /inspect_ai/_view/www/src/{types → @types}/asciicinema-player.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/jsondiffpatch.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/markdown-it-katex.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/prism.d.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/colors.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/fonts.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/styles.ts +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataGrid.tsx +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.tsx +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetadataGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/types.ts +0 -0
- /inspect_ai/_view/www/src/{workspace/WorkSpaceView.module.css → app/log-view/LogView.module.css} +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.tsx +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/JsonTab.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.tsx +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/types.ts +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/utils.ts +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.tsx +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/error.ts +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/tokenize.ts +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateDiffView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.module.css +0 -0
- /inspect_ai/_view/www/src/{api → client/api}/api-shared.ts +0 -0
- /inspect_ai/_view/www/src/{api → client/api}/jsonrpc.ts +0 -0
- /inspect_ai/_view/www/src/{logfile → client/remote}/remoteZipFile.ts +0 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.91.dist-info}/top_level.txt +0 -0
inspect_ai/log/_file.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
import os
|
2
2
|
import re
|
3
3
|
from logging import getLogger
|
4
|
+
from pathlib import Path
|
4
5
|
from typing import Any, Callable, Generator, Literal
|
5
6
|
|
6
7
|
from pydantic import BaseModel
|
@@ -97,7 +98,7 @@ def list_eval_logs(
|
|
97
98
|
|
98
99
|
def write_eval_log(
|
99
100
|
log: EvalLog,
|
100
|
-
location: str | FileInfo | None = None,
|
101
|
+
location: str | Path | FileInfo | None = None,
|
101
102
|
format: Literal["eval", "json", "auto"] = "auto",
|
102
103
|
) -> None:
|
103
104
|
"""Write an evaluation log.
|
@@ -121,7 +122,7 @@ def write_eval_log(
|
|
121
122
|
|
122
123
|
async def write_eval_log_async(
|
123
124
|
log: EvalLog,
|
124
|
-
location: str | FileInfo | None = None,
|
125
|
+
location: str | Path | FileInfo | None = None,
|
125
126
|
format: Literal["eval", "json", "auto"] = "auto",
|
126
127
|
) -> None:
|
127
128
|
"""Write an evaluation log.
|
@@ -140,7 +141,13 @@ async def write_eval_log_async(
|
|
140
141
|
raise ValueError(
|
141
142
|
"EvalLog passe to write_eval_log does not have a location, so you must pass an explicit location"
|
142
143
|
)
|
143
|
-
location =
|
144
|
+
location = (
|
145
|
+
location
|
146
|
+
if isinstance(location, str)
|
147
|
+
else location.as_posix()
|
148
|
+
if isinstance(location, Path)
|
149
|
+
else location.name
|
150
|
+
)
|
144
151
|
|
145
152
|
logger.debug(f"Writing eval log to {location}")
|
146
153
|
|
@@ -197,7 +204,7 @@ def write_log_dir_manifest(
|
|
197
204
|
|
198
205
|
|
199
206
|
def read_eval_log(
|
200
|
-
log_file: str | EvalLogInfo,
|
207
|
+
log_file: str | Path | EvalLogInfo,
|
201
208
|
header_only: bool = False,
|
202
209
|
resolve_attachments: bool = False,
|
203
210
|
format: Literal["eval", "json", "auto"] = "auto",
|
@@ -235,7 +242,7 @@ def read_eval_log(
|
|
235
242
|
|
236
243
|
|
237
244
|
async def read_eval_log_async(
|
238
|
-
log_file: str | EvalLogInfo,
|
245
|
+
log_file: str | Path | EvalLogInfo,
|
239
246
|
header_only: bool = False,
|
240
247
|
resolve_attachments: bool = False,
|
241
248
|
format: Literal["eval", "json", "auto"] = "auto",
|
@@ -255,7 +262,13 @@ async def read_eval_log_async(
|
|
255
262
|
EvalLog object read from file.
|
256
263
|
"""
|
257
264
|
# resolve to file path
|
258
|
-
log_file =
|
265
|
+
log_file = (
|
266
|
+
log_file
|
267
|
+
if isinstance(log_file, str)
|
268
|
+
else log_file.as_posix()
|
269
|
+
if isinstance(log_file, Path)
|
270
|
+
else log_file.name
|
271
|
+
)
|
259
272
|
logger.debug(f"Reading eval log from {log_file}")
|
260
273
|
|
261
274
|
# get recorder type
|
@@ -291,7 +304,7 @@ def read_eval_log_headers(
|
|
291
304
|
|
292
305
|
|
293
306
|
async def read_eval_log_headers_async(
|
294
|
-
log_files: list[str] | list[EvalLogInfo],
|
307
|
+
log_files: list[str] | list[Path] | list[EvalLogInfo],
|
295
308
|
) -> list[EvalLog]:
|
296
309
|
return [
|
297
310
|
await read_eval_log_async(log_file, header_only=True) for log_file in log_files
|
@@ -299,7 +312,7 @@ async def read_eval_log_headers_async(
|
|
299
312
|
|
300
313
|
|
301
314
|
def read_eval_log_sample(
|
302
|
-
log_file: str | EvalLogInfo,
|
315
|
+
log_file: str | Path | EvalLogInfo,
|
303
316
|
id: int | str,
|
304
317
|
epoch: int = 1,
|
305
318
|
resolve_attachments: bool = False,
|
@@ -336,7 +349,7 @@ def read_eval_log_sample(
|
|
336
349
|
|
337
350
|
|
338
351
|
async def read_eval_log_sample_async(
|
339
|
-
log_file: str | EvalLogInfo,
|
352
|
+
log_file: str | Path | EvalLogInfo,
|
340
353
|
id: int | str,
|
341
354
|
epoch: int = 1,
|
342
355
|
resolve_attachments: bool = False,
|
@@ -360,7 +373,13 @@ async def read_eval_log_sample_async(
|
|
360
373
|
IndexError: If the passed id and epoch are not found.
|
361
374
|
"""
|
362
375
|
# resolve to file path
|
363
|
-
log_file =
|
376
|
+
log_file = (
|
377
|
+
log_file
|
378
|
+
if isinstance(log_file, str)
|
379
|
+
else log_file.as_posix()
|
380
|
+
if isinstance(log_file, Path)
|
381
|
+
else log_file.name
|
382
|
+
)
|
364
383
|
|
365
384
|
if format == "auto":
|
366
385
|
recorder_type = recorder_type_for_location(log_file)
|
@@ -375,7 +394,7 @@ async def read_eval_log_sample_async(
|
|
375
394
|
|
376
395
|
|
377
396
|
def read_eval_log_samples(
|
378
|
-
log_file: str | EvalLogInfo,
|
397
|
+
log_file: str | Path | EvalLogInfo,
|
379
398
|
all_samples_required: bool = True,
|
380
399
|
resolve_attachments: bool = False,
|
381
400
|
format: Literal["eval", "json", "auto"] = "auto",
|
inspect_ai/log/_log.py
CHANGED
@@ -87,6 +87,9 @@ class EvalConfig(BaseModel):
|
|
87
87
|
of samples fails.
|
88
88
|
"""
|
89
89
|
|
90
|
+
retry_on_error: int | None = Field(default=None)
|
91
|
+
"""Number of times to retry samples if they encounter errors."""
|
92
|
+
|
90
93
|
message_limit: int | None = Field(default=None)
|
91
94
|
"""Maximum messages to allow per sample."""
|
92
95
|
|
@@ -255,6 +258,9 @@ class EvalSample(BaseModel):
|
|
255
258
|
error: EvalError | None = Field(default=None)
|
256
259
|
"""Error that halted sample."""
|
257
260
|
|
261
|
+
error_retries: list[EvalError] | None = Field(default=None)
|
262
|
+
"""Errors that were retried for this sample."""
|
263
|
+
|
258
264
|
attachments: dict[str, str] = Field(default_factory=dict)
|
259
265
|
"""Attachments referenced from messages and events.
|
260
266
|
|
@@ -703,7 +709,7 @@ def rich_traceback(
|
|
703
709
|
exc_value=exc_value,
|
704
710
|
traceback=exc_traceback,
|
705
711
|
suppress=[click, asyncio, tenacity, sys.modules[PKG_NAME]],
|
706
|
-
show_locals=
|
712
|
+
show_locals=os.environ.get("INSPECT_TRACEBACK_LOCALS", None) == "1",
|
707
713
|
width=CONSOLE_DISPLAY_WIDTH,
|
708
714
|
)
|
709
715
|
return rich_tb
|
@@ -20,6 +20,7 @@ class SampleSummary(BaseModel):
|
|
20
20
|
scores: dict[str, Score] | None = Field(default=None)
|
21
21
|
error: str | None = Field(default=None)
|
22
22
|
limit: str | None = Field(default=None)
|
23
|
+
retries: int | None = Field(default=None)
|
23
24
|
|
24
25
|
@model_validator(mode="after")
|
25
26
|
def thin_scores(self) -> "SampleSummary":
|
inspect_ai/log/_samples.py
CHANGED
@@ -18,6 +18,7 @@ class ActiveSample:
|
|
18
18
|
self,
|
19
19
|
*,
|
20
20
|
task: str,
|
21
|
+
log_location: str,
|
21
22
|
model: str,
|
22
23
|
sample: Sample,
|
23
24
|
epoch: int,
|
@@ -33,6 +34,7 @@ class ActiveSample:
|
|
33
34
|
self.started: float | None = None
|
34
35
|
self.completed: float | None = None
|
35
36
|
self.task = task
|
37
|
+
self.log_location = log_location
|
36
38
|
self.model = model
|
37
39
|
self.sample = sample
|
38
40
|
self.epoch = epoch
|
@@ -76,6 +78,7 @@ def init_active_samples() -> None:
|
|
76
78
|
async def active_sample(
|
77
79
|
*,
|
78
80
|
task: str,
|
81
|
+
log_location: str,
|
79
82
|
model: str,
|
80
83
|
sample: Sample,
|
81
84
|
epoch: int,
|
@@ -89,6 +92,7 @@ async def active_sample(
|
|
89
92
|
# create the sample
|
90
93
|
active = ActiveSample(
|
91
94
|
task=task,
|
95
|
+
log_location=log_location,
|
92
96
|
model=model,
|
93
97
|
sample=sample,
|
94
98
|
epoch=epoch,
|
inspect_ai/model/_call_tools.py
CHANGED
@@ -3,6 +3,7 @@ import json
|
|
3
3
|
import types
|
4
4
|
from copy import copy
|
5
5
|
from dataclasses import is_dataclass
|
6
|
+
from datetime import date, datetime, time
|
6
7
|
from logging import getLogger
|
7
8
|
from textwrap import dedent
|
8
9
|
from types import UnionType
|
@@ -13,6 +14,8 @@ from typing import (
|
|
13
14
|
List,
|
14
15
|
NamedTuple,
|
15
16
|
Optional,
|
17
|
+
Sequence,
|
18
|
+
Set,
|
16
19
|
Tuple,
|
17
20
|
Type,
|
18
21
|
Union,
|
@@ -45,7 +48,12 @@ from inspect_ai._util.working import sample_waiting_time
|
|
45
48
|
from inspect_ai.model._display import display_conversation_message
|
46
49
|
from inspect_ai.model._model_output import ModelOutput
|
47
50
|
from inspect_ai.tool import Tool, ToolCall, ToolError, ToolInfo
|
48
|
-
from inspect_ai.tool._tool import
|
51
|
+
from inspect_ai.tool._tool import (
|
52
|
+
ToolApprovalError,
|
53
|
+
ToolParsingError,
|
54
|
+
ToolResult,
|
55
|
+
ToolSource,
|
56
|
+
)
|
49
57
|
from inspect_ai.tool._tool_call import ToolCallContent, ToolCallError
|
50
58
|
from inspect_ai.tool._tool_def import ToolDef, tool_defs
|
51
59
|
from inspect_ai.tool._tool_info import parse_docstring
|
@@ -83,7 +91,7 @@ class ExecuteToolsResult(NamedTuple):
|
|
83
91
|
|
84
92
|
async def execute_tools(
|
85
93
|
messages: list[ChatMessage],
|
86
|
-
tools:
|
94
|
+
tools: Sequence[Tool | ToolDef | ToolSource] | ToolSource,
|
87
95
|
max_output: int | None = None,
|
88
96
|
) -> ExecuteToolsResult:
|
89
97
|
"""Perform tool calls in the last assistant message.
|
@@ -108,7 +116,7 @@ async def execute_tools(
|
|
108
116
|
transcript,
|
109
117
|
)
|
110
118
|
|
111
|
-
tdefs = tool_defs(tools)
|
119
|
+
tdefs = await tool_defs(tools)
|
112
120
|
|
113
121
|
async def call_tool_task(
|
114
122
|
call: ToolCall,
|
@@ -385,7 +393,6 @@ async def call_tool(
|
|
385
393
|
|
386
394
|
# normal tool call
|
387
395
|
else:
|
388
|
-
arguments = tool_params(call.arguments, tool_def.tool)
|
389
396
|
result: ToolResult = await tool_def.tool(**arguments)
|
390
397
|
return result, [], None, None
|
391
398
|
|
@@ -498,10 +505,7 @@ def prepend_agent_name(
|
|
498
505
|
|
499
506
|
|
500
507
|
def tools_info(
|
501
|
-
tools:
|
502
|
-
| list[ToolDef]
|
503
|
-
| list[ToolInfo]
|
504
|
-
| list[Tool | ToolDef | ToolInfo],
|
508
|
+
tools: Sequence[Tool | ToolDef | ToolInfo],
|
505
509
|
) -> list[ToolInfo]:
|
506
510
|
tools_info: list[ToolInfo] = []
|
507
511
|
for tool in tools:
|
@@ -521,16 +525,14 @@ def tools_info(
|
|
521
525
|
|
522
526
|
|
523
527
|
def disable_parallel_tools(
|
524
|
-
tools:
|
525
|
-
| list[ToolDef]
|
526
|
-
| list[ToolInfo]
|
527
|
-
| list[Tool | ToolDef | ToolInfo],
|
528
|
+
tools: Sequence[Tool | ToolDef | ToolInfo | ToolSource] | ToolSource,
|
528
529
|
) -> bool:
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
530
|
+
if not isinstance(tools, ToolSource):
|
531
|
+
for tool in tools:
|
532
|
+
if isinstance(tool, Tool):
|
533
|
+
tool = ToolDef(tool)
|
534
|
+
if isinstance(tool, ToolDef) and not tool.parallel:
|
535
|
+
return True
|
534
536
|
return False
|
535
537
|
|
536
538
|
|
@@ -598,6 +600,15 @@ def tool_param(type_hint: Type[Any], input: Any) -> Any:
|
|
598
600
|
raise ToolParsingError(
|
599
601
|
f"Unable to convert '{input}' to {type_hint.__name__}"
|
600
602
|
)
|
603
|
+
elif type_hint == datetime:
|
604
|
+
if input.endswith("Z"):
|
605
|
+
# convert trailing Z to +00:00
|
606
|
+
input = input[:-1] + "+00:00"
|
607
|
+
return datetime.fromisoformat(input)
|
608
|
+
elif type_hint == date:
|
609
|
+
return date.fromisoformat(input)
|
610
|
+
elif type_hint == time:
|
611
|
+
return time.fromisoformat(input)
|
601
612
|
elif is_typeddict(type_hint):
|
602
613
|
typeddict_data: dict[str, Any] = {}
|
603
614
|
annotations = get_type_hints(type_hint)
|
@@ -619,6 +630,11 @@ def tool_param(type_hint: Type[Any], input: Any) -> Any:
|
|
619
630
|
return [tool_param(args[0], x) for x in input]
|
620
631
|
else:
|
621
632
|
return input
|
633
|
+
elif origin is set or origin is Set:
|
634
|
+
if args:
|
635
|
+
return {tool_param(args[0], x) for x in input}
|
636
|
+
else:
|
637
|
+
return set(input)
|
622
638
|
elif origin is tuple or origin is Tuple:
|
623
639
|
if args:
|
624
640
|
return tuple([tool_param(args[0], x) for x in input])
|
@@ -29,7 +29,7 @@ class GenerateConfigArgs(TypedDict, total=False):
|
|
29
29
|
"""Type for kwargs that selectively override GenerateConfig."""
|
30
30
|
|
31
31
|
max_retries: int | None
|
32
|
-
"""Maximum number of times to retry request (defaults to
|
32
|
+
"""Maximum number of times to retry request (defaults to unlimited)."""
|
33
33
|
|
34
34
|
timeout: int | None
|
35
35
|
"""Request timeout (in seconds)."""
|
@@ -97,6 +97,9 @@ class GenerateConfigArgs(TypedDict, total=False):
|
|
97
97
|
reasoning_tokens: int | None
|
98
98
|
"""Maximum number of tokens to use for reasoning. Anthropic Claude models only."""
|
99
99
|
|
100
|
+
reasoning_summary: Literal["concise", "detailed", "auto"] | None
|
101
|
+
"""Provide summary of reasoning steps (defaults to no summary). Use 'auto' to access the most detailed summarizer available for the current model. OpenAI reasoning models only."""
|
102
|
+
|
100
103
|
reasoning_history: Literal["none", "all", "last", "auto"] | None
|
101
104
|
"""Include reasoning in chat message history sent to generate."""
|
102
105
|
|
@@ -108,7 +111,7 @@ class GenerateConfig(BaseModel):
|
|
108
111
|
"""Model generation options."""
|
109
112
|
|
110
113
|
max_retries: int | None = Field(default=None)
|
111
|
-
"""Maximum number of times to retry request (defaults to
|
114
|
+
"""Maximum number of times to retry request (defaults to unlimited)."""
|
112
115
|
|
113
116
|
timeout: int | None = Field(default=None)
|
114
117
|
"""Request timeout (in seconds)."""
|
@@ -176,6 +179,11 @@ class GenerateConfig(BaseModel):
|
|
176
179
|
reasoning_tokens: int | None = Field(default=None)
|
177
180
|
"""Maximum number of tokens to use for reasoning. Anthropic Claude models only."""
|
178
181
|
|
182
|
+
reasoning_summary: Literal["concise", "detailed", "auto"] | None = Field(
|
183
|
+
default=None
|
184
|
+
)
|
185
|
+
"""Provide summary of reasoning steps (defaults to no summary). Use 'auto' to access the most detailed summarizer available for the current model. OpenAI reasoning models only."""
|
186
|
+
|
179
187
|
reasoning_history: Literal["none", "all", "last", "auto"] | None = Field(
|
180
188
|
default=None
|
181
189
|
)
|
inspect_ai/model/_model.py
CHANGED
@@ -9,7 +9,15 @@ from contextvars import ContextVar
|
|
9
9
|
from copy import copy, deepcopy
|
10
10
|
from datetime import datetime
|
11
11
|
from types import TracebackType
|
12
|
-
from typing import
|
12
|
+
from typing import (
|
13
|
+
Any,
|
14
|
+
AsyncIterator,
|
15
|
+
Callable,
|
16
|
+
Literal,
|
17
|
+
Sequence,
|
18
|
+
Type,
|
19
|
+
cast,
|
20
|
+
)
|
13
21
|
|
14
22
|
from pydantic_core import to_jsonable_python
|
15
23
|
from tenacity import (
|
@@ -45,6 +53,7 @@ from inspect_ai._util.retry import report_http_retry
|
|
45
53
|
from inspect_ai._util.trace import trace_action
|
46
54
|
from inspect_ai._util.working import report_sample_waiting_time, sample_working_time
|
47
55
|
from inspect_ai.tool import Tool, ToolChoice, ToolFunction, ToolInfo
|
56
|
+
from inspect_ai.tool._tool import ToolSource
|
48
57
|
from inspect_ai.tool._tool_call import ToolCallModelInputHints
|
49
58
|
from inspect_ai.tool._tool_def import ToolDef, tool_defs
|
50
59
|
from inspect_ai.util import concurrency
|
@@ -54,7 +63,9 @@ from ._call_tools import (
|
|
54
63
|
disable_parallel_tools,
|
55
64
|
execute_tools,
|
56
65
|
tool_call_view,
|
57
|
-
|
66
|
+
)
|
67
|
+
from ._call_tools import (
|
68
|
+
tools_info as get_tools_info,
|
58
69
|
)
|
59
70
|
from ._chat_message import (
|
60
71
|
ChatMessage,
|
@@ -326,10 +337,7 @@ class Model:
|
|
326
337
|
async def generate(
|
327
338
|
self,
|
328
339
|
input: str | list[ChatMessage],
|
329
|
-
tools:
|
330
|
-
| list[ToolDef]
|
331
|
-
| list[ToolInfo]
|
332
|
-
| list[Tool | ToolDef | ToolInfo] = [],
|
340
|
+
tools: Sequence[Tool | ToolDef | ToolInfo | ToolSource] | ToolSource = [],
|
333
341
|
tool_choice: ToolChoice | None = None,
|
334
342
|
config: GenerateConfig = GenerateConfig(),
|
335
343
|
cache: bool | CachePolicy = False,
|
@@ -422,7 +430,7 @@ class Model:
|
|
422
430
|
async def generate_loop(
|
423
431
|
self,
|
424
432
|
input: str | list[ChatMessage],
|
425
|
-
tools:
|
433
|
+
tools: Sequence[Tool | ToolDef | ToolSource] | ToolSource = [],
|
426
434
|
config: GenerateConfig = GenerateConfig(),
|
427
435
|
cache: bool | CachePolicy = False,
|
428
436
|
) -> tuple[list[ChatMessage], ModelOutput]:
|
@@ -471,10 +479,7 @@ class Model:
|
|
471
479
|
async def _generate(
|
472
480
|
self,
|
473
481
|
input: list[ChatMessage],
|
474
|
-
tools:
|
475
|
-
| list[ToolDef]
|
476
|
-
| list[ToolInfo]
|
477
|
-
| list[Tool | ToolDef | ToolInfo],
|
482
|
+
tools: Sequence[Tool | ToolDef | ToolInfo | ToolSource] | ToolSource,
|
478
483
|
tool_choice: ToolChoice | None,
|
479
484
|
config: GenerateConfig,
|
480
485
|
cache: bool | CachePolicy = False,
|
@@ -482,15 +487,30 @@ class Model:
|
|
482
487
|
# default to 'auto' for tool_choice (same as underlying model apis)
|
483
488
|
tool_choice = tool_choice if tool_choice else "auto"
|
484
489
|
|
490
|
+
# resolve top level tool source
|
491
|
+
if isinstance(tools, ToolSource):
|
492
|
+
tools = await tools.tools()
|
493
|
+
|
494
|
+
# resolve tool sources
|
495
|
+
resolved_tools: list[Tool | ToolDef | ToolInfo] = []
|
496
|
+
for tool in tools:
|
497
|
+
if isinstance(tool, ToolSource):
|
498
|
+
source_tools = await tool.tools()
|
499
|
+
resolved_tools.extend(source_tools)
|
500
|
+
else:
|
501
|
+
resolved_tools.append(tool)
|
502
|
+
|
485
503
|
# extract tool defs if we can
|
486
|
-
tdefs = tool_defs(
|
504
|
+
tdefs = await tool_defs(
|
505
|
+
[tool for tool in resolved_tools if not isinstance(tool, ToolInfo)]
|
506
|
+
)
|
487
507
|
|
488
508
|
# resolve all tools into tool_info
|
489
|
-
|
509
|
+
tools_info = get_tools_info(resolved_tools)
|
490
510
|
|
491
511
|
# if we have a specific tool selected then filter out the others
|
492
512
|
if isinstance(tool_choice, ToolFunction):
|
493
|
-
|
513
|
+
tools_info = [tool for tool in tools_info if tool.name == tool_choice.name]
|
494
514
|
|
495
515
|
# if tool_choice is "none" or if there are no tools then fully purge
|
496
516
|
# the tools (as some models (e.g. openai and mistral) get confused
|
@@ -498,11 +518,11 @@ class Model:
|
|
498
518
|
# (they both 'semi' use the tool by placing the arguments in JSON
|
499
519
|
# in their output!). on the other hand, anthropic actually errors if
|
500
520
|
# there are tools anywhere in the message stream and no tools defined.
|
501
|
-
if tool_choice == "none" or len(
|
521
|
+
if tool_choice == "none" or len(tools_info) == 0:
|
502
522
|
# allow model providers to implement a tools_required() method to
|
503
523
|
# force tools to be passed (we need this for anthropic)
|
504
524
|
if not self.api.tools_required():
|
505
|
-
|
525
|
+
tools_info = []
|
506
526
|
tool_choice = "none"
|
507
527
|
|
508
528
|
# handle reasoning history
|
@@ -569,13 +589,13 @@ class Model:
|
|
569
589
|
model=str(self),
|
570
590
|
policy=policy,
|
571
591
|
tool_choice=tool_choice,
|
572
|
-
tools=
|
592
|
+
tools=tools_info,
|
573
593
|
)
|
574
594
|
existing = cache_fetch(cache_entry)
|
575
595
|
if isinstance(existing, ModelOutput):
|
576
596
|
self._record_model_interaction(
|
577
597
|
input=input,
|
578
|
-
tools=
|
598
|
+
tools=tools_info,
|
579
599
|
tool_choice=tool_choice,
|
580
600
|
config=config,
|
581
601
|
cache="read",
|
@@ -593,7 +613,7 @@ class Model:
|
|
593
613
|
# (we'll update it with the results once we have them)
|
594
614
|
complete = self._record_model_interaction(
|
595
615
|
input=input,
|
596
|
-
tools=
|
616
|
+
tools=tools_info,
|
597
617
|
tool_choice=tool_choice,
|
598
618
|
config=config,
|
599
619
|
cache="write" if cache else None,
|
@@ -604,7 +624,7 @@ class Model:
|
|
604
624
|
try:
|
605
625
|
result = await self.api.generate(
|
606
626
|
input=input,
|
607
|
-
tools=
|
627
|
+
tools=tools_info,
|
608
628
|
tool_choice=tool_choice,
|
609
629
|
config=config,
|
610
630
|
)
|
@@ -1371,7 +1391,7 @@ def combine_messages(
|
|
1371
1391
|
def log_model_retry(model_name: str, retry_state: RetryCallState) -> None:
|
1372
1392
|
logger.log(
|
1373
1393
|
HTTP,
|
1374
|
-
f"-> {model_name} retry {retry_state.attempt_number}
|
1394
|
+
f"-> {model_name} retry {retry_state.attempt_number} (retrying in {retry_state.upcoming_sleep:,.0f} seconds)",
|
1375
1395
|
)
|
1376
1396
|
|
1377
1397
|
|
@@ -3,6 +3,7 @@ from typing import Any, Literal, Type
|
|
3
3
|
|
4
4
|
from pydantic import BaseModel, Field, JsonValue, model_validator
|
5
5
|
|
6
|
+
from inspect_ai._util.content import Content
|
6
7
|
from inspect_ai.tool._tool_call import ToolCall
|
7
8
|
|
8
9
|
from ._chat_message import ChatMessageAssistant
|
@@ -165,7 +166,7 @@ class ModelOutput(BaseModel):
|
|
165
166
|
@staticmethod
|
166
167
|
def from_content(
|
167
168
|
model: str,
|
168
|
-
content: str,
|
169
|
+
content: str | list[Content],
|
169
170
|
stop_reason: StopReason = "stop",
|
170
171
|
error: str | None = None,
|
171
172
|
) -> "ModelOutput":
|
inspect_ai/model/_openai.py
CHANGED
@@ -82,16 +82,16 @@ def is_o_series(name: str) -> bool:
|
|
82
82
|
return not is_gpt(name) and bool(re.search(r"o\d+", name))
|
83
83
|
|
84
84
|
|
85
|
-
def
|
86
|
-
return "o1
|
85
|
+
def is_o1(name: str) -> bool:
|
86
|
+
return "o1" in name and not is_o1_early(name)
|
87
87
|
|
88
88
|
|
89
|
-
def
|
90
|
-
return "o1-mini" in name
|
89
|
+
def is_o1_early(name: str) -> bool:
|
90
|
+
return "o1-mini" in name or "o1-preview" in name
|
91
91
|
|
92
92
|
|
93
|
-
def
|
94
|
-
return "
|
93
|
+
def is_o3_mini(name: str) -> bool:
|
94
|
+
return "o3-mini" in name
|
95
95
|
|
96
96
|
|
97
97
|
def is_computer_use_preview(name: str) -> bool:
|
@@ -423,10 +423,12 @@ def chat_messages_from_openai(
|
|
423
423
|
"reasoning", None
|
424
424
|
)
|
425
425
|
if reasoning is not None:
|
426
|
+
# normalize content to an array
|
426
427
|
if isinstance(content, str):
|
427
428
|
content = [ContentText(text=content, refusal=refusal)]
|
428
|
-
|
429
|
-
|
429
|
+
|
430
|
+
# insert reasoning
|
431
|
+
content.insert(0, ContentReasoning(reasoning=str(reasoning)))
|
430
432
|
|
431
433
|
# return message
|
432
434
|
if "tool_calls" in message:
|