inspect-ai 0.3.89__py3-none-any.whl → 0.3.91__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +13 -0
- inspect_ai/_cli/eval.py +40 -0
- inspect_ai/_display/textual/widgets/samples.py +49 -4
- inspect_ai/_display/textual/widgets/vscode.py +4 -2
- inspect_ai/_eval/eval.py +41 -28
- inspect_ai/_eval/evalset.py +4 -0
- inspect_ai/_eval/loader.py +4 -5
- inspect_ai/_eval/registry.py +1 -1
- inspect_ai/_eval/run.py +6 -3
- inspect_ai/_eval/task/log.py +6 -0
- inspect_ai/_eval/task/run.py +108 -53
- inspect_ai/_eval/task/sandbox.py +19 -5
- inspect_ai/_util/_async.py +1 -1
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/environ.py +32 -0
- inspect_ai/_util/file.py +8 -1
- inspect_ai/_util/httpx.py +105 -22
- inspect_ai/_util/registry.py +83 -9
- inspect_ai/_util/text.py +81 -17
- inspect_ai/_util/transcript.py +9 -6
- inspect_ai/_util/vscode.py +7 -2
- inspect_ai/_view/schema.py +1 -1
- inspect_ai/_view/www/babel.config.js +11 -0
- inspect_ai/_view/www/dist/assets/index.css +3583 -3508
- inspect_ai/_view/www/dist/assets/index.js +59212 -52521
- inspect_ai/_view/www/eslint.config.mjs +10 -1
- inspect_ai/_view/www/jest.config.mjs +21 -0
- inspect_ai/_view/www/log-schema.json +111 -2
- inspect_ai/_view/www/package.json +19 -5
- inspect_ai/_view/www/src/{types → @types}/log.d.ts +95 -32
- inspect_ai/_view/www/{App.css → src/app/App.css} +22 -14
- inspect_ai/_view/www/src/app/App.tsx +168 -0
- inspect_ai/_view/www/src/{AppErrorBoundary.tsx → app/AppErrorBoundary.tsx} +1 -1
- inspect_ai/_view/www/src/{appearance → app/appearance}/icons.ts +1 -0
- inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.tsx +5 -5
- inspect_ai/_view/www/src/{workspace/WorkSpaceView.tsx → app/log-view/LogView.tsx} +59 -40
- inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +159 -0
- inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +109 -0
- inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.tsx +3 -3
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.tsx +4 -4
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.tsx +8 -8
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.tsx +6 -6
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.tsx +8 -8
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.tsx +35 -6
- inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +136 -0
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/SamplesTab.tsx +82 -73
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/grouping.ts +3 -3
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/types.ts +1 -1
- inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.tsx +2 -2
- inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.tsx +1 -1
- inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.tsx +4 -4
- inspect_ai/_view/www/src/{plan → app/plan}/PlanCard.tsx +2 -2
- inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.tsx +5 -5
- inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/AppRouter.tsx +58 -0
- inspect_ai/_view/www/src/app/routing/navigationHooks.ts +182 -0
- inspect_ai/_view/www/src/app/routing/url.ts +43 -0
- inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.tsx +11 -27
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDialog.tsx +36 -40
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.tsx +116 -49
- inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.module.css +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.tsx +29 -26
- inspect_ai/_view/www/src/{samples → app/samples}/SamplesTools.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.module.css +5 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.tsx +13 -5
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRenderer.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.tsx +6 -1
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatView.tsx +4 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.tsx +5 -3
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +12 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContent.tsx +11 -10
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContents.tsx +14 -8
- inspect_ai/_view/www/src/{samples → app/samples}/chat/messages.ts +2 -2
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolCallView.tsx +26 -27
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +19 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolInput.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.module.css +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.module.css +4 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/tool.ts +1 -1
- inspect_ai/_view/www/src/app/samples/chat/types.ts +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/samplesDescriptor.tsx +38 -15
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/CategoricalScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/NumericScoreDescriptor.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/OtherScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/types.ts +4 -3
- inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.module.css +2 -1
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.tsx +3 -0
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.tsx +47 -33
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.module.css +16 -0
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.tsx +47 -20
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/filters.ts +8 -6
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.tsx +4 -3
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/completions.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/language.ts +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/sampleDataAdapter.ts +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/sampleLimit.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.tsx +12 -11
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.tsx +6 -6
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ApprovalEventView.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ErrorEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/InputEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.module.css +13 -7
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.tsx +49 -21
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.tsx +11 -9
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleLimitEventView.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.tsx +8 -6
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/StepEventView.tsx +22 -8
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.module.css +8 -9
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.tsx +32 -114
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.module.css +6 -5
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.tsx +14 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/utils.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenderers.tsx +23 -21
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenders.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.tsx +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +142 -0
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +39 -0
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/types.ts +1 -1
- inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.tsx +1 -1
- inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +16 -0
- inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +70 -0
- inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.module.css +8 -0
- inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.tsx +35 -17
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.tsx +2 -2
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.tsx +2 -2
- inspect_ai/_view/www/src/{types.ts → app/types.ts} +18 -11
- inspect_ai/_view/www/src/{usage → app/usage}/ModelTokenTable.tsx +1 -1
- inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.tsx +2 -2
- inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.tsx +1 -1
- inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.tsx +6 -6
- inspect_ai/_view/www/src/{api → client/api}/api-browser.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/api-http.ts +3 -3
- inspect_ai/_view/www/src/{api → client/api}/api-vscode.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/client-api.ts +6 -5
- inspect_ai/_view/www/src/{api → client/api}/index.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/types.ts +4 -1
- inspect_ai/_view/www/src/{logfile → client/remote}/remoteLogFile.ts +3 -3
- inspect_ai/_view/www/src/{storage → client/storage}/index.ts +11 -5
- inspect_ai/_view/www/src/components/Card.tsx +1 -1
- inspect_ai/_view/www/src/components/CopyButton.tsx +1 -1
- inspect_ai/_view/www/src/components/DownloadButton.tsx +1 -1
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +1 -1
- inspect_ai/_view/www/src/components/{ExpandablePanel.css → ExpandablePanel.module.css} +14 -11
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +16 -10
- inspect_ai/_view/www/src/components/FindBand.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.css +2 -2
- inspect_ai/_view/www/src/components/LargeModal.tsx +12 -1
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -1
- inspect_ai/_view/www/src/components/MessageBand.tsx +1 -1
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +1 -1
- inspect_ai/_view/www/src/constants.ts +10 -9
- inspect_ai/_view/www/src/index.tsx +27 -11
- inspect_ai/_view/www/src/state/appSlice.ts +44 -5
- inspect_ai/_view/www/src/state/hooks.ts +30 -7
- inspect_ai/_view/www/src/state/logSlice.ts +7 -5
- inspect_ai/_view/www/src/state/logsPolling.ts +1 -1
- inspect_ai/_view/www/src/state/logsSlice.ts +18 -13
- inspect_ai/_view/www/src/state/samplePolling.ts +12 -12
- inspect_ai/_view/www/src/state/sampleSlice.ts +3 -5
- inspect_ai/_view/www/src/state/sampleUtils.ts +1 -1
- inspect_ai/_view/www/src/{scoring/utils.ts → state/scoring.ts} +2 -2
- inspect_ai/_view/www/src/state/store.ts +9 -7
- inspect_ai/_view/www/src/state/utils.ts +1 -1
- inspect_ai/_view/www/src/tests/README.md +49 -0
- inspect_ai/_view/www/src/tests/__mocks__/fileMock.js +1 -0
- inspect_ai/_view/www/src/tests/__mocks__/styleMock.js +1 -0
- inspect_ai/_view/www/src/tests/setupTests.mjs +1 -0
- inspect_ai/_view/www/src/tests/utils/base64.test.ts +23 -0
- inspect_ai/_view/www/src/tests/utils/format.test.ts +127 -0
- inspect_ai/_view/www/src/tests/utils/path.test.ts +54 -0
- inspect_ai/_view/www/src/utils/format.ts +8 -2
- inspect_ai/_view/www/src/utils/path.ts +14 -2
- inspect_ai/_view/www/src/utils/polling.ts +1 -2
- inspect_ai/_view/www/src/utils/uri.ts +32 -0
- inspect_ai/_view/www/yarn.lock +3310 -382
- inspect_ai/agent/_handoff.py +6 -3
- inspect_ai/agent/_human/agent.py +5 -3
- inspect_ai/agent/_human/install.py +16 -7
- inspect_ai/agent/_human/panel.py +14 -1
- inspect_ai/agent/_human/service.py +5 -1
- inspect_ai/agent/_react.py +161 -128
- inspect_ai/agent/_types.py +15 -4
- inspect_ai/approval/_policy.py +2 -2
- inspect_ai/log/_file.py +30 -11
- inspect_ai/log/_log.py +7 -1
- inspect_ai/log/_recorders/eval.py +3 -0
- inspect_ai/log/_recorders/types.py +1 -0
- inspect_ai/log/_samples.py +4 -0
- inspect_ai/model/_call_tools.py +33 -17
- inspect_ai/model/_generate_config.py +10 -2
- inspect_ai/model/_model.py +41 -21
- inspect_ai/model/_model_output.py +2 -1
- inspect_ai/model/_openai.py +10 -8
- inspect_ai/model/_openai_responses.py +83 -42
- inspect_ai/model/_providers/anthropic.py +14 -12
- inspect_ai/model/_providers/google.py +191 -95
- inspect_ai/model/_providers/hf.py +1 -1
- inspect_ai/model/_providers/mistral.py +2 -3
- inspect_ai/model/_providers/openai.py +54 -17
- inspect_ai/model/_providers/openai_o1.py +1 -1
- inspect_ai/model/_providers/openai_responses.py +28 -16
- inspect_ai/model/_providers/openrouter.py +14 -0
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/util/chatapi.py +17 -7
- inspect_ai/model/_providers/vllm.py +1 -1
- inspect_ai/scorer/_metric.py +17 -1
- inspect_ai/scorer/_model.py +51 -6
- inspect_ai/scorer/_scorer.py +1 -1
- inspect_ai/solver/_human_agent.py +3 -0
- inspect_ai/solver/_plan.py +1 -1
- inspect_ai/solver/_solver.py +1 -1
- inspect_ai/solver/_use_tools.py +14 -8
- inspect_ai/tool/__init__.py +16 -1
- inspect_ai/tool/_json_rpc_helpers.py +285 -0
- inspect_ai/tool/_mcp/__init__.py +13 -0
- inspect_ai/tool/_mcp/_context.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +293 -0
- inspect_ai/tool/_mcp/_sandbox.py +104 -0
- inspect_ai/tool/_mcp/_types.py +31 -0
- inspect_ai/tool/_mcp/connection.py +60 -0
- inspect_ai/tool/_mcp/sampling.py +118 -0
- inspect_ai/tool/_mcp/server.py +112 -0
- inspect_ai/tool/_mcp/tools.py +34 -0
- inspect_ai/tool/_tool.py +13 -0
- inspect_ai/tool/_tool_def.py +24 -7
- inspect_ai/tool/_tool_support_helpers.py +129 -153
- inspect_ai/tool/_tools/_bash_session.py +11 -11
- inspect_ai/tool/_tools/_text_editor.py +6 -6
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +8 -8
- inspect_ai/util/_anyio.py +31 -20
- inspect_ai/util/_json.py +20 -2
- inspect_ai/util/_sandbox/context.py +18 -7
- inspect_ai/util/_sandbox/docker/compose.py +1 -1
- inspect_ai/util/_sandbox/docker/docker.py +92 -21
- inspect_ai/util/_sandbox/environment.py +33 -2
- inspect_ai/util/_sandbox/events.py +2 -2
- inspect_ai/util/_sandbox/service.py +13 -3
- {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/METADATA +6 -2
- inspect_ai-0.3.91.dist-info/RECORD +732 -0
- {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/src/App.tsx +0 -316
- inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +0 -4
- inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +0 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +0 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +0 -14
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +0 -292
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +0 -5
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +0 -57
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +0 -43
- inspect_ai-0.3.89.dist-info/RECORD +0 -705
- /inspect_ai/_view/www/src/{types → @types}/asciicinema-player.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/jsondiffpatch.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/markdown-it-katex.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/prism.d.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/colors.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/fonts.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/styles.ts +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataGrid.tsx +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.tsx +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetadataGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/types.ts +0 -0
- /inspect_ai/_view/www/src/{workspace/WorkSpaceView.module.css → app/log-view/LogView.module.css} +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.tsx +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/JsonTab.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.tsx +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/types.ts +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/utils.ts +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.tsx +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/error.ts +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/tokenize.ts +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateDiffView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.module.css +0 -0
- /inspect_ai/_view/www/src/{api → client/api}/api-shared.ts +0 -0
- /inspect_ai/_view/www/src/{api → client/api}/jsonrpc.ts +0 -0
- /inspect_ai/_view/www/src/{logfile → client/remote}/remoteZipFile.ts +0 -0
- {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/top_level.txt +0 -0
@@ -56,17 +56,17 @@ from inspect_ai.tool._tool_info import ToolInfo
|
|
56
56
|
|
57
57
|
|
58
58
|
async def openai_responses_inputs(
|
59
|
-
messages: list[ChatMessage], model: str
|
59
|
+
messages: list[ChatMessage], model: str, store: bool
|
60
60
|
) -> list[ResponseInputItemParam]:
|
61
61
|
return [
|
62
62
|
item
|
63
63
|
for message in messages
|
64
|
-
for item in await _openai_input_item_from_chat_message(message, model)
|
64
|
+
for item in await _openai_input_item_from_chat_message(message, model, store)
|
65
65
|
]
|
66
66
|
|
67
67
|
|
68
68
|
async def _openai_input_item_from_chat_message(
|
69
|
-
message: ChatMessage, model: str
|
69
|
+
message: ChatMessage, model: str, store: bool
|
70
70
|
) -> list[ResponseInputItemParam]:
|
71
71
|
if message.role == "system":
|
72
72
|
content = await _openai_responses_content_list_param(message.content)
|
@@ -84,7 +84,7 @@ async def _openai_input_item_from_chat_message(
|
|
84
84
|
)
|
85
85
|
]
|
86
86
|
elif message.role == "assistant":
|
87
|
-
return _openai_input_items_from_chat_message_assistant(message)
|
87
|
+
return _openai_input_items_from_chat_message_assistant(message, store)
|
88
88
|
elif message.role == "tool":
|
89
89
|
if message.internal:
|
90
90
|
internal = _model_tool_call_for_internal(message.internal)
|
@@ -208,7 +208,7 @@ def openai_responses_chat_choices(
|
|
208
208
|
|
209
209
|
class _AssistantInternal(TypedDict):
|
210
210
|
output_message_id: str | None
|
211
|
-
|
211
|
+
tool_message_ids: dict[str, str]
|
212
212
|
|
213
213
|
|
214
214
|
def _chat_message_assistant_from_openai_response(
|
@@ -237,7 +237,7 @@ def _chat_message_assistant_from_openai_response(
|
|
237
237
|
# collect output and tool calls
|
238
238
|
message_content: list[Content] = []
|
239
239
|
tool_calls: list[ToolCall] = []
|
240
|
-
internal = _AssistantInternal(output_message_id=None,
|
240
|
+
internal = _AssistantInternal(output_message_id=None, tool_message_ids={})
|
241
241
|
for output in response.output:
|
242
242
|
match output:
|
243
243
|
case ResponseOutputMessage(content=content, id=id):
|
@@ -252,24 +252,28 @@ def _chat_message_assistant_from_openai_response(
|
|
252
252
|
]
|
253
253
|
)
|
254
254
|
case ResponseReasoningItem(summary=summary, id=id):
|
255
|
-
assert internal["reasoning_id"] is None, "Multiple reasoning items"
|
256
|
-
internal["reasoning_id"] = id
|
257
255
|
message_content.append(
|
258
|
-
ContentReasoning(
|
256
|
+
ContentReasoning(
|
257
|
+
reasoning="\n".join([s.text for s in summary]), signature=id
|
258
|
+
)
|
259
259
|
)
|
260
260
|
case _:
|
261
261
|
stop_reason = "tool_calls"
|
262
262
|
match output:
|
263
263
|
case ResponseFunctionToolCall():
|
264
|
+
if output.id is not None:
|
265
|
+
internal["tool_message_ids"][output.call_id] = output.id
|
264
266
|
tool_calls.append(
|
265
267
|
parse_tool_call(
|
266
268
|
output.call_id,
|
267
|
-
output.name,
|
269
|
+
_from_responses_tool_alias(output.name),
|
268
270
|
output.arguments,
|
269
271
|
tools,
|
270
272
|
)
|
271
273
|
)
|
272
274
|
case ResponseComputerToolCall():
|
275
|
+
if output.id is not None:
|
276
|
+
internal["tool_message_ids"][output.call_id] = output.id
|
273
277
|
tool_calls.append(
|
274
278
|
tool_call_from_openai_computer_tool_call(output)
|
275
279
|
)
|
@@ -290,7 +294,7 @@ def _chat_message_assistant_from_openai_response(
|
|
290
294
|
|
291
295
|
|
292
296
|
def _openai_input_items_from_chat_message_assistant(
|
293
|
-
message: ChatMessageAssistant,
|
297
|
+
message: ChatMessageAssistant, store: bool
|
294
298
|
) -> list[ResponseInputItemParam]:
|
295
299
|
"""
|
296
300
|
Transform a `ChatMessageAssistant` into OpenAI `ResponseInputItem`'s for playback to the model.
|
@@ -300,12 +304,17 @@ def _openai_input_items_from_chat_message_assistant(
|
|
300
304
|
field of the `ChatMessageAssistant` to help it provide the proper id's the
|
301
305
|
items in the returned list.
|
302
306
|
"""
|
303
|
-
|
304
|
-
# a single ReasoningItem for each Response/ChatMessageAssistant.
|
305
|
-
reasoning_item: ResponseReasoningItemParam | None = None
|
306
|
-
output_message: ResponseOutputMessageParam | None = None
|
307
|
+
(output_message_id, tool_message_ids) = _ids_from_assistant_internal(message)
|
307
308
|
|
308
|
-
|
309
|
+
# if we are not storing messages on the server then blank these out
|
310
|
+
if not store:
|
311
|
+
output_message_id = None
|
312
|
+
tool_message_ids = {}
|
313
|
+
|
314
|
+
# items to return -- ensure we use a single output message (and just chain
|
315
|
+
# additional content on to it)
|
316
|
+
items: list[ResponseInputItemParam] = []
|
317
|
+
output_message: ResponseOutputMessageParam | None = None
|
309
318
|
|
310
319
|
for content in (
|
311
320
|
list[ContentText | ContentReasoning]([ContentText(text=message.content)])
|
@@ -316,13 +325,21 @@ def _openai_input_items_from_chat_message_assistant(
|
|
316
325
|
):
|
317
326
|
match content:
|
318
327
|
case ContentReasoning(reasoning=reasoning):
|
319
|
-
assert
|
320
|
-
|
321
|
-
reasoning_item = ResponseReasoningItemParam(
|
322
|
-
type="reasoning",
|
323
|
-
id=reasoning_id,
|
324
|
-
summary=[Summary(type="summary_text", text=reasoning)],
|
328
|
+
assert content.signature is not None, (
|
329
|
+
"reasoning_id must be saved in signature"
|
325
330
|
)
|
331
|
+
# if items are not stored on the server then there is no
|
332
|
+
# sense appending the reasoning item as its just a pointer
|
333
|
+
if store:
|
334
|
+
items.append(
|
335
|
+
ResponseReasoningItemParam(
|
336
|
+
type="reasoning",
|
337
|
+
id=content.signature,
|
338
|
+
summary=[Summary(type="summary_text", text=reasoning)]
|
339
|
+
if reasoning
|
340
|
+
else [],
|
341
|
+
)
|
342
|
+
)
|
326
343
|
case ContentText(text=text, refusal=refusal):
|
327
344
|
new_content = (
|
328
345
|
ResponseOutputRefusalParam(type="refusal", refusal=text)
|
@@ -332,22 +349,23 @@ def _openai_input_items_from_chat_message_assistant(
|
|
332
349
|
)
|
333
350
|
)
|
334
351
|
if output_message is None:
|
335
|
-
assert output_message_id is not None, "Missing output message id"
|
336
352
|
output_message = ResponseOutputMessageParam(
|
337
353
|
type="message",
|
338
354
|
role="assistant",
|
339
|
-
|
355
|
+
# this actually can be `None`, and it will in fact be `None` when the
|
356
|
+
# assistant message is synthesized by the scaffold as opposed to being
|
357
|
+
# replayed from the model (or when store=False)
|
358
|
+
id=output_message_id, # type: ignore[typeddict-item]
|
340
359
|
content=[new_content],
|
341
360
|
status="completed",
|
342
361
|
)
|
362
|
+
items.append(output_message)
|
343
363
|
else:
|
344
364
|
output_message["content"] = chain(
|
345
365
|
output_message["content"], [new_content]
|
346
366
|
)
|
347
367
|
|
348
|
-
return
|
349
|
-
item for item in (reasoning_item, output_message) if item
|
350
|
-
] + _tool_call_items_from_assistant_message(message)
|
368
|
+
return items + _tool_call_items_from_assistant_message(message, tool_message_ids)
|
351
369
|
|
352
370
|
|
353
371
|
def _model_tool_call_for_internal(
|
@@ -380,7 +398,7 @@ def _maybe_native_tool_param(
|
|
380
398
|
|
381
399
|
|
382
400
|
def _tool_call_items_from_assistant_message(
|
383
|
-
message: ChatMessageAssistant,
|
401
|
+
message: ChatMessageAssistant, tool_message_ids: dict[str, str]
|
384
402
|
) -> list[ResponseInputItemParam]:
|
385
403
|
tool_calls: list[ResponseInputItemParam] = []
|
386
404
|
for call in message.tool_calls or []:
|
@@ -392,26 +410,36 @@ def _tool_call_items_from_assistant_message(
|
|
392
410
|
)
|
393
411
|
)
|
394
412
|
else:
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
)
|
413
|
+
# create param
|
414
|
+
tool_call_param: ResponseFunctionToolCallParam = dict(
|
415
|
+
type="function_call",
|
416
|
+
call_id=call.id,
|
417
|
+
name=_responses_tool_alias(call.function),
|
418
|
+
arguments=call.function,
|
402
419
|
)
|
403
420
|
|
421
|
+
# add id if available
|
422
|
+
tool_message_id = tool_message_ids.get(call.id, None)
|
423
|
+
if tool_message_id is not None:
|
424
|
+
tool_call_param["id"] = tool_message_id
|
425
|
+
|
426
|
+
# append the param
|
427
|
+
tool_calls.append(tool_call_param)
|
428
|
+
|
404
429
|
return tool_calls
|
405
430
|
|
406
431
|
|
407
432
|
def _ids_from_assistant_internal(
|
408
433
|
message: ChatMessageAssistant,
|
409
|
-
) -> tuple[str | None, str
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
434
|
+
) -> tuple[str | None, dict[str, str]]:
|
435
|
+
if message.internal is not None:
|
436
|
+
assert isinstance(message.internal, dict), (
|
437
|
+
"OpenAI ChatMessageAssistant internal must be an _AssistantInternal"
|
438
|
+
)
|
439
|
+
internal = cast(_AssistantInternal, message.internal)
|
440
|
+
return (internal["output_message_id"], internal["tool_message_ids"])
|
441
|
+
else:
|
442
|
+
return None, {}
|
415
443
|
|
416
444
|
|
417
445
|
_ResponseToolCallParam = (
|
@@ -430,8 +458,21 @@ def _tool_param_for_tool_info(
|
|
430
458
|
# standard tool implementation
|
431
459
|
return _maybe_native_tool_param(tool, config) or FunctionToolParam(
|
432
460
|
type="function",
|
433
|
-
name=tool.name,
|
461
|
+
name=_responses_tool_alias(tool.name),
|
434
462
|
description=tool.description,
|
435
463
|
parameters=tool.parameters.model_dump(exclude_none=True),
|
436
464
|
strict=False, # default parameters don't work in strict mode
|
437
465
|
)
|
466
|
+
|
467
|
+
|
468
|
+
# these functions enables us to 'escape' built in tool names like 'python'
|
469
|
+
|
470
|
+
_responses_tool_aliases = {"python": "python_exec"}
|
471
|
+
|
472
|
+
|
473
|
+
def _responses_tool_alias(name: str) -> str:
|
474
|
+
return _responses_tool_aliases.get(name, name)
|
475
|
+
|
476
|
+
|
477
|
+
def _from_responses_tool_alias(name: str) -> str:
|
478
|
+
return next((k for k, v in _responses_tool_aliases.items() if v == name), name)
|
@@ -5,8 +5,6 @@ from copy import copy
|
|
5
5
|
from logging import getLogger
|
6
6
|
from typing import Any, Literal, Optional, Tuple, cast
|
7
7
|
|
8
|
-
import httpcore
|
9
|
-
import httpx
|
10
8
|
from anthropic import (
|
11
9
|
APIConnectionError,
|
12
10
|
APIStatusError,
|
@@ -51,9 +49,11 @@ from inspect_ai._util.error import exception_message
|
|
51
49
|
from inspect_ai._util.http import is_retryable_http_status
|
52
50
|
from inspect_ai._util.images import file_as_data_uri
|
53
51
|
from inspect_ai._util.logger import warn_once
|
52
|
+
from inspect_ai._util.trace import trace_message
|
54
53
|
from inspect_ai._util.url import data_uri_mime_type, data_uri_to_base64
|
55
54
|
from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo
|
56
55
|
|
56
|
+
from ..._util.httpx import httpx_should_retry
|
57
57
|
from .._chat_message import ChatMessage, ChatMessageAssistant, ChatMessageSystem
|
58
58
|
from .._generate_config import GenerateConfig
|
59
59
|
from .._model import ModelAPI
|
@@ -330,13 +330,9 @@ class AnthropicAPI(ModelAPI):
|
|
330
330
|
def should_retry(self, ex: Exception) -> bool:
|
331
331
|
if isinstance(ex, APIStatusError):
|
332
332
|
return is_retryable_http_status(ex.status_code)
|
333
|
-
elif
|
334
|
-
|
335
|
-
|
336
|
-
| APITimeoutError
|
337
|
-
| httpx.RemoteProtocolError
|
338
|
-
| httpcore.RemoteProtocolError,
|
339
|
-
):
|
333
|
+
elif httpx_should_retry(ex):
|
334
|
+
return True
|
335
|
+
elif isinstance(ex, APIConnectionError | APITimeoutError):
|
340
336
|
return True
|
341
337
|
else:
|
342
338
|
return False
|
@@ -944,9 +940,15 @@ async def count_tokens(
|
|
944
940
|
messages=[{"role": "user", "content": text}],
|
945
941
|
)
|
946
942
|
return response.input_tokens
|
947
|
-
except Exception as
|
948
|
-
|
949
|
-
|
943
|
+
except Exception as ex:
|
944
|
+
warn_once(
|
945
|
+
logger,
|
946
|
+
f"Unable to call count_tokens API for model {model} (falling back to estimated tokens)",
|
947
|
+
)
|
948
|
+
trace_message(
|
949
|
+
logger,
|
950
|
+
"Anthropic",
|
951
|
+
f"Unable to call count_tokens API for model {model} ({ex})",
|
950
952
|
)
|
951
953
|
words = text.split()
|
952
954
|
estimated_tokens = int(len(words) * 1.3)
|