inspect-ai 0.3.89__py3-none-any.whl → 0.3.91__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +13 -0
- inspect_ai/_cli/eval.py +40 -0
- inspect_ai/_display/textual/widgets/samples.py +49 -4
- inspect_ai/_display/textual/widgets/vscode.py +4 -2
- inspect_ai/_eval/eval.py +41 -28
- inspect_ai/_eval/evalset.py +4 -0
- inspect_ai/_eval/loader.py +4 -5
- inspect_ai/_eval/registry.py +1 -1
- inspect_ai/_eval/run.py +6 -3
- inspect_ai/_eval/task/log.py +6 -0
- inspect_ai/_eval/task/run.py +108 -53
- inspect_ai/_eval/task/sandbox.py +19 -5
- inspect_ai/_util/_async.py +1 -1
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/environ.py +32 -0
- inspect_ai/_util/file.py +8 -1
- inspect_ai/_util/httpx.py +105 -22
- inspect_ai/_util/registry.py +83 -9
- inspect_ai/_util/text.py +81 -17
- inspect_ai/_util/transcript.py +9 -6
- inspect_ai/_util/vscode.py +7 -2
- inspect_ai/_view/schema.py +1 -1
- inspect_ai/_view/www/babel.config.js +11 -0
- inspect_ai/_view/www/dist/assets/index.css +3583 -3508
- inspect_ai/_view/www/dist/assets/index.js +59212 -52521
- inspect_ai/_view/www/eslint.config.mjs +10 -1
- inspect_ai/_view/www/jest.config.mjs +21 -0
- inspect_ai/_view/www/log-schema.json +111 -2
- inspect_ai/_view/www/package.json +19 -5
- inspect_ai/_view/www/src/{types → @types}/log.d.ts +95 -32
- inspect_ai/_view/www/{App.css → src/app/App.css} +22 -14
- inspect_ai/_view/www/src/app/App.tsx +168 -0
- inspect_ai/_view/www/src/{AppErrorBoundary.tsx → app/AppErrorBoundary.tsx} +1 -1
- inspect_ai/_view/www/src/{appearance → app/appearance}/icons.ts +1 -0
- inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.tsx +5 -5
- inspect_ai/_view/www/src/{workspace/WorkSpaceView.tsx → app/log-view/LogView.tsx} +59 -40
- inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +159 -0
- inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +109 -0
- inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.tsx +3 -3
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.tsx +4 -4
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.tsx +8 -8
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.tsx +6 -6
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.tsx +8 -8
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.tsx +35 -6
- inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +136 -0
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/SamplesTab.tsx +82 -73
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/grouping.ts +3 -3
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/types.ts +1 -1
- inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.tsx +2 -2
- inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.tsx +1 -1
- inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.tsx +4 -4
- inspect_ai/_view/www/src/{plan → app/plan}/PlanCard.tsx +2 -2
- inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.tsx +5 -5
- inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/AppRouter.tsx +58 -0
- inspect_ai/_view/www/src/app/routing/navigationHooks.ts +182 -0
- inspect_ai/_view/www/src/app/routing/url.ts +43 -0
- inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.tsx +11 -27
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDialog.tsx +36 -40
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.tsx +116 -49
- inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.module.css +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.tsx +29 -26
- inspect_ai/_view/www/src/{samples → app/samples}/SamplesTools.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.module.css +5 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.tsx +13 -5
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRenderer.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.tsx +6 -1
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatView.tsx +4 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.tsx +5 -3
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +12 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContent.tsx +11 -10
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContents.tsx +14 -8
- inspect_ai/_view/www/src/{samples → app/samples}/chat/messages.ts +2 -2
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolCallView.tsx +26 -27
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +19 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolInput.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.module.css +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.module.css +4 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/tool.ts +1 -1
- inspect_ai/_view/www/src/app/samples/chat/types.ts +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/samplesDescriptor.tsx +38 -15
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/CategoricalScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/NumericScoreDescriptor.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/OtherScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/types.ts +4 -3
- inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.module.css +2 -1
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.tsx +3 -0
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.tsx +47 -33
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.module.css +16 -0
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.tsx +47 -20
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/filters.ts +8 -6
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.tsx +4 -3
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/completions.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/language.ts +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/sampleDataAdapter.ts +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/sampleLimit.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.tsx +12 -11
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.tsx +6 -6
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ApprovalEventView.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ErrorEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/InputEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.module.css +13 -7
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.tsx +49 -21
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.tsx +11 -9
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleLimitEventView.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.tsx +8 -6
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/StepEventView.tsx +22 -8
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.module.css +8 -9
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.tsx +32 -114
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.module.css +6 -5
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.tsx +14 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/utils.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenderers.tsx +23 -21
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenders.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.tsx +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +142 -0
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +39 -0
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/types.ts +1 -1
- inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.tsx +1 -1
- inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +16 -0
- inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +70 -0
- inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.module.css +8 -0
- inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.tsx +35 -17
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.tsx +2 -2
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.tsx +2 -2
- inspect_ai/_view/www/src/{types.ts → app/types.ts} +18 -11
- inspect_ai/_view/www/src/{usage → app/usage}/ModelTokenTable.tsx +1 -1
- inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.tsx +2 -2
- inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.tsx +1 -1
- inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.tsx +6 -6
- inspect_ai/_view/www/src/{api → client/api}/api-browser.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/api-http.ts +3 -3
- inspect_ai/_view/www/src/{api → client/api}/api-vscode.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/client-api.ts +6 -5
- inspect_ai/_view/www/src/{api → client/api}/index.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/types.ts +4 -1
- inspect_ai/_view/www/src/{logfile → client/remote}/remoteLogFile.ts +3 -3
- inspect_ai/_view/www/src/{storage → client/storage}/index.ts +11 -5
- inspect_ai/_view/www/src/components/Card.tsx +1 -1
- inspect_ai/_view/www/src/components/CopyButton.tsx +1 -1
- inspect_ai/_view/www/src/components/DownloadButton.tsx +1 -1
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +1 -1
- inspect_ai/_view/www/src/components/{ExpandablePanel.css → ExpandablePanel.module.css} +14 -11
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +16 -10
- inspect_ai/_view/www/src/components/FindBand.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.css +2 -2
- inspect_ai/_view/www/src/components/LargeModal.tsx +12 -1
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -1
- inspect_ai/_view/www/src/components/MessageBand.tsx +1 -1
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +1 -1
- inspect_ai/_view/www/src/constants.ts +10 -9
- inspect_ai/_view/www/src/index.tsx +27 -11
- inspect_ai/_view/www/src/state/appSlice.ts +44 -5
- inspect_ai/_view/www/src/state/hooks.ts +30 -7
- inspect_ai/_view/www/src/state/logSlice.ts +7 -5
- inspect_ai/_view/www/src/state/logsPolling.ts +1 -1
- inspect_ai/_view/www/src/state/logsSlice.ts +18 -13
- inspect_ai/_view/www/src/state/samplePolling.ts +12 -12
- inspect_ai/_view/www/src/state/sampleSlice.ts +3 -5
- inspect_ai/_view/www/src/state/sampleUtils.ts +1 -1
- inspect_ai/_view/www/src/{scoring/utils.ts → state/scoring.ts} +2 -2
- inspect_ai/_view/www/src/state/store.ts +9 -7
- inspect_ai/_view/www/src/state/utils.ts +1 -1
- inspect_ai/_view/www/src/tests/README.md +49 -0
- inspect_ai/_view/www/src/tests/__mocks__/fileMock.js +1 -0
- inspect_ai/_view/www/src/tests/__mocks__/styleMock.js +1 -0
- inspect_ai/_view/www/src/tests/setupTests.mjs +1 -0
- inspect_ai/_view/www/src/tests/utils/base64.test.ts +23 -0
- inspect_ai/_view/www/src/tests/utils/format.test.ts +127 -0
- inspect_ai/_view/www/src/tests/utils/path.test.ts +54 -0
- inspect_ai/_view/www/src/utils/format.ts +8 -2
- inspect_ai/_view/www/src/utils/path.ts +14 -2
- inspect_ai/_view/www/src/utils/polling.ts +1 -2
- inspect_ai/_view/www/src/utils/uri.ts +32 -0
- inspect_ai/_view/www/yarn.lock +3310 -382
- inspect_ai/agent/_handoff.py +6 -3
- inspect_ai/agent/_human/agent.py +5 -3
- inspect_ai/agent/_human/install.py +16 -7
- inspect_ai/agent/_human/panel.py +14 -1
- inspect_ai/agent/_human/service.py +5 -1
- inspect_ai/agent/_react.py +161 -128
- inspect_ai/agent/_types.py +15 -4
- inspect_ai/approval/_policy.py +2 -2
- inspect_ai/log/_file.py +30 -11
- inspect_ai/log/_log.py +7 -1
- inspect_ai/log/_recorders/eval.py +3 -0
- inspect_ai/log/_recorders/types.py +1 -0
- inspect_ai/log/_samples.py +4 -0
- inspect_ai/model/_call_tools.py +33 -17
- inspect_ai/model/_generate_config.py +10 -2
- inspect_ai/model/_model.py +41 -21
- inspect_ai/model/_model_output.py +2 -1
- inspect_ai/model/_openai.py +10 -8
- inspect_ai/model/_openai_responses.py +83 -42
- inspect_ai/model/_providers/anthropic.py +14 -12
- inspect_ai/model/_providers/google.py +191 -95
- inspect_ai/model/_providers/hf.py +1 -1
- inspect_ai/model/_providers/mistral.py +2 -3
- inspect_ai/model/_providers/openai.py +54 -17
- inspect_ai/model/_providers/openai_o1.py +1 -1
- inspect_ai/model/_providers/openai_responses.py +28 -16
- inspect_ai/model/_providers/openrouter.py +14 -0
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/util/chatapi.py +17 -7
- inspect_ai/model/_providers/vllm.py +1 -1
- inspect_ai/scorer/_metric.py +17 -1
- inspect_ai/scorer/_model.py +51 -6
- inspect_ai/scorer/_scorer.py +1 -1
- inspect_ai/solver/_human_agent.py +3 -0
- inspect_ai/solver/_plan.py +1 -1
- inspect_ai/solver/_solver.py +1 -1
- inspect_ai/solver/_use_tools.py +14 -8
- inspect_ai/tool/__init__.py +16 -1
- inspect_ai/tool/_json_rpc_helpers.py +285 -0
- inspect_ai/tool/_mcp/__init__.py +13 -0
- inspect_ai/tool/_mcp/_context.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +293 -0
- inspect_ai/tool/_mcp/_sandbox.py +104 -0
- inspect_ai/tool/_mcp/_types.py +31 -0
- inspect_ai/tool/_mcp/connection.py +60 -0
- inspect_ai/tool/_mcp/sampling.py +118 -0
- inspect_ai/tool/_mcp/server.py +112 -0
- inspect_ai/tool/_mcp/tools.py +34 -0
- inspect_ai/tool/_tool.py +13 -0
- inspect_ai/tool/_tool_def.py +24 -7
- inspect_ai/tool/_tool_support_helpers.py +129 -153
- inspect_ai/tool/_tools/_bash_session.py +11 -11
- inspect_ai/tool/_tools/_text_editor.py +6 -6
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +8 -8
- inspect_ai/util/_anyio.py +31 -20
- inspect_ai/util/_json.py +20 -2
- inspect_ai/util/_sandbox/context.py +18 -7
- inspect_ai/util/_sandbox/docker/compose.py +1 -1
- inspect_ai/util/_sandbox/docker/docker.py +92 -21
- inspect_ai/util/_sandbox/environment.py +33 -2
- inspect_ai/util/_sandbox/events.py +2 -2
- inspect_ai/util/_sandbox/service.py +13 -3
- {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/METADATA +6 -2
- inspect_ai-0.3.91.dist-info/RECORD +732 -0
- {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/src/App.tsx +0 -316
- inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +0 -4
- inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +0 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +0 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +0 -14
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +0 -292
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +0 -5
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +0 -57
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +0 -43
- inspect_ai-0.3.89.dist-info/RECORD +0 -705
- /inspect_ai/_view/www/src/{types → @types}/asciicinema-player.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/jsondiffpatch.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/markdown-it-katex.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/prism.d.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/colors.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/fonts.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/styles.ts +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataGrid.tsx +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.tsx +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetadataGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/types.ts +0 -0
- /inspect_ai/_view/www/src/{workspace/WorkSpaceView.module.css → app/log-view/LogView.module.css} +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.tsx +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/JsonTab.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.tsx +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/types.ts +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/utils.ts +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.tsx +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/error.ts +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/tokenize.ts +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateDiffView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.module.css +0 -0
- /inspect_ai/_view/www/src/{api → client/api}/api-shared.ts +0 -0
- /inspect_ai/_view/www/src/{api → client/api}/jsonrpc.ts +0 -0
- /inspect_ai/_view/www/src/{logfile → client/remote}/remoteZipFile.ts +0 -0
- {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.89.dist-info → inspect_ai-0.3.91.dist-info}/top_level.txt +0 -0
@@ -5,25 +5,27 @@ import os
|
|
5
5
|
from copy import copy
|
6
6
|
from io import BytesIO
|
7
7
|
from logging import getLogger
|
8
|
-
from typing import Any
|
8
|
+
from typing import Any, cast
|
9
9
|
|
10
10
|
# SDK Docs: https://googleapis.github.io/python-genai/
|
11
11
|
import anyio
|
12
|
-
from google.genai import Client
|
13
|
-
from google.genai.errors import APIError, ClientError
|
14
|
-
from google.genai.types import (
|
12
|
+
from google.genai import Client
|
13
|
+
from google.genai.errors import APIError, ClientError
|
14
|
+
from google.genai.types import (
|
15
15
|
Candidate,
|
16
16
|
Content,
|
17
|
+
ContentListUnion,
|
18
|
+
ContentListUnionDict,
|
17
19
|
File,
|
18
20
|
FinishReason,
|
19
21
|
FunctionCallingConfig,
|
22
|
+
FunctionCallingConfigMode,
|
20
23
|
FunctionDeclaration,
|
21
24
|
FunctionResponse,
|
22
25
|
GenerateContentConfig,
|
23
26
|
GenerateContentResponse,
|
24
27
|
GenerateContentResponsePromptFeedback,
|
25
28
|
GenerateContentResponseUsageMetadata,
|
26
|
-
GenerationConfig,
|
27
29
|
HarmBlockThreshold,
|
28
30
|
HarmCategory,
|
29
31
|
HttpOptions,
|
@@ -31,8 +33,10 @@ from google.genai.types import ( # type: ignore
|
|
31
33
|
SafetySetting,
|
32
34
|
SafetySettingDict,
|
33
35
|
Schema,
|
36
|
+
ThinkingConfig,
|
34
37
|
Tool,
|
35
38
|
ToolConfig,
|
39
|
+
ToolListUnion,
|
36
40
|
Type,
|
37
41
|
)
|
38
42
|
from pydantic import JsonValue
|
@@ -89,13 +93,28 @@ GOOGLE_API_KEY = "GOOGLE_API_KEY"
|
|
89
93
|
VERTEX_API_KEY = "VERTEX_API_KEY"
|
90
94
|
|
91
95
|
SAFETY_SETTINGS = "safety_settings"
|
92
|
-
DEFAULT_SAFETY_SETTINGS =
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
96
|
+
DEFAULT_SAFETY_SETTINGS: list[SafetySettingDict] = [
|
97
|
+
{
|
98
|
+
"category": HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY,
|
99
|
+
"threshold": HarmBlockThreshold.BLOCK_NONE,
|
100
|
+
},
|
101
|
+
{
|
102
|
+
"category": HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
|
103
|
+
"threshold": HarmBlockThreshold.BLOCK_NONE,
|
104
|
+
},
|
105
|
+
{
|
106
|
+
"category": HarmCategory.HARM_CATEGORY_HARASSMENT,
|
107
|
+
"threshold": HarmBlockThreshold.BLOCK_NONE,
|
108
|
+
},
|
109
|
+
{
|
110
|
+
"category": HarmCategory.HARM_CATEGORY_HATE_SPEECH,
|
111
|
+
"threshold": HarmBlockThreshold.BLOCK_NONE,
|
112
|
+
},
|
113
|
+
{
|
114
|
+
"category": HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
|
115
|
+
"threshold": HarmBlockThreshold.BLOCK_NONE,
|
116
|
+
},
|
117
|
+
]
|
99
118
|
|
100
119
|
|
101
120
|
class GoogleGenAIAPI(ModelAPI):
|
@@ -105,6 +124,7 @@ class GoogleGenAIAPI(ModelAPI):
|
|
105
124
|
base_url: str | None,
|
106
125
|
api_key: str | None,
|
107
126
|
config: GenerateConfig = GenerateConfig(),
|
127
|
+
api_version: str | None = None,
|
108
128
|
**model_args: Any,
|
109
129
|
) -> None:
|
110
130
|
super().__init__(
|
@@ -115,12 +135,30 @@ class GoogleGenAIAPI(ModelAPI):
|
|
115
135
|
config=config,
|
116
136
|
)
|
117
137
|
|
138
|
+
# record api version
|
139
|
+
self.api_version = api_version
|
140
|
+
|
118
141
|
# pick out user-provided safety settings and merge against default
|
119
|
-
self.safety_settings = DEFAULT_SAFETY_SETTINGS.copy()
|
142
|
+
self.safety_settings: list[SafetySettingDict] = DEFAULT_SAFETY_SETTINGS.copy()
|
120
143
|
if SAFETY_SETTINGS in model_args:
|
121
|
-
|
122
|
-
|
144
|
+
|
145
|
+
def update_safety_setting(
|
146
|
+
category: HarmCategory, threshold: HarmBlockThreshold
|
147
|
+
) -> None:
|
148
|
+
for setting in self.safety_settings:
|
149
|
+
if setting["category"] == category:
|
150
|
+
setting["threshold"] = threshold
|
151
|
+
break
|
152
|
+
|
153
|
+
user_safety_settings = parse_safety_settings(
|
154
|
+
model_args.get(SAFETY_SETTINGS)
|
123
155
|
)
|
156
|
+
for safety_setting in user_safety_settings:
|
157
|
+
if safety_setting["category"] and safety_setting["threshold"]:
|
158
|
+
update_safety_setting(
|
159
|
+
safety_setting["category"], safety_setting["threshold"]
|
160
|
+
)
|
161
|
+
|
124
162
|
del model_args[SAFETY_SETTINGS]
|
125
163
|
|
126
164
|
# extract any service prefix from model name
|
@@ -196,7 +234,10 @@ class GoogleGenAIAPI(ModelAPI):
|
|
196
234
|
client = Client(
|
197
235
|
vertexai=self.is_vertex(),
|
198
236
|
api_key=self.api_key,
|
199
|
-
http_options={
|
237
|
+
http_options={
|
238
|
+
"base_url": self.base_url,
|
239
|
+
"api_version": self.api_version,
|
240
|
+
},
|
200
241
|
**self.model_args,
|
201
242
|
)
|
202
243
|
|
@@ -221,7 +262,8 @@ class GoogleGenAIAPI(ModelAPI):
|
|
221
262
|
safety_settings=safety_settings_to_list(self.safety_settings),
|
222
263
|
tools=gemini_tools,
|
223
264
|
tool_config=gemini_tool_config,
|
224
|
-
system_instruction=await extract_system_message_as_parts(client, input),
|
265
|
+
system_instruction=await extract_system_message_as_parts(client, input), # type: ignore[arg-type]
|
266
|
+
thinking_config=self.chat_thinking_config(config),
|
225
267
|
)
|
226
268
|
if config.response_schema is not None:
|
227
269
|
parameters.response_mime_type = "application/json"
|
@@ -233,7 +275,7 @@ class GoogleGenAIAPI(ModelAPI):
|
|
233
275
|
|
234
276
|
def model_call() -> ModelCall:
|
235
277
|
return build_model_call(
|
236
|
-
contents=gemini_contents,
|
278
|
+
contents=gemini_contents, # type: ignore[arg-type]
|
237
279
|
safety_settings=self.safety_settings,
|
238
280
|
generation_config=parameters,
|
239
281
|
tools=gemini_tools,
|
@@ -245,7 +287,7 @@ class GoogleGenAIAPI(ModelAPI):
|
|
245
287
|
try:
|
246
288
|
response = await client.aio.models.generate_content(
|
247
289
|
model=self.service_model_name(),
|
248
|
-
contents=gemini_contents,
|
290
|
+
contents=gemini_contents, # type: ignore[arg-type]
|
249
291
|
config=parameters,
|
250
292
|
)
|
251
293
|
except ClientError as ex:
|
@@ -264,6 +306,15 @@ class GoogleGenAIAPI(ModelAPI):
|
|
264
306
|
"""Model name without any service prefix."""
|
265
307
|
return self.model_name.replace(f"{self.service}/", "", 1)
|
266
308
|
|
309
|
+
def is_gemini(self) -> bool:
|
310
|
+
return "gemini-" in self.service_model_name()
|
311
|
+
|
312
|
+
def is_gemini_1_5(self) -> bool:
|
313
|
+
return "gemini-1.5" in self.service_model_name()
|
314
|
+
|
315
|
+
def is_gemini_2_0(self) -> bool:
|
316
|
+
return "gemini-2.0" in self.service_model_name()
|
317
|
+
|
267
318
|
@override
|
268
319
|
def should_retry(self, ex: Exception) -> bool:
|
269
320
|
if isinstance(ex, APIError) and ex.code is not None:
|
@@ -293,22 +344,35 @@ class GoogleGenAIAPI(ModelAPI):
|
|
293
344
|
else:
|
294
345
|
raise ex
|
295
346
|
|
347
|
+
def chat_thinking_config(self, config: GenerateConfig) -> ThinkingConfig | None:
|
348
|
+
# thinking_config is only supported for gemini 2.5 above
|
349
|
+
has_thinking_config = (
|
350
|
+
self.is_gemini() and not self.is_gemini_1_5() and not self.is_gemini_2_0()
|
351
|
+
)
|
352
|
+
if has_thinking_config:
|
353
|
+
return ThinkingConfig(
|
354
|
+
include_thoughts=True, thinking_budget=config.reasoning_tokens
|
355
|
+
)
|
356
|
+
else:
|
357
|
+
return None
|
358
|
+
|
296
359
|
|
297
|
-
def safety_settings_to_list(
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
360
|
+
def safety_settings_to_list(
|
361
|
+
safety_settings: list[SafetySettingDict],
|
362
|
+
) -> list[SafetySetting]:
|
363
|
+
settings: list[SafetySetting] = []
|
364
|
+
for setting in safety_settings:
|
365
|
+
settings.append(
|
366
|
+
SafetySetting(category=setting["category"], threshold=setting["threshold"])
|
302
367
|
)
|
303
|
-
|
304
|
-
]
|
368
|
+
return settings
|
305
369
|
|
306
370
|
|
307
371
|
def build_model_call(
|
308
|
-
contents:
|
309
|
-
generation_config:
|
310
|
-
safety_settings: SafetySettingDict,
|
311
|
-
tools:
|
372
|
+
contents: ContentListUnion | ContentListUnionDict,
|
373
|
+
generation_config: GenerateContentConfig,
|
374
|
+
safety_settings: list[SafetySettingDict],
|
375
|
+
tools: ToolListUnion | None,
|
312
376
|
tool_config: ToolConfig | None,
|
313
377
|
response: GenerateContentResponse | None,
|
314
378
|
time: float | None,
|
@@ -364,7 +428,7 @@ def consecutive_tool_message_reducer(
|
|
364
428
|
and messages[-1].role == "function"
|
365
429
|
):
|
366
430
|
messages[-1] = Content(
|
367
|
-
role="function", parts=messages[-1].parts + message.parts
|
431
|
+
role="function", parts=(messages[-1].parts or []) + (message.parts or [])
|
368
432
|
)
|
369
433
|
else:
|
370
434
|
messages.append(message)
|
@@ -443,14 +507,16 @@ async def chat_content_to_part(
|
|
443
507
|
return Part.from_bytes(mime_type=mime_type, data=content_bytes)
|
444
508
|
else:
|
445
509
|
file = await file_for_content(client, content)
|
510
|
+
if file.uri is None:
|
511
|
+
raise RuntimeError(f"Failed to get URI for file: {file.display_name}")
|
446
512
|
return Part.from_uri(file_uri=file.uri, mime_type=file.mime_type)
|
447
513
|
|
448
514
|
|
449
515
|
async def extract_system_message_as_parts(
|
450
516
|
client: Client,
|
451
517
|
messages: list[ChatMessage],
|
452
|
-
) -> list[Part] | None:
|
453
|
-
system_parts: list[Part] = []
|
518
|
+
) -> list[File | Part | str] | None:
|
519
|
+
system_parts: list[File | Part | str] = []
|
454
520
|
for message in messages:
|
455
521
|
if message.role == "system":
|
456
522
|
content = message.content
|
@@ -466,7 +532,7 @@ async def extract_system_message_as_parts(
|
|
466
532
|
return system_parts or None
|
467
533
|
|
468
534
|
|
469
|
-
def chat_tools(tools: list[ToolInfo]) ->
|
535
|
+
def chat_tools(tools: list[ToolInfo]) -> ToolListUnion:
|
470
536
|
declarations = [
|
471
537
|
FunctionDeclaration(
|
472
538
|
name=tool.name,
|
@@ -502,6 +568,27 @@ def schema_from_param(
|
|
502
568
|
type=Type.BOOLEAN, description=param.description, nullable=nullable
|
503
569
|
)
|
504
570
|
elif param.type == "string":
|
571
|
+
if param.format == "date-time":
|
572
|
+
return Schema(
|
573
|
+
type=Type.STRING,
|
574
|
+
description=param.description,
|
575
|
+
format="^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$",
|
576
|
+
nullable=nullable,
|
577
|
+
)
|
578
|
+
elif param.format == "date":
|
579
|
+
return Schema(
|
580
|
+
type=Type.STRING,
|
581
|
+
description=param.description,
|
582
|
+
format="^[0-9]{4}-[0-9]{2}-[0-9]{2}$",
|
583
|
+
nullable=nullable,
|
584
|
+
)
|
585
|
+
elif param.format == "time":
|
586
|
+
return Schema(
|
587
|
+
type=Type.STRING,
|
588
|
+
description=param.description,
|
589
|
+
format="^[0-9]{2}:[0-9]{2}:[0-9]{2}$",
|
590
|
+
nullable=nullable,
|
591
|
+
)
|
505
592
|
return Schema(
|
506
593
|
type=Type.STRING, description=param.description, nullable=nullable
|
507
594
|
)
|
@@ -538,12 +625,15 @@ def chat_tool_config(tool_choice: ToolChoice) -> ToolConfig:
|
|
538
625
|
if isinstance(tool_choice, ToolFunction):
|
539
626
|
return ToolConfig(
|
540
627
|
function_calling_config=FunctionCallingConfig(
|
541
|
-
mode=
|
628
|
+
mode=FunctionCallingConfigMode.ANY,
|
629
|
+
allowed_function_names=[tool_choice.name],
|
542
630
|
)
|
543
631
|
)
|
544
632
|
else:
|
545
633
|
return ToolConfig(
|
546
|
-
function_calling_config=FunctionCallingConfig(
|
634
|
+
function_calling_config=FunctionCallingConfig(
|
635
|
+
mode=cast(FunctionCallingConfigMode, tool_choice.upper())
|
636
|
+
)
|
547
637
|
)
|
548
638
|
|
549
639
|
|
@@ -552,51 +642,57 @@ def completion_choice_from_candidate(
|
|
552
642
|
) -> ChatCompletionChoice:
|
553
643
|
# content can be None when the finish_reason is SAFETY
|
554
644
|
if candidate.content is None:
|
555
|
-
content
|
645
|
+
content: (
|
646
|
+
str
|
647
|
+
| list[
|
648
|
+
ContentText
|
649
|
+
| ContentReasoning
|
650
|
+
| ContentImage
|
651
|
+
| ContentAudio
|
652
|
+
| ContentVideo
|
653
|
+
]
|
654
|
+
) = ""
|
556
655
|
# content.parts can be None when the finish_reason is MALFORMED_FUNCTION_CALL
|
557
656
|
elif candidate.content.parts is None:
|
558
657
|
content = ""
|
559
658
|
else:
|
560
|
-
content =
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
# split reasoning
|
569
|
-
reasoning, content = split_reasoning(content)
|
659
|
+
content = []
|
660
|
+
for part in candidate.content.parts:
|
661
|
+
if part.text is not None:
|
662
|
+
if part.thought is True:
|
663
|
+
content.append(ContentReasoning(reasoning=part.text))
|
664
|
+
else:
|
665
|
+
content.append(ContentText(text=part.text))
|
570
666
|
|
571
667
|
# now tool calls
|
572
668
|
tool_calls: list[ToolCall] = []
|
573
669
|
if candidate.content is not None and candidate.content.parts is not None:
|
574
670
|
for part in candidate.content.parts:
|
575
671
|
if part.function_call:
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
672
|
+
if (
|
673
|
+
part.function_call is not None
|
674
|
+
and part.function_call.name is not None
|
675
|
+
and part.function_call.args is not None
|
676
|
+
):
|
677
|
+
tool_calls.append(
|
678
|
+
ToolCall(
|
679
|
+
id=part.function_call.name,
|
680
|
+
function=part.function_call.name,
|
681
|
+
arguments=part.function_call.args,
|
682
|
+
)
|
581
683
|
)
|
582
|
-
|
684
|
+
else:
|
685
|
+
raise ValueError(f"Incomplete function call: {part.function_call}")
|
583
686
|
|
584
687
|
# stop reason
|
585
|
-
stop_reason = finish_reason_to_stop_reason(
|
586
|
-
|
587
|
-
|
588
|
-
if reasoning:
|
589
|
-
choice_content: str | list[Content] = [
|
590
|
-
ContentReasoning(reasoning=reasoning),
|
591
|
-
ContentText(text=content),
|
592
|
-
]
|
593
|
-
else:
|
594
|
-
choice_content = content
|
688
|
+
stop_reason = finish_reason_to_stop_reason(
|
689
|
+
candidate.finish_reason or FinishReason.STOP
|
690
|
+
)
|
595
691
|
|
596
692
|
# build choice
|
597
693
|
choice = ChatCompletionChoice(
|
598
694
|
message=ChatMessageAssistant(
|
599
|
-
content=
|
695
|
+
content=content,
|
600
696
|
tool_calls=tool_calls if len(tool_calls) > 0 else None,
|
601
697
|
model=model,
|
602
698
|
source="generate",
|
@@ -607,21 +703,27 @@ def completion_choice_from_candidate(
|
|
607
703
|
# add logprobs if provided
|
608
704
|
if candidate.logprobs_result:
|
609
705
|
logprobs: list[Logprob] = []
|
610
|
-
|
611
|
-
candidate.logprobs_result.chosen_candidates
|
612
|
-
candidate.logprobs_result.top_candidates
|
706
|
+
if (
|
707
|
+
candidate.logprobs_result.chosen_candidates
|
708
|
+
and candidate.logprobs_result.top_candidates
|
613
709
|
):
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
710
|
+
for chosen, top in zip(
|
711
|
+
candidate.logprobs_result.chosen_candidates,
|
712
|
+
candidate.logprobs_result.top_candidates,
|
713
|
+
):
|
714
|
+
if chosen.token and chosen.log_probability:
|
715
|
+
logprobs.append(
|
716
|
+
Logprob(
|
717
|
+
token=chosen.token,
|
718
|
+
logprob=chosen.log_probability,
|
719
|
+
top_logprobs=[
|
720
|
+
TopLogprob(token=c.token, logprob=c.log_probability)
|
721
|
+
for c in (top.candidates or [])
|
722
|
+
if c.token and c.log_probability
|
723
|
+
],
|
724
|
+
)
|
725
|
+
)
|
726
|
+
choice.logprobs = Logprobs(content=logprobs)
|
625
727
|
|
626
728
|
return choice
|
627
729
|
|
@@ -632,7 +734,7 @@ def completion_choices_from_candidates(
|
|
632
734
|
) -> list[ChatCompletionChoice]:
|
633
735
|
candidates = response.candidates
|
634
736
|
if candidates:
|
635
|
-
candidates_list = sorted(candidates, key=lambda c: c.index)
|
737
|
+
candidates_list = sorted(candidates, key=lambda c: c.index or 0)
|
636
738
|
return [
|
637
739
|
completion_choice_from_candidate(model, candidate)
|
638
740
|
for candidate in candidates_list
|
@@ -661,15 +763,6 @@ def completion_choices_from_candidates(
|
|
661
763
|
]
|
662
764
|
|
663
765
|
|
664
|
-
def split_reasoning(content: str) -> tuple[str | None, str]:
|
665
|
-
separator = "\nFinal Answer: "
|
666
|
-
if separator in content:
|
667
|
-
parts = content.split(separator, 1) # dplit only on first occurrence
|
668
|
-
return parts[0].strip(), separator.lstrip() + parts[1].strip()
|
669
|
-
else:
|
670
|
-
return None, content.strip()
|
671
|
-
|
672
|
-
|
673
766
|
def prompt_feedback_to_content(
|
674
767
|
feedback: GenerateContentResponsePromptFeedback,
|
675
768
|
) -> str:
|
@@ -687,7 +780,7 @@ def prompt_feedback_to_content(
|
|
687
780
|
|
688
781
|
|
689
782
|
def usage_metadata_to_model_usage(
|
690
|
-
metadata: GenerateContentResponseUsageMetadata,
|
783
|
+
metadata: GenerateContentResponseUsageMetadata | None,
|
691
784
|
) -> ModelUsage | None:
|
692
785
|
if metadata is None:
|
693
786
|
return None
|
@@ -695,6 +788,7 @@ def usage_metadata_to_model_usage(
|
|
695
788
|
input_tokens=metadata.prompt_token_count or 0,
|
696
789
|
output_tokens=metadata.candidates_token_count or 0,
|
697
790
|
total_tokens=metadata.total_token_count or 0,
|
791
|
+
reasoning_tokens=metadata.thoughts_token_count or 0,
|
698
792
|
)
|
699
793
|
|
700
794
|
|
@@ -720,14 +814,14 @@ def finish_reason_to_stop_reason(finish_reason: FinishReason) -> StopReason:
|
|
720
814
|
|
721
815
|
def parse_safety_settings(
|
722
816
|
safety_settings: Any,
|
723
|
-
) ->
|
817
|
+
) -> list[SafetySettingDict]:
|
724
818
|
# ensure we have a dict
|
725
819
|
if isinstance(safety_settings, str):
|
726
820
|
safety_settings = json.loads(safety_settings)
|
727
821
|
if not isinstance(safety_settings, dict):
|
728
822
|
raise ValueError(f"{SAFETY_SETTINGS} must be dictionary.")
|
729
823
|
|
730
|
-
parsed_settings:
|
824
|
+
parsed_settings: list[SafetySettingDict] = []
|
731
825
|
for key, value in safety_settings.items():
|
732
826
|
if not isinstance(key, str):
|
733
827
|
raise ValueError(f"Unexpected type for harm category: {key}")
|
@@ -735,7 +829,7 @@ def parse_safety_settings(
|
|
735
829
|
raise ValueError(f"Unexpected type for harm block threshold: {value}")
|
736
830
|
key = str_to_harm_category(key)
|
737
831
|
value = str_to_harm_block_threshold(value)
|
738
|
-
parsed_settings
|
832
|
+
parsed_settings.append({"category": key, "threshold": value})
|
739
833
|
return parsed_settings
|
740
834
|
|
741
835
|
|
@@ -795,6 +889,7 @@ async def file_for_content(
|
|
795
889
|
if uploaded_file:
|
796
890
|
try:
|
797
891
|
upload: File = client.files.get(name=uploaded_file)
|
892
|
+
assert upload.state
|
798
893
|
if upload.state.name == "ACTIVE":
|
799
894
|
trace(f"Using uploaded file: {uploaded_file}")
|
800
895
|
return upload
|
@@ -809,14 +904,15 @@ async def file_for_content(
|
|
809
904
|
upload = client.files.upload(
|
810
905
|
file=BytesIO(content_bytes), config=dict(mime_type=mime_type)
|
811
906
|
)
|
812
|
-
while upload.state.name == "PROCESSING":
|
907
|
+
while upload.state.name == "PROCESSING": # type: ignore[union-attr]
|
813
908
|
await anyio.sleep(3)
|
909
|
+
assert upload.name
|
814
910
|
upload = client.files.get(name=upload.name)
|
815
|
-
if upload.state.name == "FAILED":
|
911
|
+
if upload.state.name == "FAILED": # type: ignore[union-attr]
|
816
912
|
trace(f"Failed to upload file '{upload.name}: {upload.error}")
|
817
913
|
raise ValueError(f"Google file upload failed: {upload.error}")
|
818
914
|
# trace and record it
|
819
915
|
trace(f"Uploaded file: {upload.name}")
|
820
|
-
files_db.put(content_sha256, upload.name)
|
916
|
+
files_db.put(content_sha256, str(upload.name))
|
821
917
|
# return the file
|
822
918
|
return upload
|
@@ -347,7 +347,7 @@ def chat_completion_assistant_message(
|
|
347
347
|
|
348
348
|
def set_random_seeds(seed: int | None = None) -> None:
|
349
349
|
if seed is None:
|
350
|
-
seed = np.random.default_rng().integers(2**32 - 1)
|
350
|
+
seed = np.random.default_rng().integers(2**32 - 1) # type: ignore
|
351
351
|
# python hash seed
|
352
352
|
os.environ["PYTHONHASHSEED"] = str(seed)
|
353
353
|
# transformers seed
|
@@ -3,8 +3,6 @@ import json
|
|
3
3
|
import os
|
4
4
|
from typing import Any, Literal
|
5
5
|
|
6
|
-
from httpcore import ReadTimeout
|
7
|
-
from httpx import ReadTimeout as AsyncReadTimeout
|
8
6
|
from mistralai import (
|
9
7
|
ContentChunk,
|
10
8
|
DocumentURLChunk,
|
@@ -51,6 +49,7 @@ from inspect_ai._util.http import is_retryable_http_status
|
|
51
49
|
from inspect_ai._util.images import file_as_data_uri
|
52
50
|
from inspect_ai.tool import ToolCall, ToolChoice, ToolFunction, ToolInfo
|
53
51
|
|
52
|
+
from ..._util.httpx import httpx_should_retry
|
54
53
|
from .._call_tools import parse_tool_call
|
55
54
|
from .._chat_message import (
|
56
55
|
ChatMessage,
|
@@ -235,7 +234,7 @@ class MistralAPI(ModelAPI):
|
|
235
234
|
def should_retry(self, ex: Exception) -> bool:
|
236
235
|
if isinstance(ex, SDKError):
|
237
236
|
return is_retryable_http_status(ex.status_code)
|
238
|
-
elif
|
237
|
+
elif httpx_should_retry(ex):
|
239
238
|
return True
|
240
239
|
else:
|
241
240
|
return False
|