inspect-ai 0.3.90__py3-none-any.whl → 0.3.92__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/_cli/common.py +13 -0
- inspect_ai/_cli/eval.py +44 -0
- inspect_ai/_display/textual/widgets/samples.py +49 -4
- inspect_ai/_display/textual/widgets/vscode.py +4 -2
- inspect_ai/_eval/eval.py +41 -28
- inspect_ai/_eval/evalset.py +4 -0
- inspect_ai/_eval/loader.py +4 -5
- inspect_ai/_eval/registry.py +1 -1
- inspect_ai/_eval/run.py +6 -3
- inspect_ai/_eval/task/log.py +6 -0
- inspect_ai/_eval/task/run.py +108 -41
- inspect_ai/_eval/task/sandbox.py +19 -5
- inspect_ai/_util/_async.py +1 -1
- inspect_ai/_util/constants.py +1 -0
- inspect_ai/_util/environ.py +32 -0
- inspect_ai/_util/file.py +8 -1
- inspect_ai/_util/httpx.py +105 -22
- inspect_ai/_util/registry.py +83 -9
- inspect_ai/_util/text.py +81 -17
- inspect_ai/_util/transcript.py +9 -6
- inspect_ai/_util/vscode.py +7 -2
- inspect_ai/_view/schema.py +1 -1
- inspect_ai/_view/www/babel.config.js +11 -0
- inspect_ai/_view/www/dist/assets/index.css +3640 -3563
- inspect_ai/_view/www/dist/assets/index.js +59204 -52519
- inspect_ai/_view/www/eslint.config.mjs +10 -1
- inspect_ai/_view/www/jest.config.mjs +21 -0
- inspect_ai/_view/www/log-schema.json +111 -2
- inspect_ai/_view/www/package.json +19 -5
- inspect_ai/_view/www/src/{types → @types}/log.d.ts +95 -32
- inspect_ai/_view/www/{App.css → src/app/App.css} +22 -14
- inspect_ai/_view/www/src/app/App.tsx +168 -0
- inspect_ai/_view/www/src/{AppErrorBoundary.tsx → app/AppErrorBoundary.tsx} +1 -1
- inspect_ai/_view/www/src/{appearance → app/appearance}/icons.ts +1 -0
- inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.tsx +5 -5
- inspect_ai/_view/www/src/{workspace/WorkSpaceView.tsx → app/log-view/LogView.tsx} +59 -40
- inspect_ai/_view/www/src/app/log-view/LogViewContainer.tsx +159 -0
- inspect_ai/_view/www/src/app/log-view/LogViewLayout.tsx +109 -0
- inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.tsx +3 -3
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.tsx +4 -4
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.tsx +8 -8
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.tsx +6 -6
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.tsx +8 -8
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.tsx +35 -6
- inspect_ai/_view/www/src/app/log-view/tabs/JsonTab.tsx +136 -0
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/SamplesTab.tsx +82 -73
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/grouping.ts +3 -3
- inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/types.ts +1 -1
- inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.tsx +2 -2
- inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.tsx +1 -1
- inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.tsx +4 -4
- inspect_ai/_view/www/src/{plan → app/plan}/PlanCard.tsx +2 -2
- inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.tsx +5 -5
- inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.tsx +1 -1
- inspect_ai/_view/www/src/app/routing/AppRouter.tsx +58 -0
- inspect_ai/_view/www/src/app/routing/navigationHooks.ts +182 -0
- inspect_ai/_view/www/src/app/routing/url.ts +43 -0
- inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.tsx +11 -27
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDialog.tsx +36 -40
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.module.css +4 -0
- inspect_ai/_view/www/src/{samples → app/samples}/SampleDisplay.tsx +116 -49
- inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.module.css +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/SampleSummaryView.tsx +29 -26
- inspect_ai/_view/www/src/{samples → app/samples}/SamplesTools.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.module.css +5 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessage.tsx +12 -4
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRenderer.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.tsx +6 -1
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatView.tsx +4 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.tsx +5 -3
- inspect_ai/_view/www/src/app/samples/chat/MessageContent.module.css +12 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContent.tsx +11 -10
- inspect_ai/_view/www/src/app/samples/chat/MessageContents.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/MessageContents.tsx +14 -8
- inspect_ai/_view/www/src/{samples → app/samples}/chat/messages.ts +2 -2
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolCallView.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolCallView.tsx +26 -27
- inspect_ai/_view/www/src/app/samples/chat/tools/ToolInput.module.css +19 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolInput.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.module.css +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolOutput.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.module.css +4 -0
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/ToolTitle.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/chat/tools/tool.ts +1 -1
- inspect_ai/_view/www/src/app/samples/chat/types.ts +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/samplesDescriptor.tsx +38 -15
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/CategoricalScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/NumericScoreDescriptor.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/OtherScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ScoreDescriptor.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/descriptor/types.ts +4 -3
- inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.module.css +2 -1
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.tsx +3 -0
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.tsx +47 -33
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.module.css +16 -0
- inspect_ai/_view/www/src/{samples → app/samples}/list/SampleRow.tsx +47 -20
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/filters.ts +8 -6
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.tsx +4 -3
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/completions.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/language.ts +1 -0
- inspect_ai/_view/www/src/{samples → app/samples}/sampleDataAdapter.ts +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/sampleLimit.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.tsx +12 -11
- inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.tsx +6 -6
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ApprovalEventView.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ErrorEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/InputEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.tsx +3 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.module.css +13 -7
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ModelEventView.tsx +49 -21
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.tsx +11 -9
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleLimitEventView.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.tsx +8 -6
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.tsx +4 -4
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/StepEventView.tsx +11 -3
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.module.css +8 -7
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptView.tsx +32 -114
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.module.css +6 -5
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/TranscriptVirtualListComponent.tsx +14 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.tsx +2 -2
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.tsx +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/utils.ts +1 -1
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenderers.tsx +23 -21
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventRenders.module.css +7 -0
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.tsx +2 -2
- inspect_ai/_view/www/src/app/samples/transcript/transform/fixups.ts +142 -0
- inspect_ai/_view/www/src/app/samples/transcript/transform/treeify.ts +39 -0
- inspect_ai/_view/www/src/{samples → app/samples}/transcript/types.ts +1 -1
- inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.tsx +1 -1
- inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.module.css +16 -0
- inspect_ai/_view/www/src/app/sidebar/LogDirectoryTitleView.tsx +70 -0
- inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.module.css +8 -0
- inspect_ai/_view/www/src/{workspace → app}/sidebar/Sidebar.tsx +35 -17
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.tsx +1 -1
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.tsx +2 -2
- inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.tsx +2 -2
- inspect_ai/_view/www/src/{types.ts → app/types.ts} +18 -11
- inspect_ai/_view/www/src/{usage → app/usage}/ModelTokenTable.tsx +1 -1
- inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.tsx +2 -2
- inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.tsx +1 -1
- inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.tsx +6 -6
- inspect_ai/_view/www/src/{api → client/api}/api-browser.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/api-http.ts +3 -3
- inspect_ai/_view/www/src/{api → client/api}/api-vscode.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/client-api.ts +6 -5
- inspect_ai/_view/www/src/{api → client/api}/index.ts +2 -2
- inspect_ai/_view/www/src/{api → client/api}/types.ts +4 -1
- inspect_ai/_view/www/src/{logfile → client/remote}/remoteLogFile.ts +3 -3
- inspect_ai/_view/www/src/{storage → client/storage}/index.ts +11 -5
- inspect_ai/_view/www/src/components/Card.tsx +1 -1
- inspect_ai/_view/www/src/components/CopyButton.tsx +1 -1
- inspect_ai/_view/www/src/components/DownloadButton.tsx +1 -1
- inspect_ai/_view/www/src/components/ErrorPanel.tsx +1 -1
- inspect_ai/_view/www/src/components/{ExpandablePanel.css → ExpandablePanel.module.css} +14 -11
- inspect_ai/_view/www/src/components/ExpandablePanel.tsx +16 -10
- inspect_ai/_view/www/src/components/FindBand.tsx +1 -1
- inspect_ai/_view/www/src/components/JsonPanel.css +2 -2
- inspect_ai/_view/www/src/components/LargeModal.tsx +12 -1
- inspect_ai/_view/www/src/components/LightboxCarousel.tsx +1 -1
- inspect_ai/_view/www/src/components/MarkdownDiv.tsx +3 -1
- inspect_ai/_view/www/src/components/MessageBand.tsx +1 -1
- inspect_ai/_view/www/src/components/NoContentsPanel.tsx +1 -1
- inspect_ai/_view/www/src/constants.ts +10 -9
- inspect_ai/_view/www/src/index.tsx +27 -11
- inspect_ai/_view/www/src/state/appSlice.ts +44 -5
- inspect_ai/_view/www/src/state/hooks.ts +30 -7
- inspect_ai/_view/www/src/state/logSlice.ts +7 -5
- inspect_ai/_view/www/src/state/logsPolling.ts +1 -1
- inspect_ai/_view/www/src/state/logsSlice.ts +18 -13
- inspect_ai/_view/www/src/state/samplePolling.ts +12 -12
- inspect_ai/_view/www/src/state/sampleSlice.ts +3 -5
- inspect_ai/_view/www/src/state/sampleUtils.ts +1 -1
- inspect_ai/_view/www/src/{scoring/utils.ts → state/scoring.ts} +2 -2
- inspect_ai/_view/www/src/state/store.ts +9 -7
- inspect_ai/_view/www/src/state/utils.ts +1 -1
- inspect_ai/_view/www/src/tests/README.md +49 -0
- inspect_ai/_view/www/src/tests/__mocks__/fileMock.js +1 -0
- inspect_ai/_view/www/src/tests/__mocks__/styleMock.js +1 -0
- inspect_ai/_view/www/src/tests/setupTests.mjs +1 -0
- inspect_ai/_view/www/src/tests/utils/base64.test.ts +23 -0
- inspect_ai/_view/www/src/tests/utils/format.test.ts +127 -0
- inspect_ai/_view/www/src/tests/utils/path.test.ts +54 -0
- inspect_ai/_view/www/src/utils/format.ts +8 -2
- inspect_ai/_view/www/src/utils/path.ts +14 -2
- inspect_ai/_view/www/src/utils/polling.ts +1 -2
- inspect_ai/_view/www/src/utils/uri.ts +32 -0
- inspect_ai/_view/www/yarn.lock +3310 -382
- inspect_ai/agent/_handoff.py +6 -3
- inspect_ai/agent/_human/agent.py +5 -3
- inspect_ai/agent/_human/install.py +16 -7
- inspect_ai/agent/_human/panel.py +14 -1
- inspect_ai/agent/_human/service.py +5 -1
- inspect_ai/agent/_react.py +161 -128
- inspect_ai/agent/_types.py +15 -4
- inspect_ai/approval/_policy.py +2 -2
- inspect_ai/log/_file.py +30 -11
- inspect_ai/log/_log.py +7 -1
- inspect_ai/log/_recorders/eval.py +3 -0
- inspect_ai/log/_recorders/types.py +1 -0
- inspect_ai/log/_samples.py +4 -0
- inspect_ai/model/_call_tools.py +33 -17
- inspect_ai/model/_generate_config.py +10 -2
- inspect_ai/model/_model.py +41 -21
- inspect_ai/model/_model_output.py +2 -1
- inspect_ai/model/_openai.py +10 -8
- inspect_ai/model/_openai_responses.py +95 -42
- inspect_ai/model/_providers/anthropic.py +14 -12
- inspect_ai/model/_providers/google.py +191 -95
- inspect_ai/model/_providers/hf.py +1 -1
- inspect_ai/model/_providers/mistral.py +2 -3
- inspect_ai/model/_providers/openai.py +54 -17
- inspect_ai/model/_providers/openai_o1.py +1 -1
- inspect_ai/model/_providers/openai_responses.py +28 -16
- inspect_ai/model/_providers/openrouter.py +14 -0
- inspect_ai/model/_providers/providers.py +2 -2
- inspect_ai/model/_providers/util/chatapi.py +17 -7
- inspect_ai/model/_providers/vllm.py +1 -1
- inspect_ai/scorer/_metric.py +17 -1
- inspect_ai/scorer/_model.py +51 -6
- inspect_ai/scorer/_scorer.py +1 -1
- inspect_ai/solver/_human_agent.py +3 -0
- inspect_ai/solver/_plan.py +1 -1
- inspect_ai/solver/_solver.py +1 -1
- inspect_ai/solver/_use_tools.py +14 -8
- inspect_ai/tool/__init__.py +16 -1
- inspect_ai/tool/_json_rpc_helpers.py +285 -0
- inspect_ai/tool/_mcp/__init__.py +13 -0
- inspect_ai/tool/_mcp/_context.py +14 -0
- inspect_ai/tool/_mcp/_mcp.py +293 -0
- inspect_ai/tool/_mcp/_sandbox.py +104 -0
- inspect_ai/tool/_mcp/_types.py +31 -0
- inspect_ai/tool/_mcp/connection.py +60 -0
- inspect_ai/tool/_mcp/sampling.py +118 -0
- inspect_ai/tool/_mcp/server.py +112 -0
- inspect_ai/tool/_mcp/tools.py +34 -0
- inspect_ai/tool/_tool.py +13 -0
- inspect_ai/tool/_tool_def.py +24 -7
- inspect_ai/tool/_tool_support_helpers.py +129 -153
- inspect_ai/tool/_tools/_bash_session.py +11 -11
- inspect_ai/tool/_tools/_text_editor.py +6 -6
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +8 -8
- inspect_ai/util/_anyio.py +31 -20
- inspect_ai/util/_json.py +20 -2
- inspect_ai/util/_sandbox/context.py +18 -7
- inspect_ai/util/_sandbox/docker/compose.py +1 -1
- inspect_ai/util/_sandbox/docker/docker.py +92 -21
- inspect_ai/util/_sandbox/environment.py +33 -2
- inspect_ai/util/_sandbox/events.py +2 -2
- inspect_ai/util/_sandbox/service.py +13 -3
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/METADATA +6 -2
- inspect_ai-0.3.92.dist-info/RECORD +732 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/WHEEL +1 -1
- inspect_ai/_view/www/src/App.tsx +0 -316
- inspect_ai/_view/www/src/samples/chat/MessageContent.module.css +0 -4
- inspect_ai/_view/www/src/samples/chat/MessageContents.module.css +0 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolCallView.module.css +0 -3
- inspect_ai/_view/www/src/samples/chat/tools/ToolInput.module.css +0 -14
- inspect_ai/_view/www/src/workspace/WorkSpace.tsx +0 -292
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.module.css +0 -5
- inspect_ai/_view/www/src/workspace/sidebar/LogDirectoryTitleView.tsx +0 -57
- inspect_ai/_view/www/src/workspace/tabs/JsonTab.tsx +0 -43
- inspect_ai-0.3.90.dist-info/RECORD +0 -705
- /inspect_ai/_view/www/src/{types → @types}/asciicinema-player.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/jsondiffpatch.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/markdown-it-katex.d.ts +0 -0
- /inspect_ai/_view/www/src/{types → @types}/prism.d.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/colors.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/fonts.ts +0 -0
- /inspect_ai/_view/www/src/{appearance → app/appearance}/styles.ts +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataGrid.tsx +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetaDataView.tsx +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/MetadataGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/RenderedContent.module.css +0 -0
- /inspect_ai/_view/www/src/{metadata → app/content}/types.ts +0 -0
- /inspect_ai/_view/www/src/{workspace/WorkSpaceView.module.css → app/log-view/LogView.module.css} +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/error/TaskErrorPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ModelRolesView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/Navbar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/PrimaryBar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ResultsPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/RunningStatusPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/ScoreGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/SecondaryBar.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/navbar/StatusPanel.tsx +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/InfoTab.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/JsonTab.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/tabs/RunningNoSamples.tsx +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/types.ts +0 -0
- /inspect_ai/_view/www/src/{workspace → app/log-view}/utils.ts +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/DatasetDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/DetailStep.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ModelCard.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/PlanDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/ScorerDetailView.tsx +0 -0
- /inspect_ai/_view/www/src/{plan → app/plan}/SolverDetailView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/InlineSampleDisplay.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatMessageRow.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/chat/ChatViewVirtualList.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/BooleanScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/ObjectScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/descriptor/score/PassFailScoreDescriptor.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/FlatSampleErrorView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/SampleErrorView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/error/error.ts +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleFooter.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleHeader.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleList.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/list/SampleSeparator.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/EpochFilter.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SelectScorer.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/SortFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/SampleFilter.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/sample-tools/sample-filter/tokenize.ts +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScores.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresGrid.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/scores/SampleScoresView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/InfoEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/LoggerEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SampleInitEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SandboxEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ScoreEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/SubtaskEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/ToolEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNav.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventNavs.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventProgressPanel.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventRow.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventSection.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/event/EventTimingPanel.module.css +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateDiffView.tsx +0 -0
- /inspect_ai/_view/www/src/{samples → app/samples}/transcript/state/StateEventView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/EvalStatus.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarLogEntry.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoreView.module.css +0 -0
- /inspect_ai/_view/www/src/{workspace → app}/sidebar/SidebarScoresView.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/ModelUsagePanel.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/TokenTable.module.css +0 -0
- /inspect_ai/_view/www/src/{usage → app/usage}/UsageCard.module.css +0 -0
- /inspect_ai/_view/www/src/{api → client/api}/api-shared.ts +0 -0
- /inspect_ai/_view/www/src/{api → client/api}/jsonrpc.ts +0 -0
- /inspect_ai/_view/www/src/{logfile → client/remote}/remoteZipFile.ts +0 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/licenses/LICENSE +0 -0
- {inspect_ai-0.3.90.dist-info → inspect_ai-0.3.92.dist-info}/top_level.txt +0 -0
inspect_ai/_eval/task/run.py
CHANGED
@@ -2,7 +2,7 @@ import contextlib
|
|
2
2
|
import functools
|
3
3
|
import sys
|
4
4
|
import time
|
5
|
-
from copy import deepcopy
|
5
|
+
from copy import copy, deepcopy
|
6
6
|
from dataclasses import dataclass, field
|
7
7
|
from datetime import datetime
|
8
8
|
from logging import getLogger
|
@@ -307,6 +307,7 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
|
|
307
307
|
functools.partial(
|
308
308
|
task_run_sample,
|
309
309
|
task_name=task.name,
|
310
|
+
log_location=profile.log_location,
|
310
311
|
sample=sample,
|
311
312
|
state=state,
|
312
313
|
sandbox=sandbox,
|
@@ -325,6 +326,8 @@ async def task_run(options: TaskRunOptions) -> EvalLog:
|
|
325
326
|
config.fail_on_error is None
|
326
327
|
or config.fail_on_error is True
|
327
328
|
),
|
329
|
+
retry_on_error=config.retry_on_error or 0,
|
330
|
+
error_retries=[],
|
328
331
|
time_limit=config.time_limit,
|
329
332
|
working_limit=config.working_limit,
|
330
333
|
semaphore=sample_semaphore,
|
@@ -484,7 +487,9 @@ def update_metrics_display_fn(
|
|
484
487
|
|
485
488
|
|
486
489
|
async def task_run_sample(
|
490
|
+
*,
|
487
491
|
task_name: str,
|
492
|
+
log_location: str,
|
488
493
|
sample: Sample,
|
489
494
|
state: TaskState,
|
490
495
|
sandbox: SandboxEnvironmentSpec | None,
|
@@ -500,6 +505,8 @@ async def task_run_sample(
|
|
500
505
|
sample_error: SampleErrorHandler,
|
501
506
|
sample_complete: Callable[[dict[str, SampleScore]], None],
|
502
507
|
fails_on_error: bool,
|
508
|
+
retry_on_error: int,
|
509
|
+
error_retries: list[EvalError],
|
503
510
|
time_limit: int | None,
|
504
511
|
working_limit: int | None,
|
505
512
|
semaphore: anyio.Semaphore | None,
|
@@ -531,6 +538,9 @@ async def task_run_sample(
|
|
531
538
|
sample_complete(sample_scores)
|
532
539
|
return sample_scores
|
533
540
|
|
541
|
+
# copy variables that we may pass back to ourselves on a retry
|
542
|
+
initial_state = deepcopy(state)
|
543
|
+
|
534
544
|
# use semaphore if provided
|
535
545
|
semaphore_cm: anyio.Semaphore | contextlib.AbstractAsyncContextManager[None] = (
|
536
546
|
semaphore if semaphore else contextlib.nullcontext()
|
@@ -561,20 +571,31 @@ async def task_run_sample(
|
|
561
571
|
|
562
572
|
# helper to handle exceptions (will throw if we've exceeded the limit)
|
563
573
|
def handle_error(ex: BaseException) -> tuple[EvalError, BaseException | None]:
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
f"Sample
|
569
|
-
)
|
570
|
-
|
571
|
-
return
|
574
|
+
# helper to log sample error
|
575
|
+
def log_sample_error() -> None:
|
576
|
+
msg = f"Sample error (id: {sample.id}, epoch: {state.epoch}): {exception_message(ex)})"
|
577
|
+
if retry_on_error > 0:
|
578
|
+
msg = f"{msg}. Sample will be retried."
|
579
|
+
py_logger.warning(msg)
|
580
|
+
|
581
|
+
# if we have retries left then return EvalError
|
582
|
+
if retry_on_error > 0:
|
583
|
+
log_sample_error()
|
584
|
+
return eval_error(ex, type(ex), ex, ex.__traceback__), None
|
585
|
+
else:
|
586
|
+
err = sample_error(ex)
|
587
|
+
# if we aren't raising the error then print a warning
|
588
|
+
if err[1] is None:
|
589
|
+
log_sample_error()
|
590
|
+
transcript()._event(ErrorEvent(error=err[0]))
|
591
|
+
return err
|
572
592
|
|
573
593
|
# solver loop
|
574
594
|
async with (
|
575
595
|
semaphore_cm,
|
576
596
|
active_sample(
|
577
597
|
task=task_name,
|
598
|
+
log_location=log_location,
|
578
599
|
model=str(state.model),
|
579
600
|
sample=sample,
|
580
601
|
epoch=state.epoch,
|
@@ -582,7 +603,7 @@ async def task_run_sample(
|
|
582
603
|
token_limit=state.token_limit,
|
583
604
|
time_limit=time_limit,
|
584
605
|
working_limit=working_limit,
|
585
|
-
fails_on_error=fails_on_error,
|
606
|
+
fails_on_error=fails_on_error or (retry_on_error > 0),
|
586
607
|
transcript=sample_transcript,
|
587
608
|
) as active,
|
588
609
|
):
|
@@ -606,7 +627,7 @@ async def task_run_sample(
|
|
606
627
|
|
607
628
|
async with sandboxenv_cm:
|
608
629
|
timeout_cm: (
|
609
|
-
contextlib._GeneratorContextManager[anyio.CancelScope
|
630
|
+
contextlib._GeneratorContextManager[anyio.CancelScope]
|
610
631
|
| contextlib.nullcontext[None]
|
611
632
|
) = contextlib.nullcontext()
|
612
633
|
try:
|
@@ -791,40 +812,84 @@ async def task_run_sample(
|
|
791
812
|
except Exception as ex:
|
792
813
|
error, raise_error = handle_error(ex)
|
793
814
|
|
794
|
-
# complete the sample
|
795
|
-
|
815
|
+
# complete the sample if there is no error or if there is no retry_on_error in play
|
816
|
+
if not error or (retry_on_error == 0):
|
817
|
+
progress(SAMPLE_TOTAL_PROGRESS_UNITS)
|
818
|
+
|
819
|
+
# log it
|
820
|
+
if logger is not None:
|
821
|
+
# if we are logging images then be sure to base64 images injected by solvers
|
822
|
+
if log_images:
|
823
|
+
state = (await states_with_base64_content([state]))[0]
|
824
|
+
|
825
|
+
# otherwise ensure there are no base64 images in sample or messages
|
826
|
+
else:
|
827
|
+
sample = sample_without_base64_content(sample)
|
828
|
+
state = state_without_base64_content(state)
|
829
|
+
|
830
|
+
# log the sample
|
831
|
+
await log_sample(
|
832
|
+
start_time=start_time,
|
833
|
+
logger=logger,
|
834
|
+
sample=sample,
|
835
|
+
state=state,
|
836
|
+
scores=results,
|
837
|
+
error=error,
|
838
|
+
error_retries=error_retries,
|
839
|
+
log_images=log_images,
|
840
|
+
)
|
796
841
|
|
797
|
-
|
842
|
+
# error that should be retried (we do this outside of the above scope so that we can
|
843
|
+
# retry outside of the original semaphore -- our retry will therefore go to the back
|
844
|
+
# of the sample queue)
|
845
|
+
if error and retry_on_error > 0:
|
846
|
+
# remove any buffered sample events
|
798
847
|
if logger is not None:
|
799
|
-
|
800
|
-
if log_images:
|
801
|
-
state = (await states_with_base64_content([state]))[0]
|
848
|
+
logger.remove_sample(state.sample_id, state.epoch)
|
802
849
|
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
#
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
850
|
+
# recurse w/ tick down of retry_on_error and append of error to error_retries
|
851
|
+
return await task_run_sample(
|
852
|
+
task_name=task_name,
|
853
|
+
log_location=log_location,
|
854
|
+
sample=sample,
|
855
|
+
# state was deep copied at the outset
|
856
|
+
state=initial_state,
|
857
|
+
sandbox=sandbox,
|
858
|
+
max_sandboxes=max_sandboxes,
|
859
|
+
sandbox_cleanup=sandbox_cleanup,
|
860
|
+
plan=plan,
|
861
|
+
scorers=scorers,
|
862
|
+
generate=generate,
|
863
|
+
progress=progress,
|
864
|
+
logger=logger,
|
865
|
+
log_images=log_images,
|
866
|
+
sample_source=sample_source,
|
867
|
+
sample_error=sample_error,
|
868
|
+
sample_complete=sample_complete,
|
869
|
+
fails_on_error=fails_on_error,
|
870
|
+
# tick retry count down
|
871
|
+
retry_on_error=retry_on_error - 1,
|
872
|
+
# forward on error that caused retry
|
873
|
+
error_retries=copy(error_retries) + [error],
|
874
|
+
time_limit=time_limit,
|
875
|
+
working_limit=working_limit,
|
876
|
+
semaphore=semaphore,
|
877
|
+
)
|
818
878
|
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
879
|
+
# no error
|
880
|
+
elif error is None:
|
881
|
+
# call sample_complete callback if we have score results
|
882
|
+
if results is not None:
|
883
|
+
sample_complete(results)
|
884
|
+
return results
|
885
|
+
|
886
|
+
# we have an error and should raise it
|
887
|
+
elif raise_error is not None:
|
888
|
+
raise raise_error
|
889
|
+
|
890
|
+
# we have an error and should not raise it
|
891
|
+
else:
|
892
|
+
return None
|
828
893
|
|
829
894
|
|
830
895
|
async def log_sample(
|
@@ -834,6 +899,7 @@ async def log_sample(
|
|
834
899
|
state: TaskState,
|
835
900
|
scores: dict[str, SampleScore],
|
836
901
|
error: EvalError | None,
|
902
|
+
error_retries: list[EvalError],
|
837
903
|
log_images: bool,
|
838
904
|
) -> None:
|
839
905
|
# sample must have id to be logged
|
@@ -879,6 +945,7 @@ async def log_sample(
|
|
879
945
|
if total_time is not None
|
880
946
|
else None,
|
881
947
|
error=error,
|
948
|
+
error_retries=error_retries,
|
882
949
|
limit=limit,
|
883
950
|
)
|
884
951
|
|
inspect_ai/_eval/task/sandbox.py
CHANGED
@@ -17,6 +17,7 @@ from inspect_ai._eval.task.task import Task
|
|
17
17
|
from inspect_ai._eval.task.util import task_run_dir
|
18
18
|
from inspect_ai._util.file import file, filesystem
|
19
19
|
from inspect_ai._util.httpx import httpx_should_retry, log_httpx_retry_attempt
|
20
|
+
from inspect_ai._util.path import chdir
|
20
21
|
from inspect_ai._util.registry import registry_unqualified_name
|
21
22
|
from inspect_ai._util.url import data_uri_to_base64, is_data_uri, is_http_url
|
22
23
|
from inspect_ai.dataset import Sample
|
@@ -29,6 +30,7 @@ from inspect_ai.util._sandbox.environment import (
|
|
29
30
|
SandboxEnvironment,
|
30
31
|
SandboxEnvironmentConfigType,
|
31
32
|
SandboxEnvironmentSpec,
|
33
|
+
TaskInitEnvironment,
|
32
34
|
)
|
33
35
|
from inspect_ai.util._sandbox.registry import registry_find_sandboxenv
|
34
36
|
|
@@ -42,7 +44,7 @@ async def sandboxenv_context(
|
|
42
44
|
sample: Sample,
|
43
45
|
) -> AsyncGenerator[None, None]:
|
44
46
|
# resolve sandbox
|
45
|
-
sandbox = resolve_sandbox(sandbox, sample)
|
47
|
+
sandbox = await resolve_sandbox(sandbox, sample)
|
46
48
|
if not sandbox:
|
47
49
|
raise ValueError("sandboxenv_context called with no sandbox specified")
|
48
50
|
|
@@ -143,22 +145,34 @@ async def read_sandboxenv_file(contents: str) -> bytes:
|
|
143
145
|
class TaskSandboxEnvironment(NamedTuple):
    """Sandbox resolved for a task/sample pair, plus what is needed to init it.

    Being a ``NamedTuple`` of hashable parts (the environment is stored as a
    tuple of pairs rather than a dict), instances can be compared and used
    as set members or dict keys.
    """

    # sandbox specification that was resolved for the task/sample
    sandbox: SandboxEnvironmentSpec
    # run directory of the task (as returned by task_run_dir)
    run_dir: str
    # environment variables for task init, as sorted (name, value) pairs
    env: tuple[tuple[str, str], ...]
|
146
149
|
|
147
150
|
|
148
|
-
def
|
151
|
+
async def resolve_sandbox_for_task_and_sample(
    eval_sandbox: SandboxEnvironmentSpec | None,
    task: Task,
    sample: Sample,
) -> TaskSandboxEnvironment | None:
    """Resolve the sandbox (and its init environment) for one sample of a task.

    Args:
        eval_sandbox: Sandbox specified at the eval level; when provided it
            overrides the task or sample sandbox.
        task: Task whose sandbox and run directory are used.
        sample: Sample whose sandbox and metadata are consulted.

    Returns:
        A ``TaskSandboxEnvironment`` holding the resolved sandbox, the task
        run directory and the sorted init environment variables, or ``None``
        when no sandbox applies.
    """
    # eval_sandbox overrides task or sample sandbox
    resolved = eval_sandbox or await resolve_sandbox(task.sandbox, sample)
    if resolved is None:
        return None

    # see if there are environment variables required for init of this sample
    # (looked up with the task run directory as the working directory)
    run_dir = task_run_dir(task)
    with chdir(run_dir):
        sandboxenv_type = registry_find_sandboxenv(resolved.type)
        init_environment = cast(
            TaskInitEnvironment, getattr(sandboxenv_type, "task_init_environment")
        )
        env = await init_environment(resolved.config, sample.metadata or {})

    # sort the variables so that equal environments yield equal tuples
    env_pairs = tuple(sorted(env.items()))
    return TaskSandboxEnvironment(sandbox=resolved, run_dir=run_dir, env=env_pairs)
|
159
173
|
|
160
174
|
|
161
|
-
def resolve_sandbox(
|
175
|
+
async def resolve_sandbox(
|
162
176
|
sandbox: SandboxEnvironmentSpec | None,
|
163
177
|
sample: Sample,
|
164
178
|
) -> SandboxEnvironmentSpec | None:
|
inspect_ai/_util/_async.py
CHANGED
inspect_ai/_util/constants.py
CHANGED
@@ -26,6 +26,7 @@ ALL_LOG_LEVELS = [
|
|
26
26
|
DEFAULT_LOG_LEVEL = "warning"
|
27
27
|
DEFAULT_LOG_LEVEL_TRANSCRIPT = "info"
|
28
28
|
DEFAULT_LOG_SHARED = 10
|
29
|
+
DEFAULT_RETRY_ON_ERROR = 1
|
29
30
|
ALL_LOG_FORMATS = ["eval", "json"]
|
30
31
|
DEFAULT_LOG_FORMAT: Literal["eval", "json"] = "eval"
|
31
32
|
JSON_LOG_FORMAT = "json"
|
inspect_ai/_util/environ.py
CHANGED
@@ -24,3 +24,35 @@ def environ_var(name: str, value: str) -> Iterator[None]:
|
|
24
24
|
os.environ.pop(name, None)
|
25
25
|
else:
|
26
26
|
os.environ[name] = previous_value
|
27
|
+
|
28
|
+
|
29
|
+
@contextmanager
def environ_vars(env_vars: dict[str, str]) -> Iterator[None]:
    """
    Temporarily set multiple environment variables within a context.

    On exit (normal or via exception) every variable named in `env_vars`
    is restored to the value it had on entry, or removed if it was not set.
    If assigning one of the values fails, the variables that were already
    assigned are rolled back before the error propagates.

    Args:
        env_vars: Dictionary mapping environment variable names to values

    Yields:
        None
    """
    # save previous values (None marks a variable that was not set)
    previous_values = {name: os.environ.get(name) for name in env_vars}

    try:
        # set new values inside the try so that a failure part-way through
        # (e.g. a non-string value) still triggers the restore below
        for name, value in env_vars.items():
            os.environ[name] = value

        yield
    finally:
        # restore previous environment from the snapshot taken on entry
        for name, previous_value in previous_values.items():
            if previous_value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = previous_value
|
inspect_ai/_util/file.py
CHANGED
@@ -271,8 +271,15 @@ class FileSystem:
|
|
271
271
|
if "mtime" not in file.keys() and file["type"] == "file":
|
272
272
|
file["mtime"] = self.fs.created(file).timestamp()
|
273
273
|
|
274
|
+
# adjust mtime to be milliseconds
|
274
275
|
if "mtime" in file.keys():
|
275
|
-
|
276
|
+
mtime = file["mtime"]
|
277
|
+
if isinstance(mtime, datetime.datetime):
|
278
|
+
file["mtime"] = mtime.timestamp() * 1000
|
279
|
+
elif isinstance(mtime, int | float):
|
280
|
+
file["mtime"] = mtime * 1000
|
281
|
+
else:
|
282
|
+
raise ValueError(f"Unexpected type for mtime ({type(mtime)}): {mtime}")
|
276
283
|
else:
|
277
284
|
file["mtime"] = None
|
278
285
|
|
inspect_ai/_util/httpx.py
CHANGED
@@ -1,10 +1,13 @@
|
|
1
1
|
import logging
|
2
2
|
from typing import Callable
|
3
3
|
|
4
|
-
|
4
|
+
import httpcore
|
5
|
+
import httpx
|
6
|
+
from httpx import HTTPStatusError
|
5
7
|
from tenacity import RetryCallState
|
6
8
|
|
7
9
|
from inspect_ai._util.constants import HTTP
|
10
|
+
from inspect_ai._util.http import is_retryable_http_status
|
8
11
|
|
9
12
|
logger = logging.getLogger(__name__)
|
10
13
|
|
@@ -20,25 +23,10 @@ def httpx_should_retry(ex: BaseException) -> bool:
|
|
20
23
|
Returns:
|
21
24
|
True if a retry should occur
|
22
25
|
"""
|
23
|
-
# httpx status exception
|
24
26
|
if isinstance(ex, HTTPStatusError):
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
# lock timeout
|
29
|
-
elif ex.response.status_code == 409:
|
30
|
-
return True
|
31
|
-
# rate limit
|
32
|
-
elif ex.response.status_code == 429:
|
33
|
-
return True
|
34
|
-
# internal errors
|
35
|
-
elif ex.response.status_code >= 500:
|
36
|
-
return True
|
37
|
-
else:
|
38
|
-
return False
|
39
|
-
|
40
|
-
# connection error
|
41
|
-
elif is_httpx_connection_error(ex):
|
27
|
+
return is_retryable_http_status(ex.response.status_code)
|
28
|
+
|
29
|
+
elif httpx_should_retry_no_status_code(ex):
|
42
30
|
return True
|
43
31
|
|
44
32
|
# don't retry
|
@@ -50,11 +38,106 @@ def log_httpx_retry_attempt(context: str) -> Callable[[RetryCallState], None]:
|
|
50
38
|
def log_attempt(retry_state: RetryCallState) -> None:
|
51
39
|
logger.log(
|
52
40
|
HTTP,
|
53
|
-
f"{context} connection retry {retry_state.attempt_number}
|
41
|
+
f"{context} connection retry {retry_state.attempt_number} (retrying in {retry_state.upcoming_sleep:,.0f} seconds)",
|
54
42
|
)
|
55
43
|
|
56
44
|
return log_attempt
|
57
45
|
|
58
46
|
|
59
|
-
def
|
60
|
-
|
47
|
+
def httpx_should_retry_no_status_code(ex: BaseException) -> bool:
    """
    Decide whether an exception that carries no HTTP status code is retryable.

    An exception is retried when it is an httpx transport-level error, or one
    of the httpcore network, timeout or protocol errors (httpcore is the
    lower-level library used by httpx and sometimes raises directly). The
    exception hierarchies of both libraries are reproduced below for reference.


    # HTTPX Exception Hierarchy
    Exception (Python built-in)
    |
    +-- HTTPError
    |   |
    |   +-- RequestError
    |   |   |
    |   |   +-- TransportError
    |   |   |   |
    |   |   |   +-- TimeoutException
    |   |   |   |   |
    |   |   |   |   +-- ConnectTimeout
    |   |   |   |   +-- ReadTimeout
    |   |   |   |   +-- WriteTimeout
    |   |   |   |   +-- PoolTimeout
    |   |   |   |
    |   |   |   +-- NetworkError
    |   |   |   |   |
    |   |   |   |   +-- ConnectError
    |   |   |   |   +-- ReadError
    |   |   |   |   +-- WriteError
    |   |   |   |   +-- CloseError
    |   |   |   |
    |   |   |   +-- ProtocolError
    |   |   |   |   |
    |   |   |   |   +-- LocalProtocolError
    |   |   |   |   +-- RemoteProtocolError
    |   |   |   |
    |   |   |   +-- ProxyError
    |   |   |   +-- UnsupportedProtocol
    |   |   |
    |   |   +-- DecodingError
    |   |   +-- TooManyRedirects
    |   |
    |   +-- HTTPStatusError
    |
    +-- InvalidURL
    +-- CookieConflict
    +-- RuntimeError (Python built-in)
        |
        +-- StreamError
            |
            +-- StreamConsumed
            +-- StreamClosed
            +-- ResponseNotRead
            +-- RequestNotRead


    # HTTPCore Exception Hierarchy
    Exception (Python built-in)
    |
    +-- ConnectionNotAvailable
    +-- ProxyError
    +-- UnsupportedProtocol
    +-- ProtocolError
    |   |
    |   +-- RemoteProtocolError
    |   +-- LocalProtocolError
    |
    +-- TimeoutException
    |   |
    |   +-- PoolTimeout
    |   +-- ConnectTimeout
    |   +-- ReadTimeout
    |   +-- WriteTimeout
    |
    +-- NetworkError
        |
        +-- ConnectError
        +-- ReadError
        +-- WriteError
    """
    # extensible in case we notice other cases: add further exception types here
    retryable_types = (
        # base class for all exceptions that occur at the level of the Transport API
        httpx.TransportError,
        # raised directly by httpcore, the lower-level library that httpx uses
        httpcore.NetworkError,
        httpcore.TimeoutException,
        httpcore.ProtocolError,
    )
    return isinstance(ex, retryable_types)
|
inspect_ai/_util/registry.py
CHANGED
@@ -1,6 +1,17 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
1
3
|
import inspect
|
2
4
|
from inspect import get_annotations, isclass
|
3
|
-
from typing import
|
5
|
+
from typing import (
|
6
|
+
TYPE_CHECKING,
|
7
|
+
Any,
|
8
|
+
Callable,
|
9
|
+
Literal,
|
10
|
+
TypedDict,
|
11
|
+
TypeGuard,
|
12
|
+
cast,
|
13
|
+
overload,
|
14
|
+
)
|
4
15
|
|
5
16
|
from pydantic import BaseModel, Field
|
6
17
|
from pydantic_core import to_jsonable_python
|
@@ -11,19 +22,30 @@ from inspect_ai._util.package import get_installed_package_name
|
|
11
22
|
from .constants import PKG_NAME
|
12
23
|
from .entrypoints import ensure_entry_points
|
13
24
|
|
25
|
+
if TYPE_CHECKING:
|
26
|
+
from inspect_ai import Task
|
27
|
+
from inspect_ai.agent import Agent
|
28
|
+
from inspect_ai.approval import Approver
|
29
|
+
from inspect_ai.model import ModelAPI
|
30
|
+
from inspect_ai.scorer import Metric, Scorer, ScoreReducer
|
31
|
+
from inspect_ai.solver import Plan, Solver
|
32
|
+
from inspect_ai.tool import Tool
|
33
|
+
from inspect_ai.util import SandboxEnvironment
|
34
|
+
|
14
35
|
obj_type = type
|
15
36
|
|
16
37
|
RegistryType = Literal[
|
17
|
-
"task",
|
18
|
-
"solver",
|
19
38
|
"agent",
|
20
|
-
"
|
21
|
-
"scorer",
|
39
|
+
"approver",
|
22
40
|
"metric",
|
23
|
-
"score_reducer",
|
24
41
|
"modelapi",
|
42
|
+
"plan",
|
25
43
|
"sandboxenv",
|
26
|
-
"
|
44
|
+
"score_reducer",
|
45
|
+
"scorer",
|
46
|
+
"solver",
|
47
|
+
"task",
|
48
|
+
"tool",
|
27
49
|
]
|
28
50
|
"""Enumeration of registry object types.
|
29
51
|
|
@@ -184,7 +206,59 @@ def registry_find(predicate: Callable[[RegistryInfo], bool]) -> list[object]:
|
|
184
206
|
return o
|
185
207
|
|
186
208
|
|
187
|
-
|
209
|
+
@overload
|
210
|
+
def registry_create(type: Literal["agent"], name: str, **kwargs: Any) -> Agent: ...
|
211
|
+
|
212
|
+
|
213
|
+
@overload
|
214
|
+
def registry_create(
|
215
|
+
type: Literal["approver"], name: str, **kwargs: Any
|
216
|
+
) -> Approver: ...
|
217
|
+
|
218
|
+
|
219
|
+
@overload
|
220
|
+
def registry_create(type: Literal["metric"], name: str, **kwargs: Any) -> Metric: ...
|
221
|
+
|
222
|
+
|
223
|
+
@overload
|
224
|
+
def registry_create(
|
225
|
+
type: Literal["modelapi"], name: str, **kwargs: Any
|
226
|
+
) -> ModelAPI: ...
|
227
|
+
|
228
|
+
|
229
|
+
@overload
|
230
|
+
def registry_create(type: Literal["plan"], name: str, **kwargs: Any) -> Plan: ...
|
231
|
+
|
232
|
+
|
233
|
+
@overload
|
234
|
+
def registry_create(
|
235
|
+
type: Literal["sandboxenv"], name: str, **kwargs: Any
|
236
|
+
) -> SandboxEnvironment: ...
|
237
|
+
|
238
|
+
|
239
|
+
@overload
|
240
|
+
def registry_create(type: Literal["scorer"], name: str, **kwargs: Any) -> Scorer: ...
|
241
|
+
|
242
|
+
|
243
|
+
@overload
|
244
|
+
def registry_create(
|
245
|
+
type: Literal["score_reducer"], name: str, **kwargs: Any
|
246
|
+
) -> ScoreReducer: ...
|
247
|
+
|
248
|
+
|
249
|
+
@overload
|
250
|
+
def registry_create(type: Literal["solver"], name: str, **kwargs: Any) -> Solver: ...
|
251
|
+
|
252
|
+
|
253
|
+
@overload
|
254
|
+
def registry_create(type: Literal["task"], name: str, **kwargs: Any) -> Task: ...
|
255
|
+
|
256
|
+
|
257
|
+
@overload
|
258
|
+
def registry_create(type: Literal["tool"], name: str, **kwargs: Any) -> Tool: ...
|
259
|
+
|
260
|
+
|
261
|
+
def registry_create(type: RegistryType, name: str, **kwargs: Any) -> object: # type: ignore[return]
|
188
262
|
r"""Create a registry object.
|
189
263
|
|
190
264
|
Creates objects registered via decorator (e.g. `@task`, `@solver`). Note
|
@@ -230,7 +304,7 @@ def registry_create(type: RegistryType, name: str, **kwargs: Any) -> object:
|
|
230
304
|
if isclass(obj):
|
231
305
|
return with_registry_info(obj(**kwargs))
|
232
306
|
elif callable(obj):
|
233
|
-
return_type = get_annotations(obj).get("return")
|
307
|
+
return_type = get_annotations(obj, eval_str=True).get("return")
|
234
308
|
# Until we remove the MetricDeprecated symbol we need this extra
|
235
309
|
# bit to map the Metric union back to Metric
|
236
310
|
if "_metric.Metric" in str(return_type):
|