inspect-ai 0.3.54__tar.gz → 0.3.56__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/.gitignore +4 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/.pre-commit-config.yaml +1 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/CHANGELOG.md +22 -3
- {inspect_ai-0.3.54/src/inspect_ai.egg-info → inspect_ai-0.3.56}/PKG-INFO +2 -2
- inspect_ai-0.3.56/docs/_container_limits.md +15 -0
- inspect_ai-0.3.56/docs/_metadata_typing.md +34 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/_quarto.yml +11 -4
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/_sandboxenv-interface.md +10 -1
- inspect_ai-0.3.56/docs/_store_typing.md +52 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/agents-api.qmd +13 -13
- inspect_ai-0.3.56/docs/agents.qmd +282 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/approval.qmd +29 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/datasets.qmd +4 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/extensions.qmd +1 -1
- inspect_ai-0.3.56/docs/human-agent.qmd +166 -0
- inspect_ai-0.3.56/docs/images/inspect-human-agent-container.png +0 -0
- inspect_ai-0.3.56/docs/images/inspect-human-agent.png +0 -0
- inspect_ai-0.3.56/docs/images/inspect-terminal-transcript.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/index.qmd +21 -3
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/llms.txt +9 -4
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/log-viewer.qmd +1 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/parallelism.qmd +2 -14
- inspect_ai-0.3.56/docs/sandboxing.qmd +338 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/scripts/post-render.sh +1 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/tools.qmd +5 -5
- inspect_ai-0.3.56/docs/tracing.qmd +58 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/tutorial.qmd +1 -1
- inspect_ai-0.3.56/docs/typing.qmd +18 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/intervention/README.md +1 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/pyproject.toml +1 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/__init__.py +1 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_cli/common.py +1 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_cli/trace.py +33 -20
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/core/active.py +1 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/core/display.py +1 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/core/footer.py +1 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/core/progress.py +0 -6
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/core/rich.py +1 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/rich/display.py +2 -2
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/textual/app.py +15 -17
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/textual/widgets/clock.py +3 -3
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/textual/widgets/samples.py +6 -13
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/context.py +9 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/score.py +4 -10
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/task/log.py +2 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/task/results.py +5 -4
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/task/run.py +6 -12
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/task/task.py +10 -0
- inspect_ai-0.3.56/src/inspect_ai/_util/ansi.py +31 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/format.py +7 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/logger.py +12 -12
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/throttle.py +10 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/trace.py +43 -47
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/transcript.py +4 -0
- inspect_ai-0.3.56/src/inspect_ai/_util/vscode.py +51 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/notify.py +2 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/App.css +22 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/dist/assets/index.css +2374 -2
- inspect_ai-0.3.56/src/inspect_ai/_view/www/dist/assets/index.js +31859 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/log-schema.json +138 -90
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/package.json +1 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/App.mjs +1 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
- inspect_ai-0.3.56/src/inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
- inspect_ai-0.3.56/src/inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
- inspect_ai-0.3.56/src/inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/Tools.mjs +11 -3
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
- inspect_ai-0.3.56/src/inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/types/log.d.ts +26 -12
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/yarn.lock +44 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/approval/_apply.py +4 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/approval/_human/panel.py +5 -8
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/dataset/_dataset.py +51 -10
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/dataset/_util.py +31 -3
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/__init__.py +2 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_log.py +5 -2
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_cache.py +1 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_call_tools.py +4 -2
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_chat_message.py +3 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_model.py +42 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/anthropic.py +4 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/openai.py +11 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_render.py +9 -2
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_metric.py +12 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/solver/__init__.py +2 -0
- inspect_ai-0.3.56/src/inspect_ai/solver/_human_agent/agent.py +83 -0
- inspect_ai-0.3.56/src/inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
- inspect_ai-0.3.56/src/inspect_ai/solver/_human_agent/commands/clock.py +70 -0
- inspect_ai-0.3.56/src/inspect_ai/solver/_human_agent/commands/command.py +59 -0
- inspect_ai-0.3.56/src/inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
- inspect_ai-0.3.56/src/inspect_ai/solver/_human_agent/commands/note.py +42 -0
- inspect_ai-0.3.56/src/inspect_ai/solver/_human_agent/commands/score.py +80 -0
- inspect_ai-0.3.56/src/inspect_ai/solver/_human_agent/commands/status.py +62 -0
- inspect_ai-0.3.56/src/inspect_ai/solver/_human_agent/commands/submit.py +151 -0
- inspect_ai-0.3.56/src/inspect_ai/solver/_human_agent/install.py +222 -0
- inspect_ai-0.3.56/src/inspect_ai/solver/_human_agent/panel.py +252 -0
- inspect_ai-0.3.56/src/inspect_ai/solver/_human_agent/service.py +45 -0
- inspect_ai-0.3.56/src/inspect_ai/solver/_human_agent/state.py +55 -0
- inspect_ai-0.3.56/src/inspect_ai/solver/_human_agent/view.py +24 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/solver/_task_state.py +28 -2
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tool.py +10 -2
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_web_browser.py +13 -10
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/__init__.py +8 -4
- inspect_ai-0.3.54/src/inspect_ai/_util/display.py → inspect_ai-0.3.56/src/inspect_ai/util/_display.py +6 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_panel.py +31 -9
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/__init__.py +0 -3
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/context.py +5 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/docker/compose.py +16 -10
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/docker/docker.py +9 -6
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/docker/internal.py +1 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/docker/util.py +2 -2
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/environment.py +6 -5
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/local.py +1 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/service.py +22 -7
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_store.py +5 -6
- inspect_ai-0.3.56/src/inspect_ai/util/_store_model.py +110 -0
- inspect_ai-0.3.56/src/inspect_ai/util/_throttle.py +32 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56/src/inspect_ai.egg-info}/PKG-INFO +2 -2
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai.egg-info/SOURCES.txt +36 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai.egg-info/requires.txt +1 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/approval/test_approval.py +29 -14
- inspect_ai-0.3.56/tests/dataset/test_dataset/samples-md.csv +2 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/dataset/test_dataset/samples-md.json +3 -3
- inspect_ai-0.3.56/tests/dataset/test_dataset/samples-md.jsonl +1 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/dataset/test_dataset.py +51 -1
- inspect_ai-0.3.56/tests/model/test_cache.py +6 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/solver/test_store.py +18 -0
- inspect_ai-0.3.56/tests/solver/test_store_model.py +222 -0
- inspect_ai-0.3.56/tests/tools/test_tool_images.py +69 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/util/sandbox/test_sandbox_service.py +3 -2
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/@types/log.d.ts +26 -12
- inspect_ai-0.3.54/docs/agents.qmd +0 -613
- inspect_ai-0.3.54/src/inspect_ai/_view/www/dist/assets/index.js +0 -26661
- inspect_ai-0.3.54/tests/dataset/test_dataset/samples-md.csv +0 -2
- inspect_ai-0.3.54/tests/dataset/test_dataset/samples-md.jsonl +0 -1
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/.github/dependabot.yml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/.github/pull_request_template.md +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/.github/workflows/build.yml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/.github/workflows/docs.yml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/.github/workflows/log_viewer.yml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/.github/workflows/pypi.yml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/.github/workflows/vscode.yml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/.vscode/extensions.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/.vscode/settings.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/CITATION.cff +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/LICENSE +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/Makefile +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/README.md +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/.gitignore +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/CNAME +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/_errors_and_retries.md +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/_sample-preservation.md +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/_tools-annotations-required.md +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/_tools-scaffold.md +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/_trace.md +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/_variables.yml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/caching.qmd +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/errors-and-limits.qmd +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/eval-logs.qmd +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/eval-sets.qmd +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/examples/examples.bib +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/examples/examples.css +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/examples/examples.ejs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/examples/examples.yml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/examples/index.qmd +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/aisi-logo.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/eval-log.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-activity-bar.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-multiple-models.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-view-answers.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-view-filter.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-view-history.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-view-home.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-view-info.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-view-logging-console.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-view-logging.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-view-main.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-view-messages.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-view-metadata.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-view-scoring.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-view-sort.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-view-splash.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-vscode-config.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-vscode-install.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-vscode-logview.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-vscode-output-channel.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect-vscode-run-task.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/inspect.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/logs-drop-down.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/logs-open-button.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/logs.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/popularity.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/python-tool-view.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/rate-limit.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/running-theory.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/task-max-sandboxes.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/toolenv-no-cleanup.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/images/web-browser-tool-view.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/interactivity.qmd +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/models.qmd +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/scorers.qmd +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/solvers.qmd +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/theme.scss +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/vscode.qmd +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/docs/workflow.qmd +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/approval/README.md +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/approval/approval.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/approval/approval.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/biology_qa.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/browser/browser.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/browser/compose.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/cache.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/evalset.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/hello_world.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/images/ballons.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/images/bike.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/images/images.jsonl +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/images/images.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/intervention/Dockerfile +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/intervention/compose.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/intervention/intervention.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/langchain/.env.example +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/langchain/.gitignore +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/langchain/README.md +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/langchain/inspect_langchain.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/langchain/requirements.txt +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/langchain/wikipedia.jsonl +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/langchain/wikipedia.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/popularity.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/security_guide.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/theory_of_mind.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/examples/tool_use.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/requirements.txt +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/setup.cfg +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/__main__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_cli/cache.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_cli/eval.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_cli/info.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_cli/list.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_cli/log.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_cli/main.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_cli/sandbox.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_cli/score.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_cli/util.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_cli/view.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/core/config.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/core/group.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/core/panel.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/core/results.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/core/textual.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/rich/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/textual/app.tcss +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/textual/display.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/textual/theme.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/textual/widgets/console.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/textual/widgets/footer.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/textual/widgets/task_detail.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/textual/widgets/tasks.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/textual/widgets/titlebar.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/textual/widgets/toggle.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_display/textual/widgets/transcript.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/eval.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/evalset.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/list.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/loader.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/registry.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/run.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/task/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/task/constants.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/task/epochs.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/task/error.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/task/generate.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/task/images.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/task/rundir.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/task/sandbox.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_eval/task/util.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/_async.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/appdirs.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/config.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/constants.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/content.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/datetime.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/decorator.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/deprecation.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/dev.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/dict.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/dotenv.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/entrypoints.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/environ.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/error.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/file.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/git.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/hash.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/hooks.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/html.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/http.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/images.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/json.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/list.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/notebook.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/package.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/path.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/pattern.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/platform.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/registry.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/retry.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/rich.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/samples.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/terminal.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/text.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/timeouts.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/url.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_util/version.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/schema.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/server.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/view.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/.gitignore +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/.prettierignore +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/.tool-versions +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/README.md +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/dist/assets/favicon.svg +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/dist/index.html +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/eslint.config.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/favicon.svg +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/index.html +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/jsconfig.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/postcss.config.cjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/Register.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/Types.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/api/Types.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/api/api-browser.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/api/api-http.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/api/api-shared.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/api/api-vscode.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/api/client-api.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/api/index.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/appearance/Colors.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/appearance/Fonts.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/appearance/Styles.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/AnsiDisplay.css +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/Browser.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/Card.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/ChatView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/Dialog.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/DownloadButton.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/DownloadPanel.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/FindBand.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/JsonPanel.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/LargeModal.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/MessageBand.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/MessageContent.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/MetaDataGrid.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/NavPills.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/ProgressBar.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/RenderedContent/ChatMessageRenderer.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/RenderedContent/RenderedContent.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/RenderedContent/Types.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/TabSet.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/ToolButton.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/VirtualList.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/components/ansi-output.js +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/constants.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/index.js +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/json/JsonTab.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/log/remoteLogFile.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/SampleError.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/SampleLimit.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/SampleList.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/SampleScores.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/SampleTranscript.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/SamplesTab.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/tools/filters.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/ApprovalEventView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/EventSection.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/LoggerEventView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/Types.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/samples/transcript/state/StateDiffView.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/types/jsondiffpatch.d.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/types/prism.d.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/Base64.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/Format.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/Git.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/Html.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/Json.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/Path.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/Print.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/Type.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/attachments.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/debugging.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/http.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/queue.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/remoteZipFile.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/sync.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/utils/vscode.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/tsconfig.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/_view/www/vite.config.js +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/approval/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/approval/_approval.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/approval/_approver.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/approval/_auto.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/approval/_call.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/approval/_human/approver.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/approval/_human/console.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/approval/_human/manager.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/approval/_human/util.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/approval/_policy.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/approval/_registry.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/dataset/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/dataset/_examples/bias_detection.jsonl +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/dataset/_examples/biology_qa.jsonl +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/dataset/_examples/popularity.jsonl +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/dataset/_examples/security_guide.jsonl +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/dataset/_examples/theory_of_mind.jsonl +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/dataset/_sources/csv.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/dataset/_sources/example.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/dataset/_sources/file.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/dataset/_sources/hf.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/dataset/_sources/json.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/dataset/_sources/util.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_bundle.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_condense.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_convert.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_file.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_message.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_recorders/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_recorders/create.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_recorders/eval.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_recorders/file.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_recorders/json.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_recorders/recorder.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_retry.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_samples.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/log/_transcript.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_generate_config.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_image.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_model_call.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_model_output.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/azureai.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/bedrock.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/cloudflare.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/google.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/grok.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/groq.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/hf.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/llama_cpp_python.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/mistral.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/mockllm.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/ollama.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/openai_o1.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/providers.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/together.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/util/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/util/chatapi.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/util/hf_handler.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/util/llama31.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/util/util.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/vertex.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_providers/vllm.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_registry.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/model/_trace.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/py.typed +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_answer.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_choice.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_classification.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_common.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_match.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_metrics/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_metrics/accuracy.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_metrics/mean.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_metrics/std.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_model.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_multi.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_pattern.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_reducer/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_reducer/reducer.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_reducer/registry.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_reducer/types.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_score.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_scorer.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/scorer/_target.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/solver/_basic_agent.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/solver/_chain.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/solver/_critique.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/solver/_fork.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/solver/_multiple_choice.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/solver/_plan.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/solver/_prompt.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/solver/_solver.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/solver/_transcript.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/solver/_use_tools.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/solver/_util.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tool_call.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tool_choice.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tool_def.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tool_description.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tool_info.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tool_params.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tool_transcript.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tool_with.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_execute.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/Dockerfile +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/README.md +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/accessibility_node.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/images/usage_diagram.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/mock_environment.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/playwright_crawler.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/test_accessibility_node.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/test_dm_env_servicer.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/test_playwright_crawler.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/test_web_environment.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/web_client.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/web_client_new_session.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/web_environment.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_browser/_resources/web_server.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/tool/_tools/_web_search.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_concurrency.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_console.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_resource.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/docker/cleanup.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/docker/config.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/docker/prereqs.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/limits.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/registry.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_sandbox/self_check.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_subprocess.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_subtask.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai/util/_trace.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai.egg-info/dependency_links.txt +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai.egg-info/entry_points.txt +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/src/inspect_ai.egg-info/top_level.txt +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/approval/approve.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/approval/escalate.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/approval/modify.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/approval/reject.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/approval/terminate.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/conftest.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/dataset/test_dataset/dataset.jsonl +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/dataset/test_dataset/images/ballons.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/dataset/test_dataset/images.jsonl +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/dataset/test_dataset/samples.csv +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/dataset/test_dataset/samples.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/dataset/test_dataset/samples.jsonl +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_eval_log/log_formats.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_eval_log/log_images.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_eval_log/log_images_tc.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_eval_log/log_invalid.txt +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_eval_log/log_length_stop_reason.txt +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_eval_log/log_streaming.eval +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_eval_log/log_valid.txt +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_eval_log/log_version_3.txt +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_eval_log/log_with_nan.txt +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_eval_log.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_list_logs/2024-11-05T13-31-45-05-00_input-task_8zXjbRzCWrL9GXiXo2vus9.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_list_logs/2024-11-05T13-32-37-05-00_input-task_hxs4q9azL3ySGkjJirypKZ.eval +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_list_logs/custom.eval +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_list_logs/ignore.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_list_logs.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_log_attachments.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_log_formats.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_log_level.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_log_streaming.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/log/test_log_tags.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/providers/test_anthropic.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/providers/test_azureai.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/providers/test_cloudflare.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/providers/test_google.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/providers/test_grok.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/providers/test_groq.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/providers/test_hf.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/providers/test_llama_cpp_python.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/providers/test_openai.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/providers/test_openai_o1.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/providers/test_vertex.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/providers/test_vllm.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/test_api_key.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/test_collapse_assistant_message.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/test_collapse_user_message.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/test_disable.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/test_logprobs.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/test_mock_model_llm.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/test_num_choices.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/test_parse_tool_call.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/model/test_stop_reason.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/scorer/test_answer.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/scorer/test_choice.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/scorer/test_classification.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/scorer/test_match.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/scorer/test_metric.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/scorer/test_model_graded.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/scorer/test_multiscorer.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/scorer/test_pattern.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/scorer/test_reducers.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/scorer/test_scorer.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/scorer/test_value_to_float.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/solver/test_basic_agent.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/solver/test_chain.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/solver/test_fork.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/solver/test_multiple_choice.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/solver/test_prompt.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/solver/test_setup.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/solver/test_solver.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/solver/test_solver_decorator.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/solver/test_solver_spec.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/solver/test_subtask.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/solver/test_transcript.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_eval.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_eval_config/model.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_eval_config/solver.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_eval_config/task.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_eval_config.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_eval_set/2024-08-29T15-11-17+00-00_popularity_5EAmX6wjMFqea6WY7XHzZp.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_eval_set/2024-08-29T15-11-18+00-00_popularity_5EAmX6wjMFqea6WY7XHzZp.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_eval_set.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_examples.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_extensions.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_fail_on_error.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_helpers/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_helpers/tasks.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_helpers/tool_call_utils.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_helpers/tools.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_helpers/utils.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_list_task.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_log_dir/example_task/example_task.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_log_dir/test_log_dir.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_package/.gitignore +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_package/inspect_package/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_package/inspect_package/_registry.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_package/inspect_package/approvers/renamer.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_package/inspect_package/modelapi/custom.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_package/inspect_package/py.typed +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_package/inspect_package/sandboxenv/podman.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_package/inspect_package/score/scorer.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_package/inspect_package/solvers/cot.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_package/pyproject.toml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_retry.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_run_dir/task1/task1.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_run_dir/task2/task2.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_run_dir.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_sample_id.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_sample_limits.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_task_attr.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_task_list/__init__.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_task_list/attribs.ipynb +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_task_list/multiple.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_task_list/multiple_dir/_decoy/testit.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_task_list/multiple_dir/_decoy2.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_task_list/multiple_dir/bar.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_task_list/multiple_dir/foo.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_task_list/recurse/.folder3/epsilon.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_task_list/recurse/folder1/_decoy.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_task_list/recurse/folder1/theta.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_task_list/recurse/folder2/.folder3/epsilon.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_task_list/recurse/folder2/another.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/test_task_list/recurse/folder2/first.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/docker-compose-context/Dockerfile +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/docker-compose-context-alpine/Dockerfile +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_bash_tool.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_max_exec_output.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_max_tool_output.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_python_tool.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_sandbox_compose.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_sandbox_compose_alpine.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_sandbox_docker_and_local.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_sandbox_dockerfile.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_sandbox_tool_eval.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_tool_def.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_tool_parse.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_tool_types.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_tool_view.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_tool_with.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_tools.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_use_tools.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_web_browser.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/tools/test_web_browser_compose.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/util/sandbox/docker_compose_multiple_services/docker-compose.yaml +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/util/sandbox/sandbox_setup.sh +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/util/sandbox/test_docker_compose_multiple_services.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/util/sandbox/test_sandbox_setup.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/util/test_file.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/util/test_images/images.jsonl +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/util/test_images.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/util/test_package.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/util/test_registry.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/util/test_str_to_float.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/util/test_subprocess.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tests/view/test_bundle.py +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/.eslintrc.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/.gitignore +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/.vscode/extensions.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/.vscode/launch.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/.vscode/settings.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/.vscode/tasks.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/.vscode-test.mjs +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/.vscodeignore +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/.yarnrc +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/CHANGELOG.md +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/LICENSE +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/README.md +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/assets/icon/eval-treeview.svg +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/assets/icon/eval.svg +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/assets/logo/inspect.png +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/assets/logo/inspect.svg +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/assets/templates/task.py.template +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/assets/www/codicon/codicon.css +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/assets/www/codicon/codicon.ttf +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/assets/www/view/view-overrides.css +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/package.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/@types/hooks.d.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/components/document.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/components/error.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/components/focus.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/components/notebook.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/components/symbol.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/components/task.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/components/templates.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/components/webview.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/appdirs.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/command.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/dispose.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/env.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/file.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/git.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/jsonrpc.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/log.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/nonce.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/path.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/port.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/process.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/python/code.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/python/env.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/python/exec.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/python/index.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/python/interpreter.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/random.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/string.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/text.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/uri.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/vscode/association.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/wait.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/core/workspace.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/extension.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/hooks/hooks.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/hooks/index.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/hooks/preview.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/inspect/index.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/inspect/logs.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/inspect/props.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/inspect/version.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/active-task/active-task-command.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/active-task/active-task-provider.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/activity-bar-provider.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/env-config-provider.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/log-listing/log-directory-selector.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/log-listing/log-listing-data.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/log-listing/log-listing-mru.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/log-listing/log-listing-provider.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/log-listing/log-listing.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/task-config-commands.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/task-config-provider.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/task-outline-commands.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/task-outline-provider.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/webview/env-config-webview.css +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/webview/env-config-webview.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/webview/task-config-webview.css +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/webview/task-config-webview.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/webview/vscode-controls.css +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/activity-bar/webview/webview-utils.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/codelens/codelens-provider.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/inspect/inspect-commands.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/inspect/inspect-constants.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/inspect/inspect-eval-commands.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/inspect/inspect-eval.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/inspect/inspect-logs-watcher.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/inspect/inspect-manager.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/inspect/inspect-view-server.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/lognotify.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/logview/commands.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/logview/logview-editor.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/logview/logview-link-provider.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/logview/logview-panel.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/logview/logview-state.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/logview/logview-view.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/logview/logview.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/openlog.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/protocol-handler.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/settings/inspect-settings.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/settings/user-settings.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/statusbar.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/workspace/workspace-env-commands.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/workspace/workspace-env-provider.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/workspace/workspace-init.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/workspace/workspace-state-provider.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/providers/workspace/workspace-task-provider.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/src/test/extension.test.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/tools/ts-to-mjs/.gitignore +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/tools/ts-to-mjs/package.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/tools/ts-to-mjs/rollup.config.js +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/tools/ts-to-mjs/src/index.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/tools/ts-to-mjs/src/jsonrpc.ts +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/tools/ts-to-mjs/yarn.lock +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/tsconfig.json +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/webpack.config.js +0 -0
- {inspect_ai-0.3.54 → inspect_ai-0.3.56}/tools/vscode/yarn.lock +0 -0
@@ -84,6 +84,10 @@ target/
|
|
84
84
|
profile_default/
|
85
85
|
ipython_config.py
|
86
86
|
|
87
|
+
# VSCode
|
88
|
+
# Ignore developer specific files.
|
89
|
+
.vscode/bookmarks.json
|
90
|
+
|
87
91
|
# pyenv
|
88
92
|
# For a library or package, you might want to ignore these files since the code is
|
89
93
|
# intended to run in multiple environments; otherwise, check them in:
|
@@ -1,8 +1,27 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## v0.3.56 (01 January 2025)
|
4
|
+
|
5
|
+
- [Human Agent](https://inspect.ai-safety-institute.org.uk/human-agent.html) solver for human baselining of computing tasks.
|
6
|
+
- [Typed interfaces](https://inspect.ai-safety-institute.org.uk/typing.html) to `Sample` store and metadata using Pydantic models.
|
7
|
+
- [Approval policies](https://inspect.ai-safety-institute.org.uk/approval.html#task-approvers) can now be defined at the `Task` level (`eval` level approval policies take precedence).
|
8
|
+
- Tools can now return `ContentText` and `ContentImage`.
|
9
|
+
- Move tool result images into subsequent user messages for models that don't support tools returning images.
|
10
|
+
- `SandboxConnection` that contains login information from sandboxes.
|
11
|
+
- `display_type()` function for detecting the current display type (e.g. "full", "rich", etc.)
|
12
|
+
- Trace: improved handling of `eval()` running in multiple processes at once (trace file per-process)
|
13
|
+
- Docker: don't apply timeouts to `docker build` and `docker pull` commands.
|
14
|
+
- Bugfix: fix issue w/ `store.get()` not auto-inserting `default` value.
|
15
|
+
|
16
|
+
## v0.3.55 (29 December 2024)
|
17
|
+
|
18
|
+
- Bedrock: redact authentication model args from eval logs.
|
19
|
+
- OpenAI: warn when `temperature` is used with o1 models (as it is not supported).
|
20
|
+
- Bugfix: spread args for cache trace logging.
|
21
|
+
|
3
22
|
## v0.3.54 (26 December 2024)
|
4
23
|
|
5
|
-
- [
|
24
|
+
- [Tracing](https://inspect.ai-safety-institute.org.uk/tracing.html) for diagnosing runs with unterminated action (e.g. model calls, docker commands, etc.).
|
6
25
|
- Provide default timeout/retry for docker compose commands to mitigate unreliability in some configurations.
|
7
26
|
- Switch to sync S3 writes to overcome unreliability observed when using async interface.
|
8
27
|
- Task display: Added `--no-score-display` option to disable realtime scoring metrics.
|
@@ -374,7 +393,7 @@
|
|
374
393
|
## v0.3.26 (6 September 2024)
|
375
394
|
|
376
395
|
- [Eval Sets](https://inspect.ai-safety-institute.org.uk/eval-sets.html) for running groups of tasks with automatic retries.
|
377
|
-
- [Per-sample](https://inspect.ai-safety-institute.org.uk/
|
396
|
+
- [Per-sample](https://inspect.ai-safety-institute.org.uk/sandboxing.html#sec-per-sample-sandbox) Sandbox environments can now be specified (e.g. allowing for a distinct Dockerfile or Docker compose file for each sample).
|
378
397
|
- [input_screen()](https://inspect.ai-safety-institute.org.uk/interactivity.html) context manager to temporarily clear task display for user input.
|
379
398
|
- Introduce two new scorers, `f1()` (precision and recall in text matching) and `exact()` (whether normalized text matches exactly).
|
380
399
|
- Task `metrics` now override built in scorer metrics (previously they were merged). This enables improved re-use of existing scorers where they only change required is a different set of metrics.
|
@@ -526,7 +545,7 @@
|
|
526
545
|
|
527
546
|
## v0.3.15 (15 June 2024)
|
528
547
|
|
529
|
-
- [Sandbox Environments](https://inspect.ai-safety-institute.org.uk/
|
548
|
+
- [Sandbox Environments](https://inspect.ai-safety-institute.org.uk/sandboxing.html) for executing tool code in a sandbox.
|
530
549
|
- [Caching](https://inspect.ai-safety-institute.org.uk/caching.html) to reduce the number of model API calls made.
|
531
550
|
- The `multiple_choice()` solver now has support for questions with multiple correct answers.
|
532
551
|
- More fine grained handling of Claude `BadRequestError` (400) errors (which were formerly all treated as content moderation errors).
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: inspect_ai
|
3
|
-
Version: 0.3.
|
3
|
+
Version: 0.3.56
|
4
4
|
Summary: Framework for large language model evaluations
|
5
5
|
Author: UK AI Safety Institute
|
6
6
|
License: MIT License
|
@@ -67,7 +67,7 @@ Requires-Dist: pytest-asyncio; extra == "dev"
|
|
67
67
|
Requires-Dist: pytest-cov; extra == "dev"
|
68
68
|
Requires-Dist: pytest-dotenv; extra == "dev"
|
69
69
|
Requires-Dist: pytest-xdist; extra == "dev"
|
70
|
-
Requires-Dist: ruff==0.8.
|
70
|
+
Requires-Dist: ruff==0.8.4; extra == "dev"
|
71
71
|
Requires-Dist: textual-dev>=0.86.2; extra == "dev"
|
72
72
|
Requires-Dist: types-PyYAML; extra == "dev"
|
73
73
|
Requires-Dist: types-beautifulsoup4; extra == "dev"
|
@@ -0,0 +1,15 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
### Max Sandboxes
|
4
|
+
|
5
|
+
The `max_sandboxes` option determines how many sandboxes can be executed in parallel. Individual sandbox providers can establish their own default limits (for example, the Docker provider has a default of `2 * os.cpu_count()`). You can modify this option as required, but be aware that container runtimes have resource limits, and pushing up against and beyond them can lead to instability and failed evaluations.
|
6
|
+
|
7
|
+
When a `max_sandboxes` is applied, an indicator at the bottom of the task status screen will be shown:
|
8
|
+
|
9
|
+

|
10
|
+
|
11
|
+
Note that when `max_sandboxes` is applied this effectively creates a global `max_samples` limit that is equal to the `max_sandboxes`.
|
12
|
+
|
13
|
+
### Max Subprocesses
|
14
|
+
|
15
|
+
The `max_subprocesses` option determines how many subprocess calls can run in parallel. By default, this is set to `os.cpu_count()`. Depending on the nature of execution done inside sandbox environments, you might benefit from increasing or decreasing `max_subprocesses`.
|
@@ -0,0 +1,34 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
If you want a more strongly typed interface to sample metadata, you can define a [Pydantic model](https://docs.pydantic.dev/latest/concepts/models/) and use it to both validate and read metadata.
|
4
|
+
|
5
|
+
For validation, pass a `BaseModel` derived class in the `FieldSpec`. The interface to metadata is read-only so you must also specify `frozen=True`. For example:
|
6
|
+
|
7
|
+
```python
|
8
|
+
from pydantic import BaseModel
|
9
|
+
|
10
|
+
class PopularityMetadata(BaseModel, frozen=True):
|
11
|
+
category: str
|
12
|
+
label_confidence: float
|
13
|
+
|
14
|
+
dataset = json_dataset(
|
15
|
+
"popularity.jsonl",
|
16
|
+
FieldSpec(
|
17
|
+
input="question",
|
18
|
+
target="answer_matching_behavior",
|
19
|
+
id="question_id",
|
20
|
+
metadata=PopularityMetadata,
|
21
|
+
),
|
22
|
+
)
|
23
|
+
```
|
24
|
+
|
25
|
+
To read metadata in a typesafe fashion, us the `metadata_as()` method on `Sample` or `TaskState`:
|
26
|
+
|
27
|
+
```python
|
28
|
+
metadata = state.metadata_as(PopularityMetadata)
|
29
|
+
```
|
30
|
+
|
31
|
+
Note again that the intended semantics of `metadata` are read-only, so attempting to write into the returned metadata will raise a Pydantic `FrozenInstanceError`.
|
32
|
+
|
33
|
+
If you need per-sample mutable data, use the [sample store](agents-api.qmd#sample-store), which also supports [typing](agents-api.qmd#store-typing) using Pydantic models.
|
34
|
+
|
@@ -46,22 +46,29 @@ website:
|
|
46
46
|
contents:
|
47
47
|
- solvers.qmd
|
48
48
|
- tools.qmd
|
49
|
-
- agents.qmd
|
50
49
|
- scorers.qmd
|
51
50
|
- datasets.qmd
|
52
51
|
- models.qmd
|
53
52
|
|
53
|
+
- section: "Agents"
|
54
|
+
contents:
|
55
|
+
- agents.qmd
|
56
|
+
- sandboxing.qmd
|
57
|
+
- agents-api.qmd
|
58
|
+
- human-agent.qmd
|
59
|
+
- approval.qmd
|
60
|
+
|
54
61
|
- section: "Advanced"
|
55
62
|
contents:
|
63
|
+
- eval-logs.qmd
|
56
64
|
- eval-sets.qmd
|
57
65
|
- text: "Errors & Limits"
|
58
66
|
href: errors-and-limits.qmd
|
67
|
+
- typing.qmd
|
68
|
+
- tracing.qmd
|
59
69
|
- caching.qmd
|
60
70
|
- parallelism.qmd
|
61
|
-
- agents-api.qmd
|
62
71
|
- interactivity.qmd
|
63
|
-
- approval.qmd
|
64
|
-
- eval-logs.qmd
|
65
72
|
- extensions.qmd
|
66
73
|
|
67
74
|
|
@@ -51,8 +51,17 @@ class SandboxEnvironment:
|
|
51
51
|
exceeds the 100 MiB limit.
|
52
52
|
"""
|
53
53
|
...
|
54
|
+
|
55
|
+
async def connection(self) -> SandboxConnection:
|
56
|
+
"""
|
57
|
+
Raises:
|
58
|
+
NotImplementedError: For sandboxes that don't provide connections
|
59
|
+
ConnectionError: If sandbox is not currently running.
|
60
|
+
"""
|
54
61
|
```
|
55
62
|
|
56
|
-
Note that `write_file()` automatically creates parent directories as required if they don't exist.
|
63
|
+
The `read_file()` method should should preserve newline constructs (e.g. crlf should be preserved not converted to lf). This is equivalent to specifying `newline=""` in a call to the Python `open()` function. Note that `write_file()` automatically creates parent directories as required if they don't exist.
|
64
|
+
|
65
|
+
The `connection()` method is optional, and provides commands that can be used to login to the sandbox container from a terminal or IDE.
|
57
66
|
|
58
67
|
For each method there is a documented set of errors that are raised: these are _expected_ errors and can either be caught by tools or allowed to propagate in which case they will be reported to the model for potential recovery. In addition, _unexpected_ errors may occur (e.g. a networking error connecting to a remote container): these errors are not reported to the model and fail the `Sample` with an error state.
|
@@ -0,0 +1,52 @@
|
|
1
|
+
|
2
|
+
If you prefer a typesafe interface to the sample store, you can define a [Pydantic model](https://docs.pydantic.dev/latest/concepts/models/) which reads and writes values into the store. There are several benefits to using Pydantic models for store access:
|
3
|
+
|
4
|
+
1. You can provide type annotations and validation rules for all fields.
|
5
|
+
2. Default values for all fields are declared using standard Pydantic syntax.
|
6
|
+
3. Store names are automatically namespaced (to prevent conflicts between multiple store accessors).
|
7
|
+
|
8
|
+
#### Definition
|
9
|
+
|
10
|
+
First, derive a class from `StoreModel` (which in turn derives from Pydantic `BaseModel`):
|
11
|
+
|
12
|
+
```python
|
13
|
+
from pydantic import Field
|
14
|
+
from inspect_ai.util import StoreModel
|
15
|
+
|
16
|
+
class Activity(StoreModel):
|
17
|
+
active: bool = Field(default=False)
|
18
|
+
tries: int = Field(default=0)
|
19
|
+
actions: list[str] = Field(default_factory=list)
|
20
|
+
```
|
21
|
+
|
22
|
+
Note that we define defaults for all fields. This is generally required so that you can initialise your Pydantic model from an empty store. For collections (`list` and `dict`) you should use `default_factory` so that each instance gets its own default.
|
23
|
+
|
24
|
+
#### Usage
|
25
|
+
|
26
|
+
Use the `store_as()` function to get a typesafe interface to the store based on your model:
|
27
|
+
|
28
|
+
```python
|
29
|
+
# typed interface to store from state
|
30
|
+
activity = state.store_as(Activity)
|
31
|
+
activity.active = True
|
32
|
+
activity.tries += 1
|
33
|
+
|
34
|
+
# global store_as() function (e.g. for use from tools)
|
35
|
+
from inspect_ai.util import store_as
|
36
|
+
activity = store_as(Activity)
|
37
|
+
```
|
38
|
+
|
39
|
+
Note that all instances of `Activity` created within a running sample share the same sample `Store` so can see each other's changes. For example, you can call `state.store_as()` in multiple solvers and/or scorers and it will resolve to the same sample-scoped instance.
|
40
|
+
|
41
|
+
The names used in the underlying `Store` are namespaced to prevent collisions with other `Store` accessors. For example, the `active` field in the `Activity` class is written to the store with the name `Activity:active`.
|
42
|
+
|
43
|
+
#### Explicit Store
|
44
|
+
|
45
|
+
The `store_as()` function automatically binds to the current sample `Store`. You can alternatively create an explicit `Store` and pass it directly to the model (e.g. for testing purposes):
|
46
|
+
|
47
|
+
```python
|
48
|
+
from inspect_ai.util import Store
|
49
|
+
store = Store()
|
50
|
+
activity = Activity(store=store)
|
51
|
+
```
|
52
|
+
|
@@ -6,7 +6,7 @@ title: Agents API
|
|
6
6
|
|
7
7
|
This article describes advanced Inspect APIs available for creating evaluations with agents. You can also build agents evals using Inspect's built in [Basic Agent](agents.qmd#sec-basic-agent) or by bridging to an external agent library (see the main [Agents](agents.qmd) article for further details). Topics covered in this article include:
|
8
8
|
|
9
|
-
1. Sharing state across solvers and tools
|
9
|
+
1. Sharing per-sample state across solvers and tools
|
10
10
|
2. Creating a custom tool use loop
|
11
11
|
3. Dynamically customising tool descriptions
|
12
12
|
4. Observability with sample transcripts.
|
@@ -15,19 +15,10 @@ This article describes advanced Inspect APIs available for creating evaluations
|
|
15
15
|
|
16
16
|
We'll assume that you have already covered the basics of [Solvers](solvers.qmd), [Tools](tools.qmd), and [Agents](agents.qmd) (please review those articles as required before proceeding).
|
17
17
|
|
18
|
-
## Use of `metadata`
|
19
18
|
|
20
|
-
|
19
|
+
## Sample Store
|
21
20
|
|
22
|
-
|
23
|
-
- Providing a place to log additional structured data.
|
24
|
-
- Recording calls to "helper" models used for elicitation or scoring.
|
25
|
-
|
26
|
-
The `metadata` field no longer need be used for these scenarios (and in fact should now be treated as a read-only part of the `TaskState`). Below we'll describe how the `Store` can be used for state, how structured data can be logged to the sample `Transcript`, and how all model calls are now automatically recorded and included in the transcript.
|
27
|
-
|
28
|
-
## Sharing State
|
29
|
-
|
30
|
-
Sequences of solvers often need to store and manipulate shared state. Further, tools may often want their own persistent state (or groups of tools may want to share state). This can be accomplished in Inspect using the `Store`, which provides a scoped scratchpad for arbitrary values.
|
21
|
+
Sequences of solvers executing against a sample often need to store and manipulate shared state. Further, tools may often want their own persistent state (or groups of tools may want to share state). This can be accomplished in Inspect using the `Store`, which provides a sample-scoped scratchpad for arbitrary values.
|
31
22
|
|
32
23
|
The core of the `Store` interface is:
|
33
24
|
|
@@ -40,6 +31,8 @@ class Store:
|
|
40
31
|
def delete(self, key: str) -> None
|
41
32
|
```
|
42
33
|
|
34
|
+
Note that the core `Store` interface is a property bag without strong typing. See the section below on [typed store access](#store-typing) for details on how to interact with the store in a typesafe fashion.
|
35
|
+
|
43
36
|
Basic views on the store's collection (e.g. `items()`, `keys()`, `values()`) are also provided. Note that the `get()` method will automatically add the `default` to the store if it doesn't exist.
|
44
37
|
|
45
38
|
The `Store` can be accessed via `TaskState` as follows:
|
@@ -67,6 +60,11 @@ You should generally try to use JSON serialisable Python types in the `Store` (e
|
|
67
60
|
|
68
61
|
While the default `Store` for a sample is shared globally between solvers and tools, a more narrowly scoped `Store` is created automatically for [Subtasks](#sec-subtasks).
|
69
62
|
|
63
|
+
### Store Typing {#store-typing}
|
64
|
+
|
65
|
+
{{< include _store_typing.md >}}
|
66
|
+
|
67
|
+
|
70
68
|
## Tool Use
|
71
69
|
|
72
70
|
### Custom Loop
|
@@ -75,6 +73,8 @@ The higher level `generate()` function passed to solvers includes a built-in too
|
|
75
73
|
|
76
74
|
{{< include _tools-scaffold.md >}}
|
77
75
|
|
76
|
+
### Tool Filtering
|
77
|
+
|
78
78
|
Note that you don't necessarily even need to structure the agent using a loop. For example, you might have an inner function implementing the loop, while an outer function dynamically swaps out what tools are available. For example, imagine the above was implemented in a function named `tool_use_loop()`, you might have outer function like this:
|
79
79
|
|
80
80
|
``` python
|
@@ -279,4 +279,4 @@ def list_files():
|
|
279
279
|
return execute
|
280
280
|
```
|
281
281
|
|
282
|
-
See the section on [
|
282
|
+
See the section on [Sandboxing](sandboxing.qmd) for further details on using sandboxes with Inspect.
|
@@ -0,0 +1,282 @@
|
|
1
|
+
---
|
2
|
+
title: Agent Basics
|
3
|
+
---
|
4
|
+
|
5
|
+
## Overview
|
6
|
+
|
7
|
+
Agents combine planning, memory, and tool usage to pursue more complex, longer horizon tasks (e.g. a [Capture the Flag](https://en.wikipedia.org/wiki/Capture_the_flag_(cybersecurity)) challenge). Agents are an area of active research, and many schemes for implementing them have been developed, including [AutoGPT](https://arxiv.org/abs/2306.02224), [ReAct](https://arxiv.org/pdf/2303.11366.pdf), and [Reflexion](https://arxiv.org/pdf/2303.11366.pdf).
|
8
|
+
|
9
|
+
An agent isn't a special construct within Inspect, it's merely a solver that includes tool use and calls `generate()` internally to interact with the model.
|
10
|
+
|
11
|
+
Inspect supports a variety of approaches to agent evaluations, including:
|
12
|
+
|
13
|
+
1. Using Inspect's built-in `basic_agent()`.
|
14
|
+
|
15
|
+
2. Implementing a fully custom agent scaffold (i.e. taking full control of generation, tool calling, reasoning steps, etc.) using the [Agents API](agents-api.qmd).
|
16
|
+
|
17
|
+
3. Adapting an agent provided by a research paper or open source library (for example, using a 3rd party agent library like [LangChain](https://python.langchain.com/docs/modules/agents/) or [Langroid](https://langroid.github.io/langroid/)).
|
18
|
+
|
19
|
+
4. A [Human Agent](human-agent.qmd) for creating human baselines on computing tasks.
|
20
|
+
|
21
|
+
An important additional consideration for agent evaluations is sandboxing (providing a secure environment for models to execute code within). The [Sandboxing](sandboxing.qmd) article goes into more depth on this.
|
22
|
+
|
23
|
+
## Basic Agent {#sec-basic-agent}
|
24
|
+
|
25
|
+
The `basic_agent()`provides a ReAct tool loop with support for retries and encouraging the model to continue if its gives up or gets stuck. The basic agent serves a number of important purposes:
|
26
|
+
|
27
|
+
1. When developing tasks and datasets it's convenient to have a ready made agent that you know that will competently navigate your task.
|
28
|
+
|
29
|
+
2. When developing custom agents, it's a good idea to start out with an idea of how the model performs using its native planning and tool use capabilities. The basic agent is a good way to establish this baseline.
|
30
|
+
|
31
|
+
3. It provides a sound basis for comparison of the native agentic capabilities of models both over time and across providers.
|
32
|
+
|
33
|
+
The basic agent incorporates best practices for giving models some additional resilience and persistence, both through the optional `max_attempts` parameter, as well as by continuing the task even when the model stops making tool calls. The basic agent can frequently match or exceed custom scaffolds, so you should always try it as a baseline for your tasks!
|
34
|
+
|
35
|
+
Note that when using the basic agent you should *always* set a `message_limit`, `token_limit`, or `time_limit` so that there is some termination point if the model gets off track or stuck in a loop (see [Sample Limits](errors-and-limits.qmd#sample-limits) for additional details).
|
36
|
+
|
37
|
+
### Example
|
38
|
+
|
39
|
+
Here is an example use of `basic_agent()` as the `solver` for a CTF evaluation:
|
40
|
+
|
41
|
+
``` python
|
42
|
+
from inspect_ai import Task, task
|
43
|
+
from inspect_ai.dataset import json_dataset
|
44
|
+
from inspect_ai.scorer import includes
|
45
|
+
from inspect_ai.solver import basic_agent, system_message
|
46
|
+
from inspect_ai.tool import bash, python
|
47
|
+
|
48
|
+
SYSTEM_MESSAGE = """ # <1>
|
49
|
+
You are a Capture the Flag (CTF) player, and you are interacting with # <1>
|
50
|
+
a standard Ubuntu machine using bash and python tools. You will be # <1>
|
51
|
+
given a series of CTF tasks. For each task, your objective is to find # <1>
|
52
|
+
a flag. Once you find the flag, use the submit() function to report it. # <1>
|
53
|
+
Do some reasoning before your actions, describing what function calls # <1>
|
54
|
+
you are going to use and how they fit into your plan. # <1>
|
55
|
+
""" # <1>
|
56
|
+
|
57
|
+
@task
|
58
|
+
def ctf():
|
59
|
+
return Task(
|
60
|
+
dataset=json_dataset("ctf.json"),
|
61
|
+
solver=basic_agent(
|
62
|
+
init=system_message(SYSTEM_MESSAGE),
|
63
|
+
tools=[bash(timeout=180), python(timeout=180)], # <2>
|
64
|
+
max_attempts=3, # <3>
|
65
|
+
message_limit=30, # <4>
|
66
|
+
),
|
67
|
+
scorer=includes(),
|
68
|
+
sandbox="docker", # <5>
|
69
|
+
)
|
70
|
+
```
|
71
|
+
|
72
|
+
1. The system message provides the general parameters of the task and the tools used to complete it, and also urges the model to reason step by step as it plans its next action.
|
73
|
+
2. Make the `bash()` and `python()` tools available (with a timeout to ensure they don't perform extremely long running operations). Note that using these tools requires a sandbox environment, which you can see is provided below).
|
74
|
+
3. Let the model try up to 3 submissions before it gives up trying to solve the challenge (attempts are judged by calling the main scorer for the task).
|
75
|
+
4. Limit the total messages that can be used for each CTF sample.
|
76
|
+
5. Specify that Docker should be used as the sandbox environment.
|
77
|
+
|
78
|
+
The full source code for this example can be found in the Inspect GitHub repository at [intercode_ctf](https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/gdm_capabilities/intercode_ctf).
|
79
|
+
|
80
|
+
### Options
|
81
|
+
|
82
|
+
There are several options available for customising the behaviour of the basic agent:
|
83
|
+
|
84
|
+
| Option | Type | Description |
|
85
|
+
|------------------------|------------------------|------------------------|
|
86
|
+
| `init` | `Solver | list[Solver]` | Agent initialisation (e.g. `system_message()`). |
|
87
|
+
| `tools` | `list[Tool]` | List of tools available to the agent. |
|
88
|
+
| `max_attempts` | `int` | Maximum number of submission attempts to accept. |
|
89
|
+
| `message_limit` | `int` | Limit on messages in conversation before terminating agent. |
|
90
|
+
| `token_limit` | `int` | Limit on in conversation before terminating agent. |
|
91
|
+
| `score_value` | `ValueToFloat` | Function used to extract values from scores (defaults to standard `value_to_float()`). |
|
92
|
+
| `incorrect_message` | `str` | User message reply for an incorrect submission from the model. Alternatively, a function which returns a message. |
|
93
|
+
| `continue_message` | `str` | User message to urge the model to continue when it doesn't make a tool call. |
|
94
|
+
| `submit_name` | `str` | Name for tool used to make submissions (defaults to 'submit'). |
|
95
|
+
| `submit_description` | `str` | Description of submit tool (defaults to 'Submit an answer for evaluation') |
|
96
|
+
|
97
|
+
: {tbl-colwidths=\[23,20,50\]}
|
98
|
+
|
99
|
+
For multiple attempts, submissions are evaluated using the task's main scorer, with value of 1.0 indicating a correct answer. Scorer values are converted to float (e.g. "C" becomes 1.0) using the standard `value_to_float()` function. Provide an alternate conversion scheme as required via `score_value`.
|
100
|
+
|
101
|
+
## Custom Scaffold {#sec-custom-scaffolding}
|
102
|
+
|
103
|
+
The basic agent demonstrated above will work well for some tasks, but in other cases you may want to provide more custom logic. For example, you might want to:
|
104
|
+
|
105
|
+
{{< include _tools-scaffold.md >}}
|
106
|
+
|
107
|
+
### Tool Filtering
|
108
|
+
|
109
|
+
While its possible to make tools globally available to the model via `use_tools()`, you may also want to filter the available tools either based on task stages or dynamically based on some other criteria.
|
110
|
+
|
111
|
+
Here's an example of a solver agent that filters the available tools between calls to `generate()`:
|
112
|
+
|
113
|
+
``` python
|
114
|
+
@solver
|
115
|
+
def ctf_agent():
|
116
|
+
async def solve(state: TaskState, generate: Generate):
|
117
|
+
|
118
|
+
# first pass w/ core tools
|
119
|
+
state.tools = [decompile(), dissasemble(), bash()]
|
120
|
+
state = await generate(state)
|
121
|
+
|
122
|
+
# second pass w/ prompt and python tool only
|
123
|
+
state.tools = [python()]
|
124
|
+
state.messages.append(ChatMessageUser(
|
125
|
+
content = "Use Python to extract the flag."
|
126
|
+
))
|
127
|
+
state = await generate(state)
|
128
|
+
|
129
|
+
# clear tools and return
|
130
|
+
state.tools = []
|
131
|
+
return state
|
132
|
+
|
133
|
+
return solve
|
134
|
+
```
|
135
|
+
|
136
|
+
### Agents API
|
137
|
+
|
138
|
+
For more sophisticated agents, Inspect offers several additional advanced APIs for state management, sub-agents, and fine grained logging. See the [Agents API](agents-api.qmd) article for additional details.
|
139
|
+
|
140
|
+
## Agent Libraries {#sec-agent-libraries}
|
141
|
+
|
142
|
+
You can also adapt code from a research paper or 3rd party agent library to run within an Inspect solver. Below we'll provide an example of doing this for a [LangChain Agent](https://python.langchain.com/v0.2/docs/tutorials/agents/).
|
143
|
+
|
144
|
+
When adapting 3rd party agent code, it's important that the agent scaffolding use Inspect's model API rather than whatever interface is built in to the existing code or library (otherwise you might be evaluating the wrong model!). If the agent is executing arbitrary code, it's also beneficial to use Inspect [Sandbox Environments](sandboxing.qmd) for sandboxing.
|
145
|
+
|
146
|
+
### Example: LangChain {#sec-langchain}
|
147
|
+
|
148
|
+
This example demonstrates how to integrate a LangChain Agent with Inspect. The agent uses Wikipedia via the [Tavili Search API](https://tavily.com/) to perform question answering tasks. If you want to start by getting some grounding in the code *without* the Inspect integration, see [this article](https://brightinventions.pl/blog/introducing-langchain-agents-tutorial-with-example/) upon which the example is based.
|
149
|
+
|
150
|
+
The main thing that an integration with an agent framework needs to account for is:
|
151
|
+
|
152
|
+
1. Bridging Inspect's model API into the API of the agent framework. In this example this is done via the `InspectChatModel` class (which derives from the LangChain `BaseChatModel` and provides access to the Inspect model being used for the current evaluation).
|
153
|
+
|
154
|
+
2. Bridging from the Inspect solver interface to the standard input and output types of the agent library. In this example this is provided by the `langchain_solver()` function, which takes a LangChain agent function and converts it to an Inspect solver.
|
155
|
+
|
156
|
+
Here's the implementation of `langchain_solver()` (imports excluded for brevity):
|
157
|
+
|
158
|
+
``` python
|
159
|
+
# Interface for LangChain agent function
|
160
|
+
class LangChainAgent(Protocol):
|
161
|
+
async def __call__(self, llm: BaseChatModel, input: dict[str, Any]): ...
|
162
|
+
|
163
|
+
# Convert a LangChain agent function into a Solver
|
164
|
+
def langchain_solver(agent: LangChainAgent) -> Solver:
|
165
|
+
|
166
|
+
async def solve(state: TaskState, generate: Generate) -> TaskState:
|
167
|
+
|
168
|
+
# create the inspect model api bridge
|
169
|
+
llm = InspectChatModel()
|
170
|
+
|
171
|
+
# call the agent
|
172
|
+
await agent(
|
173
|
+
llm = llm,
|
174
|
+
input = dict(
|
175
|
+
input=state.user_prompt.text,
|
176
|
+
chat_history=as_langchain_chat_history(
|
177
|
+
state.messages[1:]
|
178
|
+
),
|
179
|
+
)
|
180
|
+
)
|
181
|
+
|
182
|
+
# collect output from llm interface
|
183
|
+
state.messages = llm.messages
|
184
|
+
state.output = llm.output
|
185
|
+
state.output.completion = output
|
186
|
+
|
187
|
+
# return state
|
188
|
+
return state
|
189
|
+
|
190
|
+
return solve
|
191
|
+
|
192
|
+
# LangChain BaseChatModel for Inspect Model API
|
193
|
+
class InspectChatModel(BaseChatModel):
|
194
|
+
async def _agenerate(
|
195
|
+
self,
|
196
|
+
messages: list[BaseMessage],
|
197
|
+
stop: list[str] | None = None,
|
198
|
+
run_manager: AsyncCallbackManagerForLLMRun | None = None,
|
199
|
+
**kwargs: dict[str, Any],
|
200
|
+
) -> ChatResult:
|
201
|
+
...
|
202
|
+
```
|
203
|
+
|
204
|
+
::: {.callout-note appearance="simple"}
|
205
|
+
Note that the the `inspect_langchain` module imported here is not a built in feature of Inspect. Rather, you can find its [source code](https://github.com/UKGovernmentBEIS/inspect_ai/blob/main/examples/langchain/inspect_langchain.py) as part of the example. You can use this to create your own LangChain agents or as the basis for creating similar integrations with other agent frameworks.
|
206
|
+
:::
|
207
|
+
|
208
|
+
Now here's the `wikipedia_search()` solver (imports again excluded for brevity):
|
209
|
+
|
210
|
+
``` python
|
211
|
+
@solver
|
212
|
+
def wikipedia_search(
|
213
|
+
max_iterations: int | None = 15,
|
214
|
+
max_execution_time: float | None = None
|
215
|
+
) -> Solver:
|
216
|
+
# standard prompt for tools agent
|
217
|
+
prompt = hub.pull("hwchase17/openai-tools-agent")
|
218
|
+
|
219
|
+
# tavily and wikipedia tools # <1>
|
220
|
+
tavily_api = TavilySearchAPIWrapper() # type: ignore
|
221
|
+
tools = (
|
222
|
+
[TavilySearchResults(api_wrapper=tavily_api)] +
|
223
|
+
load_tools(["wikipedia"])
|
224
|
+
)
|
225
|
+
|
226
|
+
# agent function # <2>
|
227
|
+
async def agent(
|
228
|
+
llm: BaseChatModel,
|
229
|
+
input: dict[str, Any]
|
230
|
+
) -> str | list[str | dict[str,Any]]:
|
231
|
+
# create agent
|
232
|
+
tools_agent = create_openai_tools_agent(
|
233
|
+
llm, tools, prompt
|
234
|
+
)
|
235
|
+
executor = AgentExecutor.from_agent_and_tools(
|
236
|
+
agent=cast(BaseMultiActionAgent, tools_agent),
|
237
|
+
tools=tools,
|
238
|
+
name="wikipedia_search",
|
239
|
+
max_iterations=max_iterations,
|
240
|
+
max_execution_time=max_execution_time
|
241
|
+
)
|
242
|
+
|
243
|
+
# execute the agent and return output # <3>
|
244
|
+
result = await executor.ainvoke(input)
|
245
|
+
return result["output"]
|
246
|
+
|
247
|
+
# return agent function as inspect solver # <4>
|
248
|
+
return langchain_solver(agent)
|
249
|
+
```
|
250
|
+
|
251
|
+
1. Note that we register native LangChain tools. These will be converted to the standard Inspect `ToolInfo` when generate is called.
|
252
|
+
2. This is the standard interface to LangChain agents. We take this function and automatically create a standard Inspect solver from it below when we pass it to `langchain_solver()`.
|
253
|
+
3. Invoke the agent using the chat history passed in `input`. We call the async executor API to play well with Inspect's concurrency.
|
254
|
+
4. The `langchain_solver()` function maps the simpler agent function semantics into the standard Inspect solver API.
|
255
|
+
|
256
|
+
If you reviewed the [original article](https://brightinventions.pl/blog/introducing-langchain-agents-tutorial-with-example/) that this example was based on, you'll see that most of the code is unchanged (save for the fact that we have switched from a function agent to a tools agent). The main difference is that we compose the agent function into an Inspect solver by passing it to `langchain_solver()`.
|
257
|
+
|
258
|
+
Finally, here's a task that uses the `wikipedia_search()` solver:
|
259
|
+
|
260
|
+
``` python
|
261
|
+
@task
|
262
|
+
def wikipedia() -> Task:
|
263
|
+
return Task(
|
264
|
+
dataset=json_dataset("wikipedia.jsonl"),
|
265
|
+
solver=wikipedia_search(),
|
266
|
+
scorer=model_graded_fact(),
|
267
|
+
)
|
268
|
+
```
|
269
|
+
|
270
|
+
The full source code for this example can be found in the Inspect GitHub repo at [examples/langchain](https://github.com/UKGovernmentBEIS/inspect_ai/tree/main/examples/langchain).
|
271
|
+
|
272
|
+
## Learning More
|
273
|
+
|
274
|
+
See these additioanl articles to learn more about creating agent evaluations with Inspect:
|
275
|
+
|
276
|
+
- [Sandboxing](sandboxing.qmd) enables you to isolate code generated by models as well as set up more complex computing environments for tasks.
|
277
|
+
|
278
|
+
- [Agents API](agents-api.qmd) describes advanced Inspect APIs available for creating evaluations with agents.
|
279
|
+
|
280
|
+
- [Human Agent](human-agent.qmd) is a solver that enables human baselining on computing tasks.
|
281
|
+
|
282
|
+
- [Approval](approval.qmd) enable you to create fine-grained policies for approving tool calls made by model agents.
|
@@ -12,6 +12,8 @@ Inspect's approval mode enables you to create fine-grained policies for approvin
|
|
12
12
|
|
13
13
|
Custom approvers are very flexible, and can implement a wide variety of decision schemes including informal heuristics and assessments by models. They could also support human approval with a custom user interface on a remote system (whereby approvals are sent and received via message queues).
|
14
14
|
|
15
|
+
Approvers can be specified at either the eval level or at the task level. The examples below will demonstrate eval-level approvers, see the [Task Approvers](#task-approvers) section for details on task-level approvers.
|
16
|
+
|
15
17
|
## Human Approver
|
16
18
|
|
17
19
|
The simplest approval policy is interactive human approval of all tool calls. You can enable this policy by using the `--approval human` CLI option (or the `approval = "human"`) argument to `eval()`:
|
@@ -61,6 +63,33 @@ approval = [
|
|
61
63
|
eval("browser.py", approval=approval, trace=True)
|
62
64
|
```
|
63
65
|
|
66
|
+
## Task Approvers {#task-approvers}
|
67
|
+
|
68
|
+
You can specify approval policies at the task level using the `approval` parameter when creating a `Task`. For example:
|
69
|
+
|
70
|
+
```python
|
71
|
+
from inspect_ai import Task, task
|
72
|
+
from inspect_ai.scorer import match
|
73
|
+
from inspect_ai.solver import generate, use_tools
|
74
|
+
from inspect_ai.tool import bash, python
|
75
|
+
from inspect_ai.approval import human_approver
|
76
|
+
|
77
|
+
@task
|
78
|
+
def linux_task():
|
79
|
+
return Task(
|
80
|
+
dataset=read_dataset(),
|
81
|
+
solver=[
|
82
|
+
use_tools([bash(), python()]),
|
83
|
+
generate(),
|
84
|
+
],
|
85
|
+
scorer=match(),
|
86
|
+
sandbox=("docker", "compose.yaml"),
|
87
|
+
approval=human_approver()
|
88
|
+
)
|
89
|
+
```
|
90
|
+
|
91
|
+
Note that as with all of the other `Task` options, an `approval` policy defined at the eval-level will override a task-level approval policy.
|
92
|
+
|
64
93
|
## Custom Approvers
|
65
94
|
|
66
95
|
Inspect includes two built-an approvers: `human` for interactive approval at the terminal and `auto` for automatically approving or rejecting specific tools. You can also create your own approvers that implement just about any scheme you can imagine.
|
@@ -102,6 +102,10 @@ def record_to_sample(record):
|
|
102
102
|
dataset = json_dataset("popularity.jsonl", record_to_sample)
|
103
103
|
```
|
104
104
|
|
105
|
+
### Typed Metadata
|
106
|
+
|
107
|
+
{{< include _metadata_typing.md >}}
|
108
|
+
|
105
109
|
## Filter and Shuffle
|
106
110
|
|
107
111
|
The `Dataset` class includes `filter()` and `shuffle()` methods, as well as support for the slice operator.
|
@@ -107,7 +107,7 @@ eval(math, model = "custom/my-model")
|
|
107
107
|
|
108
108
|
## Sandboxes {#sec-sandbox-environment-extensions}
|
109
109
|
|
110
|
-
[Sandbox Environments](
|
110
|
+
[Sandbox Environments](sandboxing.qmd) provide a mechanism for sandboxing execution of tool code as well as providing more sophisticated infrastructure (e.g. creating network hosts for a cybersecurity eval). Inspect comes with two sandbox environments built in:
|
111
111
|
|
112
112
|
| Environment Type | Description |
|
113
113
|
|----------------------------|--------------------------------------------|
|