inspect-ai 0.3.11__tar.gz → 0.13.3__tar.gz
This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/CHANGELOG.md +9 -0
- {inspect_ai-0.3.11/src/inspect_ai.egg-info → inspect_ai-0.13.3}/PKG-INFO +1 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/solvers.qmd +56 -3
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/agents/langchain/inspect_langchain.py +1 -2
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/__init__.py +1 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/list.py +1 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/eval.py +1 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/list.py +1 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/loader.py +1 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/registry.py +1 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/score.py +1 -1
- inspect_ai-0.13.3/src/inspect_ai/_eval/task/__init__.py +3 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/task/run.py +7 -4
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/task/util.py +1 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/App.mjs +19 -7
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +1 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/SamplesTab.mjs +1 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/__init__.py +2 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_critique.py +2 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_multiple_choice.py +2 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_plan.py +2 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_prompt.py +2 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_solver.py +2 -103
- inspect_ai-0.13.3/src/inspect_ai/solver/_task_state.py +145 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_tool/use_tools.py +2 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3/src/inspect_ai.egg-info}/PKG-INFO +1 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai.egg-info/SOURCES.txt +3 -1
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.gitattributes +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.github/dependabot.yml +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.github/pull_request_template.md +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.github/workflows/build.yml +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.github/workflows/docs.yml +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.github/workflows/pypi.yml +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.github/workflows/vscode.yml +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.gitignore +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.pre-commit-config.yaml +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.vscode/extensions.json +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.vscode/settings.json +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/LICENSE +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/README.md +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/README.md +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/arc.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/boolq.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/datasets/math_test.csv +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/datasets/mmlu.csv +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/gpqa.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/gsm8k.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/hellaswag.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/mathematics.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/mmlu.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/piqa.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/.gitignore +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/arc.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/biology_qa.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/footer.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/gsm8k.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/hellaswag.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/index.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/mathematics.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/popularity.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/security_guide.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/theory_of_mind.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/tool_use.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_format/pre-render.sh +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_quarto.yml +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_variables.yml +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/datasets.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/eval-logs.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/eval-suites.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/eval-tuning.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/examples.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/aisi-logo.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/eval-log.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-answers.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-filter.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-history.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-home.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-info.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-logging-console.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-logging.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-main.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-messages.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-metadata.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-scoring.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-sort.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-splash.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-vscode-config.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-vscode-install.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-vscode-logview.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-vscode-output-channel.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-vscode-run-task.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/popularity.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/rate-limit.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/running-theory.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/index.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/log-viewer.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/models.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/scorers.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/theme.scss +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/tools.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/vscode.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/workflow.qmd +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/agents/langchain/.env.example +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/agents/langchain/.gitignore +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/agents/langchain/README.md +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/agents/langchain/requirements.txt +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/agents/langchain/wikipedia.jsonl +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/agents/langchain/wikipedia.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/biology_qa.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/popularity.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/security_guide.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/theory_of_mind.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/tool_use.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/pyproject.toml +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/requirements.txt +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/setup.cfg +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/__main__.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/common.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/eval.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/info.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/main.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/score.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/util.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/view.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_display/__init__.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_display/_display.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_display/logger.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_display/rich.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/task/constants.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/task/generate.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/task/images.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/task/log.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/task/results.py +0 -0
- /inspect_ai-0.3.11/src/inspect_ai/_eval/types.py → /inspect_ai-0.13.3/src/inspect_ai/_eval/task/task.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/_async.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/appdirs.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/constants.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/datetime.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/dev.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/docstring.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/dotenv.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/error.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/file.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/git.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/http.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/images.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/json.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/notebook.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/path.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/pattern.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/platform.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/registry.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/retry.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/samples.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/text.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/url.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/version.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/schema.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/view.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/.gitignore +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/App.css +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/favicon.svg +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/index.html +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/bootstrap/css/bootstrap-icons.min.css +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/bootstrap/css/bootstrap.min.css +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/bootstrap/css/fonts/bootstrap-icons.woff +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/bootstrap/css/fonts/bootstrap-icons.woff2 +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/bootstrap/js/bootstrap.bundle.min.js +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/clipboard.min.js +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/json5.min.js +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/prism/prism-dark.css +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/prism/prism.min.css +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/prism/prism.min.js +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/purify.min.js +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/showdown.min.js +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/log-schema.json +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/log.d.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/preact/hooks.js +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/preact/htm/htm.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/preact/htm/preact.js +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/preact/htm/preact.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/preact/preact-hooks.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/preact/preact.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/Constants.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/Register.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/api/api-browser.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/api/api-vscode.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/api/index.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/AnsiDisplay.css +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/Card.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/ChatView.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/CopyButton.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/Dialog.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/LargeModal.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/LoadingScreen.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/MessageContent.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/RenderedContent.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/TabSet.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/ToolButton.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/VirtualList.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/ansi-output.js +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/logging/LoggingPanel.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/SampleList.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/title/TitleBlock.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/utils/Format.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/utils/Git.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/utils/Path.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/utils/Type.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/utils/events.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/utils/sleep.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/__init__.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_dataset.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_examples/bias_detection.jsonl +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_examples/biology_qa.jsonl +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_examples/popularity.jsonl +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_examples/security_guide.jsonl +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_examples/theory_of_mind.jsonl +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_sources/csv.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_sources/example.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_sources/file.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_sources/hf.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_sources/json.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_util.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/log/__init__.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/log/_file.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/log/_log.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/__init__.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_model.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/anthropic.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/azureai.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/bedrock.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/cloudflare.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/google.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/hf.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/mistral.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/ollama.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/openai.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/providers.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/together.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/util.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_registry.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_tool.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_util.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/py.typed +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/__init__.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_answer.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_common.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_match.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_metric.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_metrics/__init__.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_metrics/accuracy.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_metrics/mean.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_metrics/std.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_model.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_multi.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_pattern.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_scorer.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_tool/tool.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_tool/tool_def.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_tool/web_search.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_util.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/util/__init__.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/util/_context/__init__.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/util/_context/concurrency.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/util/_context/logger.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/util/_context/resource.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/util/_context/subprocess.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai.egg-info/dependency_links.txt +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai.egg-info/entry_points.txt +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai.egg-info/requires.txt +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai.egg-info/top_level.txt +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/conftest.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/scorer/test_answer.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/scorer/test_pattern.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_anthropic.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_cloudflare.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_collapse_assistant_message.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_collapse_user_message.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_dataset/samples.csv +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_dataset/samples.json +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_dataset/samples.jsonl +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_dataset.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_eval_log/log_invalid.txt +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_eval_log/log_version_2.txt +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_eval_log/log_with_nan.txt +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_eval_log.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_examples.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_helpers/__init__.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_helpers/utils.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_hf.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_images/images.jsonl +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_images.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_list_task.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_logprobs.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_metric.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_model_package.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_num_choices.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_openai.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_package/.gitignore +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_package/inspect_package/__init__.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_package/inspect_package/inspect_ai.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_package/inspect_package/modelapi/custom.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_package/inspect_package/py.typed +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_package/pyproject.toml +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_plan.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_registry.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_retry.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_scorer.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_solver.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_stop_reason.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_subprocess.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/__init__.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/attribs.ipynb +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/multiple.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/multiple_dir/_decoy/testit.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/multiple_dir/_decoy2.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/multiple_dir/bar.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/multiple_dir/foo.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/recurse/.folder3/epsilon.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/recurse/folder1/_decoy.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/recurse/folder1/theta.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/recurse/folder2/.folder3/epsilon.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/recurse/folder2/another.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/recurse/folder2/first.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_tools.py +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.eslintrc.json +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.gitignore +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.vscode/extensions.json +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.vscode/launch.json +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.vscode/settings.json +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.vscode/tasks.json +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.vscode-test.mjs +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.vscodeignore +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.yarnrc +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/CHANGELOG.md +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/LICENSE +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/README.md +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/assets/logo/inspect.png +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/assets/logo/inspect.svg +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/assets/templates/task.py.template +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/assets/www/codicon/codicon.css +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/assets/www/codicon/codicon.ttf +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/assets/www/view/view-overrides.css +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/package.json +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/document.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/error.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/focus.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/notebook.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/symbol.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/task.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/templates.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/webview.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/appdirs.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/command.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/dispose.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/env.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/git.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/jsonrpc.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/log.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/nonce.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/path.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/port.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/process.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/python/code.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/python/exec.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/python/index.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/python/interpreter.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/random.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/string.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/text.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/wait.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/workspace.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/extension.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/inspect/index.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/inspect/list.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/inspect/logs.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/inspect/props.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/inspect/version.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/active-task/active-task-command.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/active-task/active-task-provider.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/activity-bar-provider.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/env-config-provider.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/task-config-commands.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/task-config-provider.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/task-outline-commands.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/task-outline-provider.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/webview/env-config-webview.css +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/webview/env-config-webview.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/webview/task-config-webview.css +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/webview/task-config-webview.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/webview/vscode-controls.css +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/webview/webview-utils.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/codelens/codelens-provider.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/inspect/inspect-constants.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/inspect/inspect-eval-commands.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/inspect/inspect-eval.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/inspect/inspect-manager.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/logview/commands.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/logview/logview-file-watcher.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/logview/logview-link-provider.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/logview/logview-manager.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/logview/logview-webview.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/logview/logview.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/settings/inspect-settings.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/settings/user-settings.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/workspace/workspace-env-commands.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/workspace/workspace-env-provider.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/workspace/workspace-init.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/workspace/workspace-state-provider.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/workspace/workspace-task-provider.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/test/extension.test.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/tools/ts-to-mjs/.gitignore +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/tools/ts-to-mjs/package.json +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/tools/ts-to-mjs/rollup.config.js +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/tools/ts-to-mjs/src/index.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/tools/ts-to-mjs/src/jsonrpc.ts +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/tools/ts-to-mjs/yarn.lock +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/tsconfig.json +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/webpack.config.js +0 -0
- {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/yarn.lock +0 -0
@@ -1,5 +1,14 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## v0.3.13 (31 May 2024)
|
4
|
+
|
5
|
+
- Bugfix: Inspect view was not reliably updating when new evaluation logs were written.
|
6
|
+
|
7
|
+
## v0.3.12 (31 May 2024)
|
8
|
+
|
9
|
+
- Bugfix: `results` was not defined when no scorer was provided resulting in an error being thrown. Fixed by setting `results = EvalResults()` when no scorer is provided.
|
10
|
+
- Bugfix: The viewer was not properly handling samples without scores.
|
11
|
+
|
3
12
|
## v0.3.11 (30 May 2024)
|
4
13
|
|
5
14
|
- Update to non-beta version of Anthropic tool use (remove legacy xml tools implementation).
|
@@ -1,3 +1,7 @@
|
|
1
|
+
---
|
2
|
+
tbl-colwidths: [20,25,45]
|
3
|
+
---
|
4
|
+
|
1
5
|
# Solvers {#sec-solvers}
|
2
6
|
|
3
7
|
## Overview
|
@@ -147,7 +151,7 @@ You will likely want to experiment with using a distinct `model` for generating
|
|
147
151
|
|
148
152
|
## Custom Solvers
|
149
153
|
|
150
|
-
|
154
|
+
In this section we'll take a look at the source code for a couple of the built in solvers as a jumping off point for implementing your own solvers. A solver is an implementation of the `Solver` protocol (a function that transforms a `TaskState`):
|
151
155
|
|
152
156
|
``` python
|
153
157
|
async def solve(state: TaskState, generate: Generate) -> TaskState:
|
@@ -158,10 +162,52 @@ async def solve(state: TaskState, generate: Generate) -> TaskState:
|
|
158
162
|
|
159
163
|
Typically solvers can be customised with parameters (e.g. `template` for prompt engineering solvers). This means that a `Solver` is actually a function which returns the `solve()` function referenced above (this will become more clear in the examples below).
|
160
164
|
|
165
|
+
### Task States
|
166
|
+
|
167
|
+
Before presenting the examples we'll take a more in-depth look at the `TaskState` class. Task states consist of both lower level data members (e.g. `messages`, `output`) as well as a number of convenience properties. The core members of `TaskState` that are *modified* by solvers are `messages` / `user_prompt` and `output`:
|
168
|
+
|
169
|
+
| Member | Type | Description |
|
170
|
+
|-----------|-----------|---------------------------------------------------|
|
171
|
+
| `messages` | list\[ChatMessage\] | Chat conversation history for sample. It is automatically appended to by the `generate()` solver, and is often manipulated by other solvers (e.g. for prompt engineering or elicitation). |
|
172
|
+
| `user_prompt` | ChatMessageUser | Convenience property for accessing the first user message in the message history (commonly used for prompt engineering). |
|
173
|
+
| `output` | ModelOutput | The 'final' model output once we've completed all solving. This field is automatically updated with the last "assistant" message by the `generate()` solver. |
|
174
|
+
|
161
175
|
::: {.callout-note appearance="simple"}
|
162
|
-
|
176
|
+
Note that the `generate()` solver automatically updates both the `messages` and `output` fields. For very simple evaluations modifying the `user_prompt` and then calling `generate()` encompasses all of the required interaction with `TaskState`.
|
163
177
|
:::
|
164
178
|
|
179
|
+
There are two additional fields that solvers might modify (but they are typically for more advanced use cases):
|
180
|
+
|
181
|
+
| Member | Type | Description |
|
182
|
+
|---------|---------|------------------------------------------------------|
|
183
|
+
| `metadata` | dict | Original metadata from `Sample`, as well as any other custom metadata that solvers choose to write (typically used to coordinate between solvers and/or for custom logging). |
|
184
|
+
| `completed` | bool | Solvers can set `completed = True` to cause the task to exit the plan immediately. |
|
185
|
+
|
186
|
+
Sometimes it's important to have access to the *original* prompt input for the task (as other solvers may have re-written or even removed it entirely). This is available using the `input` and `input_text` properties:
|
187
|
+
|
188
|
+
| Member | Type | Description |
|
189
|
+
|---------------|---------------|-------------------------------------------|
|
190
|
+
| `input` | str \| list\[ChatMessage\] | Original `Sample` input. |
|
191
|
+
| `input_text` | str | Convenience property for accessing the initial input from the `Sample` as a string. |
|
192
|
+
|
193
|
+
There are several other fields used to provide contextual data from either the task sample or evaluation:
|
194
|
+
|
195
|
+
| Member | Type | Description |
|
196
|
+
|---------------|---------------|------------------------------------------|
|
197
|
+
| `sample_id` | int \| str | Unique ID for sample. |
|
198
|
+
| `epoch` | int | Epoch for sample. |
|
199
|
+
| `choices` | list\[str\] \| None | Choices from sample (used only in multiple-choice evals). |
|
200
|
+
| `model` | ModelName | Name of model currently being evaluated. |
|
201
|
+
|
202
|
+
Finally, task states also include available tools as well as guidance for the model on which tools to use (if you haven't yet encountered the concept of tool use in language models, don't worry about understanding these fields, the [Tools](tools.qmd) article provides a more in-depth treatment):
|
203
|
+
|
204
|
+
| Member | Type | Description |
|
205
|
+
|---------------|--------------|------------------------------|
|
206
|
+
| `tools` | list\[Tool\] | Tools available to the model |
|
207
|
+
| `tool_choice` | ToolChoice | Tool choice directive. |
|
208
|
+
|
209
|
+
These fields are typically modified via the `use_tools()` solver, but they can also be modified directly for more advanced use cases.
|
210
|
+
|
165
211
|
### Example: Prompt Template
|
166
212
|
|
167
213
|
Here's the code for the `prompt_template()` solver:
|
@@ -188,8 +234,11 @@ def prompt_template(template: str, **params: dict[str, Any]):
|
|
188
234
|
A few things to note about this implementation:
|
189
235
|
|
190
236
|
1. The function applies the `@solver` decorator—this registers the `Solver` with Inspect, making it possible to capture its name and parameters for logging, as well as make it callable from a configuration file (e.g. a YAML specification of an eval).
|
237
|
+
|
191
238
|
2. The `solve()` function is declared as `async`. This is so that it can participate in Inspect's optimised scheduling for expensive model generation calls (this solver doesn't call `generate()` but others will).
|
239
|
+
|
192
240
|
3. The `resource()` function is used to read the specified `template`. This function accepts a string, file, or URL as its argument, and then returns a string with the contents of the resource.
|
241
|
+
|
193
242
|
4. We make use of the `user_prompt` property on the `TaskState`. This is a convenience property for locating the first `role="user"` message (otherwise you might need to skip over system messages, etc). Since this is a string templating solver, we use the `state.user_prompt.text` property (so we are dealing with prompt as a string, recall that it can also be a list of messages).
|
194
243
|
|
195
244
|
### Example: Self Critique
|
@@ -278,6 +327,10 @@ def self_critique(
|
|
278
327
|
|
279
328
|
Note that calls to `generate()` (for both the critique model and the model being evaluated) are called with `await`—this is critical to ensure that the solver participates correctly in the scheduling of generation work.
|
280
329
|
|
330
|
+
### Concurrency
|
331
|
+
|
332
|
+
When creating custom solvers, it's critical that you understand Inspect's concurrency model. More specifically, if your solver is doing non-trivial work (e.g. calling REST APIs, executing external processes, etc.) please review [Eval Tuning](#sec-eval-tuning) for a more in-depth discussion.
|
333
|
+
|
281
334
|
## Early Termination
|
282
335
|
|
283
336
|
In some cases a solver has the context available to request an early termination of the plan (i.e. don't call the rest of the solvers). In this case, setting the `TaskState.completed` field will result in forgoing remaining solvers in the plan. For example, here's a simple solver that terminates the plan early:
|
@@ -318,7 +371,7 @@ In this example the `finish_up()` solver will always be called even if the plan
|
|
318
371
|
|
319
372
|
If your solvers allocate resources (for example, run a Docker container or mount a drive), you will want to make sure that these resources are cleaned up even in the case of an error occurring during the evaluation. To arrange for this, use a `Plan` object with a `cleanup` function:
|
320
373
|
|
321
|
-
```python
|
374
|
+
``` python
|
322
375
|
|
323
376
|
async def cleanup(state):
|
324
377
|
# cleanup resources
|
@@ -47,8 +47,7 @@ from inspect_ai.solver import Generate, Solver, TaskState
|
|
47
47
|
class LangChainAgent(Protocol):
|
48
48
|
async def __call__(
|
49
49
|
self, llm: BaseChatModel, input: dict[str, Any]
|
50
|
-
) -> str | list[str | dict[str, Any]]:
|
51
|
-
...
|
50
|
+
) -> str | list[str | dict[str, Any]]: ...
|
52
51
|
|
53
52
|
|
54
53
|
def langchain_solver(agent: LangChainAgent) -> Solver:
|
@@ -6,7 +6,7 @@ from inspect_ai._eval.eval import eval, eval_async, eval_retry, eval_retry_async
|
|
6
6
|
from inspect_ai._eval.list import list_tasks
|
7
7
|
from inspect_ai._eval.registry import task
|
8
8
|
from inspect_ai._eval.score import score, score_async
|
9
|
-
from inspect_ai._eval.
|
9
|
+
from inspect_ai._eval.task import Task, TaskInfo, Tasks
|
10
10
|
from inspect_ai._util.constants import PKG_NAME
|
11
11
|
|
12
12
|
__version__ = importlib_version(PKG_NAME)
|
@@ -11,7 +11,7 @@ from typing_extensions import Unpack
|
|
11
11
|
from inspect_ai._cli.common import CommonOptions, common_options, resolve_common_options
|
12
12
|
from inspect_ai._cli.util import parse_cli_args
|
13
13
|
from inspect_ai._eval.list import list_tasks
|
14
|
-
from inspect_ai._eval.
|
14
|
+
from inspect_ai._eval.task import TaskInfo
|
15
15
|
from inspect_ai.log import list_eval_logs
|
16
16
|
|
17
17
|
|
@@ -26,10 +26,10 @@ from inspect_ai.solver import Solver
|
|
26
26
|
from inspect_ai.util._context import init_async_context
|
27
27
|
|
28
28
|
from .loader import resolve_tasks
|
29
|
+
from .task import Tasks, TaskSpec
|
29
30
|
from .task.log import TaskLogger
|
30
31
|
from .task.run import task_run
|
31
32
|
from .task.util import task_file, task_run_dir
|
32
|
-
from .types import Tasks, TaskSpec
|
33
33
|
|
34
34
|
log = logging.getLogger(__name__)
|
35
35
|
|
@@ -18,8 +18,8 @@ from inspect_ai.model import Model, ModelName
|
|
18
18
|
|
19
19
|
from .list import task_files
|
20
20
|
from .registry import task_create
|
21
|
+
from .task import Task, TaskInfo, Tasks
|
21
22
|
from .task.constants import TASK_FILE_ATTR, TASK_RUN_DIR_ATTR
|
22
|
-
from .types import Task, TaskInfo, Tasks
|
23
23
|
|
24
24
|
|
25
25
|
def resolve_tasks(
|
@@ -17,9 +17,9 @@ from inspect_ai.model import ModelName
|
|
17
17
|
from inspect_ai.scorer import Metric, Score, Scorer, Target
|
18
18
|
from inspect_ai.solver import TaskState
|
19
19
|
|
20
|
+
from .task import Task
|
20
21
|
from .task.results import eval_results
|
21
22
|
from .task.util import task_run_dir
|
22
|
-
from .types import Task
|
23
23
|
|
24
24
|
|
25
25
|
def score(log: EvalLog, scorer: Scorer) -> EvalLog:
|
@@ -23,6 +23,7 @@ from inspect_ai.log import (
|
|
23
23
|
EvalConfig,
|
24
24
|
EvalError,
|
25
25
|
EvalLog,
|
26
|
+
EvalResults,
|
26
27
|
EvalStats,
|
27
28
|
)
|
28
29
|
from inspect_ai.log._log import eval_error
|
@@ -34,7 +35,7 @@ from inspect_ai.model import (
|
|
34
35
|
from inspect_ai.scorer import Score, Scorer, Target
|
35
36
|
from inspect_ai.solver import Generate, Plan, Solver, TaskState
|
36
37
|
|
37
|
-
from ..
|
38
|
+
from ..task import Task
|
38
39
|
from .generate import task_generate
|
39
40
|
from .images import samples_with_base64_images, states_with_base64_images
|
40
41
|
from .log import TaskLogger, collect_eval_data, log_output, log_plan
|
@@ -101,7 +102,9 @@ async def task_run(
|
|
101
102
|
plan = (
|
102
103
|
plan
|
103
104
|
if isinstance(plan, Plan)
|
104
|
-
else Plan(plan)
|
105
|
+
else Plan(plan)
|
106
|
+
if plan is not None
|
107
|
+
else task.plan
|
105
108
|
)
|
106
109
|
score = score and task.scorer is not None
|
107
110
|
scorer: Scorer | None = task.scorer if (score and task.scorer) else None
|
@@ -132,7 +135,6 @@ async def task_run(
|
|
132
135
|
len(plan.steps) + (1 if plan.finish else 0) + (1) # scorer
|
133
136
|
)
|
134
137
|
with td.progress(total=total_steps) as p:
|
135
|
-
|
136
138
|
# forward progress
|
137
139
|
def progress() -> None:
|
138
140
|
p.update(1)
|
@@ -195,6 +197,8 @@ async def task_run(
|
|
195
197
|
metrics=task.metrics,
|
196
198
|
)
|
197
199
|
logger.log_results(results)
|
200
|
+
else:
|
201
|
+
results = EvalResults()
|
198
202
|
|
199
203
|
# collect eval data
|
200
204
|
collect_eval_data(stats, logger)
|
@@ -295,7 +299,6 @@ async def resolve_dataset(
|
|
295
299
|
epochs: int,
|
296
300
|
log_images: bool,
|
297
301
|
) -> tuple[Dataset, list[Sample], list[TaskState]]:
|
298
|
-
|
299
302
|
# apply limit to dataset
|
300
303
|
dataset_limit = (
|
301
304
|
slice(0, len(dataset))
|
@@ -20,6 +20,7 @@ import { WorkSpace } from "./src/workspace/WorkSpace.mjs";
|
|
20
20
|
|
21
21
|
export function App() {
|
22
22
|
const [selected, setSelected] = useState(-1);
|
23
|
+
const [pendingLog, setPendingLog] = useState(undefined);
|
23
24
|
const [logs, setLogs] = useState({ log_dir: "", files: [] });
|
24
25
|
const [logHeaders, setLogHeaders] = useState({});
|
25
26
|
const [offcanvas, setOffcanvas] = useState(false);
|
@@ -126,24 +127,32 @@ export function App() {
|
|
126
127
|
// Ensure that we have a selected index when there are
|
127
128
|
// new logs
|
128
129
|
useEffect(() => {
|
129
|
-
|
130
|
-
|
130
|
+
if (logs && pendingLog) {
|
131
|
+
const index = logs.files.findIndex((val) => {
|
132
|
+
return pendingLog.endsWith(val.name);
|
133
|
+
});
|
134
|
+
if (index > -1) {
|
135
|
+
setSelected(index);
|
136
|
+
}
|
137
|
+
setPendingLog(undefined);
|
138
|
+
}
|
139
|
+
}, [logs, pendingLog])
|
131
140
|
|
132
141
|
// listen for updateState messages from vscode
|
133
142
|
useEffect(() => {
|
134
|
-
const onMessage = (e) => {
|
143
|
+
const onMessage = async (e) => {
|
135
144
|
switch (e.data.type || e.data.message) {
|
136
145
|
case "updateState": {
|
137
146
|
if (e.data.url) {
|
138
|
-
|
139
147
|
const index = logs.files.findIndex((val) => {
|
140
|
-
return
|
148
|
+
return e.data.url.endsWith(val.name);
|
141
149
|
});
|
142
150
|
if (index > -1) {
|
143
151
|
// Select the correct index
|
144
152
|
setSelected(index);
|
145
153
|
} else {
|
146
|
-
|
154
|
+
await loadLogs();
|
155
|
+
setPendingLog(e.data.url);
|
147
156
|
}
|
148
157
|
}
|
149
158
|
}
|
@@ -153,7 +162,7 @@ export function App() {
|
|
153
162
|
return () => {
|
154
163
|
window.removeEventListener("message", onMessage);
|
155
164
|
};
|
156
|
-
}, [setCurrentLog]);
|
165
|
+
}, [logs, setCurrentLog, setPendingLog]);
|
157
166
|
|
158
167
|
useEffect(async () => {
|
159
168
|
// See whether a specific task_file has been passed.
|
@@ -176,6 +185,9 @@ export function App() {
|
|
176
185
|
// initial fetch of logs
|
177
186
|
await load();
|
178
187
|
|
188
|
+
// Select the first log
|
189
|
+
setSelected(0);
|
190
|
+
|
179
191
|
// poll every 1s for events
|
180
192
|
setInterval(() => {
|
181
193
|
api.client_events().then((events) => {
|
@@ -6,7 +6,8 @@ from ._prompt import (
|
|
6
6
|
prompt_template,
|
7
7
|
system_message,
|
8
8
|
)
|
9
|
-
from ._solver import Generate, Solver,
|
9
|
+
from ._solver import Generate, Solver, generate, solver
|
10
|
+
from ._task_state import TaskState
|
10
11
|
from ._tool.tool import Tool, tool
|
11
12
|
from ._tool.use_tools import use_tools
|
12
13
|
from ._tool.web_search import web_search
|
@@ -3,7 +3,8 @@ from typing import Any
|
|
3
3
|
from inspect_ai.model import ChatMessageSystem
|
4
4
|
from inspect_ai.util import resource
|
5
5
|
|
6
|
-
from ._solver import Generate, Solver,
|
6
|
+
from ._solver import Generate, Solver, solver
|
7
|
+
from ._task_state import TaskState
|
7
8
|
from ._util import append_system_message
|
8
9
|
|
9
10
|
|
@@ -18,110 +18,9 @@ from inspect_ai._util.registry import (
|
|
18
18
|
registry_name,
|
19
19
|
registry_tag,
|
20
20
|
)
|
21
|
-
from inspect_ai.model import
|
22
|
-
ChatMessage,
|
23
|
-
ChatMessageUser,
|
24
|
-
GenerateConfigArgs,
|
25
|
-
ModelName,
|
26
|
-
ModelOutput,
|
27
|
-
ToolChoice,
|
28
|
-
)
|
29
|
-
|
30
|
-
from ._tool.tool import Tool
|
31
|
-
|
21
|
+
from inspect_ai.model import GenerateConfigArgs
|
32
22
|
|
33
|
-
|
34
|
-
def __init__(
|
35
|
-
self,
|
36
|
-
model: ModelName,
|
37
|
-
sample_id: int | str,
|
38
|
-
epoch: int,
|
39
|
-
input: str | list[ChatMessage],
|
40
|
-
choices: list[str] | None,
|
41
|
-
messages: list[ChatMessage],
|
42
|
-
tools: list[Tool] = [],
|
43
|
-
tool_choice: ToolChoice | None = None,
|
44
|
-
output: ModelOutput | None = None,
|
45
|
-
completed: bool = False,
|
46
|
-
metadata: dict[str, Any] = {},
|
47
|
-
) -> None:
|
48
|
-
self._model = model
|
49
|
-
|
50
|
-
self.sample_id = sample_id
|
51
|
-
"""Unique id for sample."""
|
52
|
-
|
53
|
-
self.epoch = epoch
|
54
|
-
"""Epoch number for sample."""
|
55
|
-
|
56
|
-
self._input = input
|
57
|
-
|
58
|
-
self.choices = choices
|
59
|
-
"""Sample choices."""
|
60
|
-
|
61
|
-
self.messages = messages
|
62
|
-
"""Chat conversation history for sample."""
|
63
|
-
|
64
|
-
self.tools = tools
|
65
|
-
"""Tools available to the model."""
|
66
|
-
|
67
|
-
self.tool_choice = tool_choice
|
68
|
-
"""Tool choice directive."""
|
69
|
-
|
70
|
-
self.output = output if output else ModelOutput(model=str(model), choices=[])
|
71
|
-
"""Model output."""
|
72
|
-
|
73
|
-
self.completed = completed
|
74
|
-
"""Flag to indicate that the solver loop should terminate."""
|
75
|
-
|
76
|
-
self.metadata = metadata
|
77
|
-
"""Additional task state metadata."""
|
78
|
-
|
79
|
-
@property
|
80
|
-
def model(self) -> ModelName:
|
81
|
-
"""Name of model being evaluated."""
|
82
|
-
return self._model
|
83
|
-
|
84
|
-
@property
|
85
|
-
def input(self) -> str | list[ChatMessage]:
|
86
|
-
"""Sample input."""
|
87
|
-
return self._input
|
88
|
-
|
89
|
-
@property
|
90
|
-
def input_text(self) -> str:
|
91
|
-
"""Sample input as text."""
|
92
|
-
if isinstance(self._input, str):
|
93
|
-
return self._input
|
94
|
-
else:
|
95
|
-
input = next(
|
96
|
-
(message.text for message in self._input if message.role == "user"),
|
97
|
-
None,
|
98
|
-
)
|
99
|
-
if input:
|
100
|
-
return input
|
101
|
-
else:
|
102
|
-
raise ValueError(
|
103
|
-
"input_text requested from TaskState but none available"
|
104
|
-
)
|
105
|
-
|
106
|
-
@property
|
107
|
-
def user_prompt(self) -> ChatMessageUser:
|
108
|
-
"""User prompt for this state.
|
109
|
-
|
110
|
-
Tasks are very general and can have many types of inputs.
|
111
|
-
However, in many cases solvers assume they can interact with
|
112
|
-
the state as a "chat" in a predictable fashion (e.g. prompt
|
113
|
-
engineering solvers). This property enables easy read and
|
114
|
-
write access to the user chat prompt. Raises an
|
115
|
-
exception if there is no user prompt
|
116
|
-
|
117
|
-
Returns:
|
118
|
-
First user `ChatMessage` in the task state.
|
119
|
-
"""
|
120
|
-
prompt = next((m for m in self.messages if m.role == "user"), None)
|
121
|
-
if prompt:
|
122
|
-
return prompt
|
123
|
-
else:
|
124
|
-
raise ValueError("user_prompt requested from TaskState but none available")
|
23
|
+
from ._task_state import TaskState
|
125
24
|
|
126
25
|
|
127
26
|
@runtime_checkable
|