inspect-ai 0.3.11__tar.gz → 0.13.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (447) hide show
  1. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/CHANGELOG.md +9 -0
  2. {inspect_ai-0.3.11/src/inspect_ai.egg-info → inspect_ai-0.13.3}/PKG-INFO +1 -1
  3. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/solvers.qmd +56 -3
  4. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/agents/langchain/inspect_langchain.py +1 -2
  5. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/__init__.py +1 -1
  6. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/list.py +1 -1
  7. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/eval.py +1 -1
  8. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/list.py +1 -1
  9. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/loader.py +1 -1
  10. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/registry.py +1 -1
  11. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/score.py +1 -1
  12. inspect_ai-0.13.3/src/inspect_ai/_eval/task/__init__.py +3 -0
  13. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/task/run.py +7 -4
  14. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/task/util.py +1 -1
  15. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/App.mjs +19 -7
  16. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +1 -0
  17. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/SamplesTab.mjs +1 -1
  18. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/__init__.py +2 -1
  19. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_critique.py +2 -1
  20. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_multiple_choice.py +2 -1
  21. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_plan.py +2 -1
  22. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_prompt.py +2 -1
  23. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_solver.py +2 -103
  24. inspect_ai-0.13.3/src/inspect_ai/solver/_task_state.py +145 -0
  25. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_tool/use_tools.py +2 -1
  26. {inspect_ai-0.3.11 → inspect_ai-0.13.3/src/inspect_ai.egg-info}/PKG-INFO +1 -1
  27. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai.egg-info/SOURCES.txt +3 -1
  28. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.gitattributes +0 -0
  29. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.github/dependabot.yml +0 -0
  30. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.github/pull_request_template.md +0 -0
  31. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.github/workflows/build.yml +0 -0
  32. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.github/workflows/docs.yml +0 -0
  33. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.github/workflows/pypi.yml +0 -0
  34. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.github/workflows/vscode.yml +0 -0
  35. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.gitignore +0 -0
  36. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.pre-commit-config.yaml +0 -0
  37. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.vscode/extensions.json +0 -0
  38. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/.vscode/settings.json +0 -0
  39. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/LICENSE +0 -0
  40. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/README.md +0 -0
  41. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/README.md +0 -0
  42. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/arc.py +0 -0
  43. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/boolq.py +0 -0
  44. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/datasets/math_test.csv +0 -0
  45. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/datasets/mmlu.csv +0 -0
  46. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/gpqa.py +0 -0
  47. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/gsm8k.py +0 -0
  48. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/hellaswag.py +0 -0
  49. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/mathematics.py +0 -0
  50. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/mmlu.py +0 -0
  51. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/benchmarks/piqa.py +0 -0
  52. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/.gitignore +0 -0
  53. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/arc.qmd +0 -0
  54. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/biology_qa.qmd +0 -0
  55. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/footer.qmd +0 -0
  56. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/gsm8k.qmd +0 -0
  57. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/hellaswag.qmd +0 -0
  58. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/index.qmd +0 -0
  59. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/mathematics.qmd +0 -0
  60. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/popularity.qmd +0 -0
  61. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/security_guide.qmd +0 -0
  62. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/theory_of_mind.qmd +0 -0
  63. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_examples/tool_use.qmd +0 -0
  64. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_format/pre-render.sh +0 -0
  65. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_quarto.yml +0 -0
  66. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/_variables.yml +0 -0
  67. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/datasets.qmd +0 -0
  68. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/eval-logs.qmd +0 -0
  69. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/eval-suites.qmd +0 -0
  70. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/eval-tuning.qmd +0 -0
  71. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/examples.qmd +0 -0
  72. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/aisi-logo.png +0 -0
  73. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/eval-log.png +0 -0
  74. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-answers.png +0 -0
  75. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-filter.png +0 -0
  76. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-history.png +0 -0
  77. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-home.png +0 -0
  78. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-info.png +0 -0
  79. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-logging-console.png +0 -0
  80. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-logging.png +0 -0
  81. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-main.png +0 -0
  82. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-messages.png +0 -0
  83. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-metadata.png +0 -0
  84. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-scoring.png +0 -0
  85. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-sort.png +0 -0
  86. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-view-splash.png +0 -0
  87. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-vscode-config.png +0 -0
  88. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-vscode-install.png +0 -0
  89. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-vscode-logview.png +0 -0
  90. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-vscode-output-channel.png +0 -0
  91. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect-vscode-run-task.png +0 -0
  92. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/inspect.png +0 -0
  93. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/popularity.png +0 -0
  94. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/rate-limit.png +0 -0
  95. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/images/running-theory.png +0 -0
  96. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/index.qmd +0 -0
  97. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/log-viewer.qmd +0 -0
  98. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/models.qmd +0 -0
  99. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/scorers.qmd +0 -0
  100. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/theme.scss +0 -0
  101. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/tools.qmd +0 -0
  102. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/vscode.qmd +0 -0
  103. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/docs/workflow.qmd +0 -0
  104. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/agents/langchain/.env.example +0 -0
  105. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/agents/langchain/.gitignore +0 -0
  106. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/agents/langchain/README.md +0 -0
  107. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/agents/langchain/requirements.txt +0 -0
  108. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/agents/langchain/wikipedia.jsonl +0 -0
  109. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/agents/langchain/wikipedia.py +0 -0
  110. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/biology_qa.py +0 -0
  111. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/popularity.py +0 -0
  112. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/security_guide.py +0 -0
  113. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/theory_of_mind.py +0 -0
  114. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/examples/tool_use.py +0 -0
  115. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/pyproject.toml +0 -0
  116. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/requirements.txt +0 -0
  117. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/setup.cfg +0 -0
  118. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/__main__.py +0 -0
  119. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/common.py +0 -0
  120. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/eval.py +0 -0
  121. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/info.py +0 -0
  122. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/main.py +0 -0
  123. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/score.py +0 -0
  124. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/util.py +0 -0
  125. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_cli/view.py +0 -0
  126. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_display/__init__.py +0 -0
  127. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_display/_display.py +0 -0
  128. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_display/logger.py +0 -0
  129. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_display/rich.py +0 -0
  130. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/task/constants.py +0 -0
  131. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/task/generate.py +0 -0
  132. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/task/images.py +0 -0
  133. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/task/log.py +0 -0
  134. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_eval/task/results.py +0 -0
  135. /inspect_ai-0.3.11/src/inspect_ai/_eval/types.py → /inspect_ai-0.13.3/src/inspect_ai/_eval/task/task.py +0 -0
  136. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/_async.py +0 -0
  137. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/appdirs.py +0 -0
  138. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/constants.py +0 -0
  139. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/datetime.py +0 -0
  140. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/dev.py +0 -0
  141. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/docstring.py +0 -0
  142. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/dotenv.py +0 -0
  143. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/error.py +0 -0
  144. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/file.py +0 -0
  145. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/git.py +0 -0
  146. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/http.py +0 -0
  147. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/images.py +0 -0
  148. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/json.py +0 -0
  149. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/notebook.py +0 -0
  150. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/path.py +0 -0
  151. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/pattern.py +0 -0
  152. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/platform.py +0 -0
  153. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/registry.py +0 -0
  154. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/retry.py +0 -0
  155. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/samples.py +0 -0
  156. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/text.py +0 -0
  157. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/url.py +0 -0
  158. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_util/version.py +0 -0
  159. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/schema.py +0 -0
  160. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/view.py +0 -0
  161. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/.gitignore +0 -0
  162. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/App.css +0 -0
  163. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/favicon.svg +0 -0
  164. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/index.html +0 -0
  165. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/bootstrap/css/bootstrap-icons.min.css +0 -0
  166. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/bootstrap/css/bootstrap.min.css +0 -0
  167. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/bootstrap/css/fonts/bootstrap-icons.woff +0 -0
  168. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/bootstrap/css/fonts/bootstrap-icons.woff2 +0 -0
  169. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/bootstrap/js/bootstrap.bundle.min.js +0 -0
  170. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/clipboard.min.js +0 -0
  171. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/json5.min.js +0 -0
  172. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/prism/prism-dark.css +0 -0
  173. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/prism/prism.min.css +0 -0
  174. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/prism/prism.min.js +0 -0
  175. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/purify.min.js +0 -0
  176. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/libs/showdown.min.js +0 -0
  177. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/log-schema.json +0 -0
  178. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/log.d.ts +0 -0
  179. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/preact/hooks.js +0 -0
  180. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/preact/htm/htm.mjs +0 -0
  181. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/preact/htm/preact.js +0 -0
  182. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/preact/htm/preact.mjs +0 -0
  183. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/preact/preact-hooks.mjs +0 -0
  184. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/preact/preact.mjs +0 -0
  185. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/Constants.mjs +0 -0
  186. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/Register.mjs +0 -0
  187. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/api/api-browser.mjs +0 -0
  188. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/api/api-vscode.mjs +0 -0
  189. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/api/index.mjs +0 -0
  190. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -0
  191. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/AnsiDisplay.css +0 -0
  192. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/AnsiDisplay.mjs +0 -0
  193. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/AppErrorBoundary.mjs +0 -0
  194. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/Card.mjs +0 -0
  195. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/ChatView.mjs +0 -0
  196. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/CopyButton.mjs +0 -0
  197. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/Dialog.mjs +0 -0
  198. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/EmptyPanel.mjs +0 -0
  199. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/ErrorPanel.mjs +0 -0
  200. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/LabeledValue.mjs +0 -0
  201. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/LargeModal.mjs +0 -0
  202. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/LoadingScreen.mjs +0 -0
  203. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/MarkdownDiv.mjs +0 -0
  204. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/MessageContent.mjs +0 -0
  205. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/MetaDataView.mjs +0 -0
  206. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/MorePopOver.mjs +0 -0
  207. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/RenderedContent.mjs +0 -0
  208. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/TabSet.mjs +0 -0
  209. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/ToolButton.mjs +0 -0
  210. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/VirtualList.mjs +0 -0
  211. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/components/ansi-output.js +0 -0
  212. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/log-reader/Log-Reader.mjs +0 -0
  213. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/log-reader/Native-Log-Reader.mjs +0 -0
  214. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/log-reader/Open-AI-Log-Reader.mjs +0 -0
  215. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/logging/LoggingPanel.mjs +0 -0
  216. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/navbar/Navbar.mjs +0 -0
  217. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/plan/PlanCard.mjs +0 -0
  218. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/SampleDialog.mjs +0 -0
  219. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/SampleDisplay.mjs +0 -0
  220. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/SampleList.mjs +0 -0
  221. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/SampleScoreView.mjs +0 -0
  222. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/SamplesTools.mjs +0 -0
  223. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/tools/EpochFilter.mjs +0 -0
  224. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +0 -0
  225. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +0 -0
  226. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/sidebar/Sidebar.mjs +0 -0
  227. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/title/TitleBlock.mjs +0 -0
  228. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/usage/ModelTokenTable.mjs +0 -0
  229. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/usage/UsageCard.mjs +0 -0
  230. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/utils/Format.mjs +0 -0
  231. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/utils/Git.mjs +0 -0
  232. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/utils/Path.mjs +0 -0
  233. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/utils/Type.mjs +0 -0
  234. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/utils/events.mjs +0 -0
  235. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/utils/sleep.mjs +0 -0
  236. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/workspace/TaskErrorPanel.mjs +0 -0
  237. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/_view/www/src/workspace/WorkSpace.mjs +0 -0
  238. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/__init__.py +0 -0
  239. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_dataset.py +0 -0
  240. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_examples/bias_detection.jsonl +0 -0
  241. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_examples/biology_qa.jsonl +0 -0
  242. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_examples/popularity.jsonl +0 -0
  243. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_examples/security_guide.jsonl +0 -0
  244. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_examples/theory_of_mind.jsonl +0 -0
  245. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_sources/csv.py +0 -0
  246. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_sources/example.py +0 -0
  247. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_sources/file.py +0 -0
  248. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_sources/hf.py +0 -0
  249. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_sources/json.py +0 -0
  250. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/dataset/_util.py +0 -0
  251. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/log/__init__.py +0 -0
  252. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/log/_file.py +0 -0
  253. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/log/_log.py +0 -0
  254. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/__init__.py +0 -0
  255. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_model.py +0 -0
  256. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/anthropic.py +0 -0
  257. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/azureai.py +0 -0
  258. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/bedrock.py +0 -0
  259. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/cloudflare.py +0 -0
  260. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/google.py +0 -0
  261. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/hf.py +0 -0
  262. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/mistral.py +0 -0
  263. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/ollama.py +0 -0
  264. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/openai.py +0 -0
  265. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/providers.py +0 -0
  266. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/together.py +0 -0
  267. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_providers/util.py +0 -0
  268. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_registry.py +0 -0
  269. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_tool.py +0 -0
  270. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/model/_util.py +0 -0
  271. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/py.typed +0 -0
  272. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/__init__.py +0 -0
  273. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_answer.py +0 -0
  274. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_common.py +0 -0
  275. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_match.py +0 -0
  276. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_metric.py +0 -0
  277. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_metrics/__init__.py +0 -0
  278. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_metrics/accuracy.py +0 -0
  279. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_metrics/mean.py +0 -0
  280. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_metrics/std.py +0 -0
  281. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_model.py +0 -0
  282. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_multi.py +0 -0
  283. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_pattern.py +0 -0
  284. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/scorer/_scorer.py +0 -0
  285. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_tool/tool.py +0 -0
  286. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_tool/tool_def.py +0 -0
  287. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_tool/web_search.py +0 -0
  288. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/solver/_util.py +0 -0
  289. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/util/__init__.py +0 -0
  290. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/util/_context/__init__.py +0 -0
  291. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/util/_context/concurrency.py +0 -0
  292. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/util/_context/logger.py +0 -0
  293. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/util/_context/resource.py +0 -0
  294. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai/util/_context/subprocess.py +0 -0
  295. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai.egg-info/dependency_links.txt +0 -0
  296. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai.egg-info/entry_points.txt +0 -0
  297. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai.egg-info/requires.txt +0 -0
  298. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/src/inspect_ai.egg-info/top_level.txt +0 -0
  299. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/conftest.py +0 -0
  300. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/scorer/test_answer.py +0 -0
  301. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/scorer/test_pattern.py +0 -0
  302. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_anthropic.py +0 -0
  303. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_cloudflare.py +0 -0
  304. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_collapse_assistant_message.py +0 -0
  305. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_collapse_user_message.py +0 -0
  306. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_dataset/samples.csv +0 -0
  307. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_dataset/samples.json +0 -0
  308. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_dataset/samples.jsonl +0 -0
  309. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_dataset.py +0 -0
  310. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_eval_log/log_invalid.txt +0 -0
  311. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_eval_log/log_version_2.txt +0 -0
  312. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_eval_log/log_with_nan.txt +0 -0
  313. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_eval_log.py +0 -0
  314. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_examples.py +0 -0
  315. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_helpers/__init__.py +0 -0
  316. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_helpers/utils.py +0 -0
  317. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_hf.py +0 -0
  318. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_images/images.jsonl +0 -0
  319. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_images.py +0 -0
  320. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_list_task.py +0 -0
  321. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_logprobs.py +0 -0
  322. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_metric.py +0 -0
  323. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_model_package.py +0 -0
  324. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_num_choices.py +0 -0
  325. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_openai.py +0 -0
  326. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_package/.gitignore +0 -0
  327. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_package/inspect_package/__init__.py +0 -0
  328. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_package/inspect_package/inspect_ai.py +0 -0
  329. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_package/inspect_package/modelapi/custom.py +0 -0
  330. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_package/inspect_package/py.typed +0 -0
  331. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_package/pyproject.toml +0 -0
  332. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_plan.py +0 -0
  333. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_registry.py +0 -0
  334. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_retry.py +0 -0
  335. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_scorer.py +0 -0
  336. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_solver.py +0 -0
  337. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_stop_reason.py +0 -0
  338. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_subprocess.py +0 -0
  339. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/__init__.py +0 -0
  340. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/attribs.ipynb +0 -0
  341. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/multiple.py +0 -0
  342. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/multiple_dir/_decoy/testit.py +0 -0
  343. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/multiple_dir/_decoy2.py +0 -0
  344. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/multiple_dir/bar.py +0 -0
  345. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/multiple_dir/foo.py +0 -0
  346. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/recurse/.folder3/epsilon.py +0 -0
  347. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/recurse/folder1/_decoy.py +0 -0
  348. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/recurse/folder1/theta.py +0 -0
  349. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/recurse/folder2/.folder3/epsilon.py +0 -0
  350. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/recurse/folder2/another.py +0 -0
  351. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_task_list/recurse/folder2/first.py +0 -0
  352. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tests/test_tools.py +0 -0
  353. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.eslintrc.json +0 -0
  354. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.gitignore +0 -0
  355. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.vscode/extensions.json +0 -0
  356. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.vscode/launch.json +0 -0
  357. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.vscode/settings.json +0 -0
  358. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.vscode/tasks.json +0 -0
  359. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.vscode-test.mjs +0 -0
  360. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.vscodeignore +0 -0
  361. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/.yarnrc +0 -0
  362. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/CHANGELOG.md +0 -0
  363. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/LICENSE +0 -0
  364. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/README.md +0 -0
  365. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/assets/logo/inspect.png +0 -0
  366. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/assets/logo/inspect.svg +0 -0
  367. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/assets/templates/task.py.template +0 -0
  368. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/assets/www/codicon/codicon.css +0 -0
  369. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/assets/www/codicon/codicon.ttf +0 -0
  370. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/assets/www/view/view-overrides.css +0 -0
  371. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/package.json +0 -0
  372. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/document.ts +0 -0
  373. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/error.ts +0 -0
  374. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/focus.ts +0 -0
  375. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/notebook.ts +0 -0
  376. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/symbol.ts +0 -0
  377. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/task.ts +0 -0
  378. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/templates.ts +0 -0
  379. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/components/webview.ts +0 -0
  380. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/appdirs.ts +0 -0
  381. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/command.ts +0 -0
  382. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/dispose.ts +0 -0
  383. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/env.ts +0 -0
  384. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/git.ts +0 -0
  385. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/jsonrpc.ts +0 -0
  386. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/log.ts +0 -0
  387. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/nonce.ts +0 -0
  388. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/path.ts +0 -0
  389. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/port.ts +0 -0
  390. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/process.ts +0 -0
  391. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/python/code.ts +0 -0
  392. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/python/exec.ts +0 -0
  393. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/python/index.ts +0 -0
  394. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/python/interpreter.ts +0 -0
  395. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/random.ts +0 -0
  396. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/string.ts +0 -0
  397. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/text.ts +0 -0
  398. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/wait.ts +0 -0
  399. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/core/workspace.ts +0 -0
  400. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/extension.ts +0 -0
  401. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/inspect/index.ts +0 -0
  402. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/inspect/list.ts +0 -0
  403. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/inspect/logs.ts +0 -0
  404. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/inspect/props.ts +0 -0
  405. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/inspect/version.ts +0 -0
  406. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/active-task/active-task-command.ts +0 -0
  407. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/active-task/active-task-provider.ts +0 -0
  408. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/activity-bar-provider.ts +0 -0
  409. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/env-config-provider.ts +0 -0
  410. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/task-config-commands.ts +0 -0
  411. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/task-config-provider.ts +0 -0
  412. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/task-outline-commands.ts +0 -0
  413. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/task-outline-provider.ts +0 -0
  414. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/webview/env-config-webview.css +0 -0
  415. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/webview/env-config-webview.ts +0 -0
  416. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/webview/task-config-webview.css +0 -0
  417. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/webview/task-config-webview.ts +0 -0
  418. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/webview/vscode-controls.css +0 -0
  419. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/activity-bar/webview/webview-utils.ts +0 -0
  420. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/codelens/codelens-provider.ts +0 -0
  421. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/inspect/inspect-constants.ts +0 -0
  422. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/inspect/inspect-eval-commands.ts +0 -0
  423. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/inspect/inspect-eval.ts +0 -0
  424. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/inspect/inspect-manager.ts +0 -0
  425. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/logview/commands.ts +0 -0
  426. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/logview/logview-file-watcher.ts +0 -0
  427. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/logview/logview-link-provider.ts +0 -0
  428. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/logview/logview-manager.ts +0 -0
  429. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/logview/logview-webview.ts +0 -0
  430. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/logview/logview.ts +0 -0
  431. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/settings/inspect-settings.ts +0 -0
  432. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/settings/user-settings.ts +0 -0
  433. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/workspace/workspace-env-commands.ts +0 -0
  434. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/workspace/workspace-env-provider.ts +0 -0
  435. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/workspace/workspace-init.ts +0 -0
  436. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/workspace/workspace-state-provider.ts +0 -0
  437. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/providers/workspace/workspace-task-provider.ts +0 -0
  438. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/src/test/extension.test.ts +0 -0
  439. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/tools/ts-to-mjs/.gitignore +0 -0
  440. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/tools/ts-to-mjs/package.json +0 -0
  441. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/tools/ts-to-mjs/rollup.config.js +0 -0
  442. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/tools/ts-to-mjs/src/index.ts +0 -0
  443. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/tools/ts-to-mjs/src/jsonrpc.ts +0 -0
  444. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/tools/ts-to-mjs/yarn.lock +0 -0
  445. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/tsconfig.json +0 -0
  446. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/webpack.config.js +0 -0
  447. {inspect_ai-0.3.11 → inspect_ai-0.13.3}/tools/vscode/yarn.lock +0 -0
@@ -1,5 +1,14 @@
1
1
  # Changelog
2
2
 
3
+ ## v0.3.13 (31 May 2024)
4
+
5
+ - Bugfix: Inspect view was not reliably updating when new evaluation logs were written.
6
+
7
+ ## v0.3.12 (31 May 2024)
8
+
9
+ - Bugfix: `results` was not defined when no scorer was provided resulting in an error being thrown. Fixed by setting `results = EvalResults()` when no scorer is provided.
10
+ - Bugfix: The viewer was not properly handling samples without scores.
11
+
3
12
  ## v0.3.11 (30 May 2024)
4
13
 
5
14
  - Update to non-beta version of Anthropic tool use (remove legacy xml tools implementation).
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: inspect_ai
3
- Version: 0.3.11
3
+ Version: 0.13.3
4
4
  Summary: Framework for large language model evaluations
5
5
  Author: UK AI Safety Institute
6
6
  License: MIT License
@@ -1,3 +1,7 @@
1
+ ---
2
+ tbl-colwidths: [20,25,45]
3
+ ---
4
+
1
5
  # Solvers {#sec-solvers}
2
6
 
3
7
  ## Overview
@@ -147,7 +151,7 @@ You will likely want to experiment with using a distinct `model` for generating
147
151
 
148
152
  ## Custom Solvers
149
153
 
150
- Let's take a look at the source code for a couple of the built in solvers as a jumping off point for implementing your own solvers. A solver is an implementation of the `Solver` protocol (a function that transforms a `TaskState`):
154
+ In this section we'll take a look at the source code for a couple of the built in solvers as a jumping off point for implementing your own solvers. A solver is an implementation of the `Solver` protocol (a function that transforms a `TaskState`):
151
155
 
152
156
  ``` python
153
157
  async def solve(state: TaskState, generate: Generate) -> TaskState:
@@ -158,10 +162,52 @@ async def solve(state: TaskState, generate: Generate) -> TaskState:
158
162
 
159
163
  Typically solvers can be customised with parameters (e.g. `template` for prompt engineering solvers). This means that a `Solver` is actually a function which returns the `solve()` function referenced above (this will become more clear in the examples below).
160
164
 
165
+ ### Task States
166
+
167
+ Before presenting the examples we'll take a more in-depth look at the `TaskState` class. Task states consist of both lower level data members (e.g. `messages`, `output`) as well as a number of convenience properties. The core members of `TaskState` that are *modified* by solvers are `messages` / `user_prompt` and `output`:
168
+
169
+ | Member | Type | Description |
170
+ |-----------|-----------|---------------------------------------------------|
171
+ | `messages` | list\[ChatMessage\] | Chat conversation history for sample. It is automatically appended to by the `generate()` solver, and is often manipulated by other solvers (e.g. for prompt engineering or elicitation). |
172
+ | `user_prompt` | ChatMessageUser | Convenience property for accessing the first user message in the message history (commonly used for prompt engineering). |
173
+ | `output` | ModelOutput | The 'final' model output once we've completed all solving. This field is automatically updated with the last "assistant" message by the `generate()` solver. |
174
+
161
175
  ::: {.callout-note appearance="simple"}
162
- When creating custom solvers, it's critical that you understand Inspect's concurrency model. More specifically, if your solver is doing non-trivial work (e.g. calling REST APIs, executing external processes, etc.) please review [Eval Tuning](#sec-eval-tuning) before proceeding.
176
+ Note that the `generate()` solver automatically updates both the `messages` and `output` fields. For very simple evaluations modifying the `user_prompt` and then calling `generate()` encompasses all of the required interaction with `TaskState`.
163
177
  :::
164
178
 
179
+ There are two additional fields that solvers might modify (but they are typically for more advanced use cases):
180
+
181
+ | Member | Type | Description |
182
+ |---------|---------|------------------------------------------------------|
183
+ | `metadata` | dict | Original metadata from `Sample`, as well as any other custom metadata that solvers choose to write (typically used to coordinate between solvers and/or for custom logging). |
184
+ | `completed` | bool | Solvers can set `completed = True` to cause the task to exit the plan immediately. |
185
+
186
+ Sometimes it's important to have access to the *original* prompt input for the task (as other solvers may have re-written or even removed it entirely). This is available using the `input` and `input_text` properties:
187
+
188
+ | Member | Type | Description |
189
+ |---------------|---------------|-------------------------------------------|
190
+ | `input` | str \| list\[ChatMessage\] | Original `Sample` input. |
191
+ | `input_text` | str | Convenience function for accessing the initial input from the `Sample` as a string. |
192
+
193
+ There are several other fields used to provide contextual data from either the task sample or evaluation:
194
+
195
+ | Member | Type | Description |
196
+ |---------------|---------------|------------------------------------------|
197
+ | `sample_id` | int \| str | Unique ID for sample. |
198
+ | `epoch` | int | Epoch for sample. |
199
+ | `choices` | list\[str\] \| None | Choices from sample (used only in multiple-choice evals). |
200
+ | `model` | ModelName | Name of model currently being evaluated. |
201
+
202
+ Finally, task states also include available tools as well as guidance for the model on which tools to use (if you haven't yet encountered the concept of tool use in language models, don't worry about understanding these fields, the [Tools](tools.qmd) article provides a more in-depth treatment):
203
+
204
+ | Member | Type | Description |
205
+ |---------------|--------------|------------------------------|
206
+ | `tools` | list\[Tool\] | Tools available to the model |
207
+ | `tool_choice` | ToolChoice | Tool choice directive. |
208
+
209
+ These fields are typically modified via the `use_tools()` solver, but they can also be modified directly for more advanced use cases.
210
+
165
211
  ### Example: Prompt Template
166
212
 
167
213
  Here's the code for the `prompt_template()` solver:
@@ -188,8 +234,11 @@ def prompt_template(template: str, **params: dict[str, Any]):
188
234
  A few things to note about this implementation:
189
235
 
190
236
  1. The function applies the `@solver` decorator—this registers the `Solver` with Inspect, making it possible to capture its name and parameters for logging, as well as make it callable from a configuration file (e.g. a YAML specification of an eval).
237
+
191
238
  2. The `solve()` function is declared as `async`. This is so that it can participate in Inspect's optimised scheduling for expensive model generation calls (this solver doesn't call `generate()` but others will).
239
+
192
240
  3. The `resource()` function is used to read the specified `template`. This function accepts a string, file, or URL as its argument, and then returns a string with the contents of the resource.
241
+
193
242
  4. We make use of the `user_prompt` property on the `TaskState`. This is a convenience property for locating the first `role="user"` message (otherwise you might need to skip over system messages, etc). Since this is a string templating solver, we use the `state.user_prompt.text` property (so we are dealing with prompt as a string, recall that it can also be a list of messages).
194
243
 
195
244
  ### Example: Self Critique
@@ -278,6 +327,10 @@ def self_critique(
278
327
 
279
328
  Note that calls to `generate()` (for both the critique model and the model being evaluated) are called with `await`—this is critical to ensure that the solver participates correctly in the scheduling of generation work.
280
329
 
330
+ ### Concurrency
331
+
332
+ When creating custom solvers, it's critical that you understand Inspect's concurrency model. More specifically, if your solver is doing non-trivial work (e.g. calling REST APIs, executing external processes, etc.) please review [Eval Tuning](#sec-eval-tuning) for a more in-depth discussion.
333
+
281
334
  ## Early Termination
282
335
 
283
336
  In some cases a solver has the context available to request an early termination of the plan (i.e. don't call the rest of the solvers). In this case, setting the `TaskState.completed` field will result in forgoing remaining solvers in the plan. For example, here's a simple solver that terminates the plan early:
@@ -318,7 +371,7 @@ In this example the `finish_up()` solver will always be called even if the plan
318
371
 
319
372
  If your solvers allocate resources (for example, run a Docker container or mount a drive), you will want to make sure that these resources are cleaned up even in the case of an error occurring during the evaluation. To arrange for this, use a `Plan` object with a `cleanup` function:
320
373
 
321
- ```python
374
+ ``` python
322
375
 
323
376
  async def cleanup(state):
324
377
  # cleanup resources
@@ -47,8 +47,7 @@ from inspect_ai.solver import Generate, Solver, TaskState
47
47
  class LangChainAgent(Protocol):
48
48
  async def __call__(
49
49
  self, llm: BaseChatModel, input: dict[str, Any]
50
- ) -> str | list[str | dict[str, Any]]:
51
- ...
50
+ ) -> str | list[str | dict[str, Any]]: ...
52
51
 
53
52
 
54
53
  def langchain_solver(agent: LangChainAgent) -> Solver:
@@ -6,7 +6,7 @@ from inspect_ai._eval.eval import eval, eval_async, eval_retry, eval_retry_async
6
6
  from inspect_ai._eval.list import list_tasks
7
7
  from inspect_ai._eval.registry import task
8
8
  from inspect_ai._eval.score import score, score_async
9
- from inspect_ai._eval.types import Task, TaskInfo, Tasks
9
+ from inspect_ai._eval.task import Task, TaskInfo, Tasks
10
10
  from inspect_ai._util.constants import PKG_NAME
11
11
 
12
12
  __version__ = importlib_version(PKG_NAME)
@@ -11,7 +11,7 @@ from typing_extensions import Unpack
11
11
  from inspect_ai._cli.common import CommonOptions, common_options, resolve_common_options
12
12
  from inspect_ai._cli.util import parse_cli_args
13
13
  from inspect_ai._eval.list import list_tasks
14
- from inspect_ai._eval.types import TaskInfo
14
+ from inspect_ai._eval.task import TaskInfo
15
15
  from inspect_ai.log import list_eval_logs
16
16
 
17
17
 
@@ -26,10 +26,10 @@ from inspect_ai.solver import Solver
26
26
  from inspect_ai.util._context import init_async_context
27
27
 
28
28
  from .loader import resolve_tasks
29
+ from .task import Tasks, TaskSpec
29
30
  from .task.log import TaskLogger
30
31
  from .task.run import task_run
31
32
  from .task.util import task_file, task_run_dir
32
- from .types import Tasks, TaskSpec
33
33
 
34
34
  log = logging.getLogger(__name__)
35
35
 
@@ -8,7 +8,7 @@ from typing import Any, Callable
8
8
  from inspect_ai._util.error import exception_message
9
9
  from inspect_ai._util.file import file
10
10
 
11
- from .types import TaskInfo
11
+ from .task import TaskInfo
12
12
 
13
13
  logger = getLogger(__name__)
14
14
 
@@ -18,8 +18,8 @@ from inspect_ai.model import Model, ModelName
18
18
 
19
19
  from .list import task_files
20
20
  from .registry import task_create
21
+ from .task import Task, TaskInfo, Tasks
21
22
  from .task.constants import TASK_FILE_ATTR, TASK_RUN_DIR_ATTR
22
- from .types import Task, TaskInfo, Tasks
23
23
 
24
24
 
25
25
  def resolve_tasks(
@@ -14,7 +14,7 @@ from inspect_ai._util.registry import (
14
14
  )
15
15
  from inspect_ai.model import ModelName
16
16
 
17
- from .types import Task
17
+ from .task import Task
18
18
 
19
19
  MODEL_PARAM = "model"
20
20
 
@@ -17,9 +17,9 @@ from inspect_ai.model import ModelName
17
17
  from inspect_ai.scorer import Metric, Score, Scorer, Target
18
18
  from inspect_ai.solver import TaskState
19
19
 
20
+ from .task import Task
20
21
  from .task.results import eval_results
21
22
  from .task.util import task_run_dir
22
- from .types import Task
23
23
 
24
24
 
25
25
  def score(log: EvalLog, scorer: Scorer) -> EvalLog:
@@ -0,0 +1,3 @@
1
+ from .task import Task, TaskInfo, TaskSpec, Tasks # noqa: I001, F401
2
+
3
+ __all__ = ["Task", "TaskInfo", "TaskSpec", "Tasks"]
@@ -23,6 +23,7 @@ from inspect_ai.log import (
23
23
  EvalConfig,
24
24
  EvalError,
25
25
  EvalLog,
26
+ EvalResults,
26
27
  EvalStats,
27
28
  )
28
29
  from inspect_ai.log._log import eval_error
@@ -34,7 +35,7 @@ from inspect_ai.model import (
34
35
  from inspect_ai.scorer import Score, Scorer, Target
35
36
  from inspect_ai.solver import Generate, Plan, Solver, TaskState
36
37
 
37
- from ..types import Task
38
+ from ..task import Task
38
39
  from .generate import task_generate
39
40
  from .images import samples_with_base64_images, states_with_base64_images
40
41
  from .log import TaskLogger, collect_eval_data, log_output, log_plan
@@ -101,7 +102,9 @@ async def task_run(
101
102
  plan = (
102
103
  plan
103
104
  if isinstance(plan, Plan)
104
- else Plan(plan) if plan is not None else task.plan
105
+ else Plan(plan)
106
+ if plan is not None
107
+ else task.plan
105
108
  )
106
109
  score = score and task.scorer is not None
107
110
  scorer: Scorer | None = task.scorer if (score and task.scorer) else None
@@ -132,7 +135,6 @@ async def task_run(
132
135
  len(plan.steps) + (1 if plan.finish else 0) + (1) # scorer
133
136
  )
134
137
  with td.progress(total=total_steps) as p:
135
-
136
138
  # forward progress
137
139
  def progress() -> None:
138
140
  p.update(1)
@@ -195,6 +197,8 @@ async def task_run(
195
197
  metrics=task.metrics,
196
198
  )
197
199
  logger.log_results(results)
200
+ else:
201
+ results = EvalResults()
198
202
 
199
203
  # collect eval data
200
204
  collect_eval_data(stats, logger)
@@ -295,7 +299,6 @@ async def resolve_dataset(
295
299
  epochs: int,
296
300
  log_images: bool,
297
301
  ) -> tuple[Dataset, list[Sample], list[TaskState]]:
298
-
299
302
  # apply limit to dataset
300
303
  dataset_limit = (
301
304
  slice(0, len(dataset))
@@ -7,7 +7,7 @@ from inspect_ai.dataset import Sample
7
7
  from inspect_ai.model import ChatMessage, ChatMessageUser
8
8
  from inspect_ai.solver import TaskState
9
9
 
10
- from ..types import Task
10
+ from ..task import Task
11
11
  from .constants import TASK_FILE_ATTR, TASK_RUN_DIR_ATTR
12
12
 
13
13
 
@@ -20,6 +20,7 @@ import { WorkSpace } from "./src/workspace/WorkSpace.mjs";
20
20
 
21
21
  export function App() {
22
22
  const [selected, setSelected] = useState(-1);
23
+ const [pendingLog, setPendingLog] = useState(undefined);
23
24
  const [logs, setLogs] = useState({ log_dir: "", files: [] });
24
25
  const [logHeaders, setLogHeaders] = useState({});
25
26
  const [offcanvas, setOffcanvas] = useState(false);
@@ -126,24 +127,32 @@ export function App() {
126
127
  // Ensure that we have a selected index when there are
127
128
  // new logs
128
129
  useEffect(() => {
129
- setSelected(0);
130
- }, [logs])
130
+ if (logs && pendingLog) {
131
+ const index = logs.files.findIndex((val) => {
132
+ return pendingLog.endsWith(val.name);
133
+ });
134
+ if (index > -1) {
135
+ setSelected(index);
136
+ }
137
+ setPendingLog(undefined);
138
+ }
139
+ }, [logs, pendingLog])
131
140
 
132
141
  // listen for updateState messages from vscode
133
142
  useEffect(() => {
134
- const onMessage = (e) => {
143
+ const onMessage = async (e) => {
135
144
  switch (e.data.type || e.data.message) {
136
145
  case "updateState": {
137
146
  if (e.data.url) {
138
-
139
147
  const index = logs.files.findIndex((val) => {
140
- return val.name.endsWith(e.data.url);
148
+ return e.data.url.endsWith(val.name);
141
149
  });
142
150
  if (index > -1) {
143
151
  // Select the correct index
144
152
  setSelected(index);
145
153
  } else {
146
- // TODO: Error
154
+ await loadLogs();
155
+ setPendingLog(e.data.url);
147
156
  }
148
157
  }
149
158
  }
@@ -153,7 +162,7 @@ export function App() {
153
162
  return () => {
154
163
  window.removeEventListener("message", onMessage);
155
164
  };
156
- }, [setCurrentLog]);
165
+ }, [logs, setCurrentLog, setPendingLog]);
157
166
 
158
167
  useEffect(async () => {
159
168
  // See whether a specific task_file has been passed.
@@ -176,6 +185,9 @@ export function App() {
176
185
  // initial fetch of logs
177
186
  await load();
178
187
 
188
+ // Select the first log
189
+ setSelected(0);
190
+
179
191
  // poll every 1s for events
180
192
  setInterval(() => {
181
193
  api.client_events().then((events) => {
@@ -24,6 +24,7 @@ export const samplesDescriptor = (samples, epochs, context) => {
24
24
  const uniqScoreValues = [
25
25
  ...new Set(
26
26
  samples
27
+ .filter(sample => !!sample.score)
27
28
  .map((sample) => sample.score.value)
28
29
  .filter((value) => {
29
30
  return value !== null;
@@ -92,7 +92,7 @@ export const SamplesTab = (props) => {
92
92
  // Focus the sample list
93
93
  useEffect(() => {
94
94
  const listEl = sampleListRef.current;
95
- if (listEl) {
95
+ if (listEl && listEl.base) {
96
96
  listEl.base.focus();
97
97
  }
98
98
  }, [items]);
@@ -6,7 +6,8 @@ from ._prompt import (
6
6
  prompt_template,
7
7
  system_message,
8
8
  )
9
- from ._solver import Generate, Solver, TaskState, generate, solver
9
+ from ._solver import Generate, Solver, generate, solver
10
+ from ._task_state import TaskState
10
11
  from ._tool.tool import Tool, tool
11
12
  from ._tool.use_tools import use_tools
12
13
  from ._tool.web_search import web_search
@@ -5,7 +5,8 @@ from inspect_ai.model import (
5
5
  )
6
6
  from inspect_ai.util import resource
7
7
 
8
- from ._solver import Generate, Solver, TaskState, solver
8
+ from ._solver import Generate, Solver, solver
9
+ from ._task_state import TaskState
9
10
 
10
11
 
11
12
  @solver
@@ -4,7 +4,8 @@ from random import Random
4
4
 
5
5
  from inspect_ai.util import resource
6
6
 
7
- from ._solver import Generate, Solver, TaskState, solver
7
+ from ._solver import Generate, Solver, solver
8
+ from ._task_state import TaskState
8
9
 
9
10
  logger = logging.getLogger(__name__)
10
11
 
@@ -11,7 +11,8 @@ from inspect_ai._util.registry import (
11
11
  registry_tag,
12
12
  )
13
13
 
14
- from ._solver import Solver, TaskState
14
+ from ._solver import Solver
15
+ from ._task_state import TaskState
15
16
 
16
17
 
17
18
  class Plan:
@@ -3,7 +3,8 @@ from typing import Any
3
3
  from inspect_ai.model import ChatMessageSystem
4
4
  from inspect_ai.util import resource
5
5
 
6
- from ._solver import Generate, Solver, TaskState, solver
6
+ from ._solver import Generate, Solver, solver
7
+ from ._task_state import TaskState
7
8
  from ._util import append_system_message
8
9
 
9
10
 
@@ -18,110 +18,9 @@ from inspect_ai._util.registry import (
18
18
  registry_name,
19
19
  registry_tag,
20
20
  )
21
- from inspect_ai.model import (
22
- ChatMessage,
23
- ChatMessageUser,
24
- GenerateConfigArgs,
25
- ModelName,
26
- ModelOutput,
27
- ToolChoice,
28
- )
29
-
30
- from ._tool.tool import Tool
31
-
21
+ from inspect_ai.model import GenerateConfigArgs
32
22
 
33
- class TaskState:
34
- def __init__(
35
- self,
36
- model: ModelName,
37
- sample_id: int | str,
38
- epoch: int,
39
- input: str | list[ChatMessage],
40
- choices: list[str] | None,
41
- messages: list[ChatMessage],
42
- tools: list[Tool] = [],
43
- tool_choice: ToolChoice | None = None,
44
- output: ModelOutput | None = None,
45
- completed: bool = False,
46
- metadata: dict[str, Any] = {},
47
- ) -> None:
48
- self._model = model
49
-
50
- self.sample_id = sample_id
51
- """Unique id for sample."""
52
-
53
- self.epoch = epoch
54
- """Epoch number for sample."""
55
-
56
- self._input = input
57
-
58
- self.choices = choices
59
- """Sample choices."""
60
-
61
- self.messages = messages
62
- """Chat conversation history for sample."""
63
-
64
- self.tools = tools
65
- """Tools available to the model."""
66
-
67
- self.tool_choice = tool_choice
68
- """Tool choice directive."""
69
-
70
- self.output = output if output else ModelOutput(model=str(model), choices=[])
71
- """Model output."""
72
-
73
- self.completed = completed
74
- """Flag to indicate that the solver loop should terminate."""
75
-
76
- self.metadata = metadata
77
- """Additional task state metadata."""
78
-
79
- @property
80
- def model(self) -> ModelName:
81
- """Name of model being evaluated."""
82
- return self._model
83
-
84
- @property
85
- def input(self) -> str | list[ChatMessage]:
86
- """Sample input."""
87
- return self._input
88
-
89
- @property
90
- def input_text(self) -> str:
91
- """Sample input as text."""
92
- if isinstance(self._input, str):
93
- return self._input
94
- else:
95
- input = next(
96
- (message.text for message in self._input if message.role == "user"),
97
- None,
98
- )
99
- if input:
100
- return input
101
- else:
102
- raise ValueError(
103
- "input_text requested from TaskState but none available"
104
- )
105
-
106
- @property
107
- def user_prompt(self) -> ChatMessageUser:
108
- """User prompt for this state.
109
-
110
- Tasks are very general and can have many types of inputs.
111
- However, in many cases solvers assume they can interact with
112
- the state as a "chat" in a predictable fashion (e.g. prompt
113
- engineering solvers). This property enables easy read and
114
- write access to the user chat prompt. Raises an
115
- exception if there is no user prompt
116
-
117
- Returns:
118
- First user `ChatMessage` in the task state.
119
- """
120
- prompt = next((m for m in self.messages if m.role == "user"), None)
121
- if prompt:
122
- return prompt
123
- else:
124
- raise ValueError("user_prompt requested from TaskState but none available")
23
+ from ._task_state import TaskState
125
24
 
126
25
 
127
26
  @runtime_checkable