PyPI - inspect-ai - Versions diffs - 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl - Mend

inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (166) hide show

inspect_ai/_cli/common.py +3 -1
inspect_ai/_cli/eval.py +15 -9
inspect_ai/_display/core/active.py +4 -1
inspect_ai/_display/core/config.py +3 -3
inspect_ai/_display/core/panel.py +7 -3
inspect_ai/_display/plain/__init__.py +0 -0
inspect_ai/_display/plain/display.py +203 -0
inspect_ai/_display/rich/display.py +0 -5
inspect_ai/_display/textual/widgets/port_mappings.py +110 -0
inspect_ai/_display/textual/widgets/samples.py +79 -12
inspect_ai/_display/textual/widgets/sandbox.py +37 -0
inspect_ai/_eval/eval.py +10 -1
inspect_ai/_eval/loader.py +79 -19
inspect_ai/_eval/registry.py +6 -0
inspect_ai/_eval/score.py +3 -1
inspect_ai/_eval/task/results.py +51 -22
inspect_ai/_eval/task/run.py +47 -13
inspect_ai/_eval/task/sandbox.py +10 -5
inspect_ai/_util/constants.py +1 -0
inspect_ai/_util/port_names.py +61 -0
inspect_ai/_util/text.py +23 -0
inspect_ai/_view/www/App.css +31 -1
inspect_ai/_view/www/dist/assets/index.css +31 -1
inspect_ai/_view/www/dist/assets/index.js +25498 -2044
inspect_ai/_view/www/log-schema.json +32 -2
inspect_ai/_view/www/package.json +2 -0
inspect_ai/_view/www/src/App.mjs +14 -16
inspect_ai/_view/www/src/Types.mjs +1 -2
inspect_ai/_view/www/src/api/Types.ts +133 -0
inspect_ai/_view/www/src/api/{api-browser.mjs → api-browser.ts} +25 -13
inspect_ai/_view/www/src/api/api-http.ts +219 -0
inspect_ai/_view/www/src/api/api-shared.ts +47 -0
inspect_ai/_view/www/src/api/{api-vscode.mjs → api-vscode.ts} +22 -19
inspect_ai/_view/www/src/api/{client-api.mjs → client-api.ts} +93 -53
inspect_ai/_view/www/src/api/index.ts +51 -0
inspect_ai/_view/www/src/api/jsonrpc.ts +225 -0
inspect_ai/_view/www/src/components/ChatView.mjs +133 -43
inspect_ai/_view/www/src/components/DownloadButton.mjs +1 -1
inspect_ai/_view/www/src/components/ExpandablePanel.mjs +0 -4
inspect_ai/_view/www/src/components/LargeModal.mjs +19 -20
inspect_ai/_view/www/src/components/TabSet.mjs +3 -1
inspect_ai/_view/www/src/components/VirtualList.mjs +266 -84
inspect_ai/_view/www/src/index.js +77 -4
inspect_ai/_view/www/src/log/{remoteLogFile.mjs → remoteLogFile.ts} +62 -46
inspect_ai/_view/www/src/navbar/Navbar.mjs +4 -1
inspect_ai/_view/www/src/navbar/SecondaryBar.mjs +19 -10
inspect_ai/_view/www/src/samples/SampleDialog.mjs +5 -1
inspect_ai/_view/www/src/samples/SampleDisplay.mjs +23 -15
inspect_ai/_view/www/src/samples/SampleList.mjs +19 -49
inspect_ai/_view/www/src/samples/SampleScores.mjs +1 -1
inspect_ai/_view/www/src/samples/SampleTranscript.mjs +8 -3
inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +38 -26
inspect_ai/_view/www/src/samples/SamplesTab.mjs +14 -11
inspect_ai/_view/www/src/samples/SamplesTools.mjs +8 -8
inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +712 -89
inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +2 -2
inspect_ai/_view/www/src/samples/tools/filters.mjs +260 -87
inspect_ai/_view/www/src/samples/transcript/ErrorEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/EventPanel.mjs +29 -24
inspect_ai/_view/www/src/samples/transcript/EventRow.mjs +1 -1
inspect_ai/_view/www/src/samples/transcript/InfoEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/InputEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +31 -10
inspect_ai/_view/www/src/samples/transcript/SampleInitEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/SampleLimitEventView.mjs +23 -2
inspect_ai/_view/www/src/samples/transcript/ScoreEventView.mjs +24 -2
inspect_ai/_view/www/src/samples/transcript/StepEventView.mjs +33 -3
inspect_ai/_view/www/src/samples/transcript/SubtaskEventView.mjs +25 -2
inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +25 -2
inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +193 -11
inspect_ai/_view/www/src/samples/transcript/Types.mjs +10 -0
inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +26 -2
inspect_ai/_view/www/src/types/log.d.ts +13 -2
inspect_ai/_view/www/src/utils/Format.mjs +10 -3
inspect_ai/_view/www/src/utils/{Json.mjs → json-worker.ts} +13 -9
inspect_ai/_view/www/src/utils/vscode.ts +36 -0
inspect_ai/_view/www/src/workspace/WorkSpace.mjs +11 -5
inspect_ai/_view/www/vite.config.js +7 -0
inspect_ai/_view/www/yarn.lock +116 -0
inspect_ai/approval/_human/__init__.py +0 -0
inspect_ai/approval/_human/manager.py +1 -1
inspect_ai/approval/_policy.py +12 -6
inspect_ai/log/_log.py +1 -1
inspect_ai/log/_samples.py +16 -0
inspect_ai/log/_transcript.py +4 -1
inspect_ai/model/_call_tools.py +59 -0
inspect_ai/model/_conversation.py +16 -7
inspect_ai/model/_generate_config.py +12 -12
inspect_ai/model/_model.py +117 -18
inspect_ai/model/_model_output.py +22 -2
inspect_ai/model/_openai.py +383 -0
inspect_ai/model/_providers/anthropic.py +152 -55
inspect_ai/model/_providers/azureai.py +21 -21
inspect_ai/model/_providers/bedrock.py +37 -40
inspect_ai/model/_providers/goodfire.py +248 -0
inspect_ai/model/_providers/google.py +46 -54
inspect_ai/model/_providers/groq.py +7 -3
inspect_ai/model/_providers/hf.py +6 -0
inspect_ai/model/_providers/mistral.py +13 -12
inspect_ai/model/_providers/openai.py +51 -218
inspect_ai/model/_providers/openai_o1.py +11 -12
inspect_ai/model/_providers/providers.py +23 -1
inspect_ai/model/_providers/together.py +12 -12
inspect_ai/model/_providers/util/__init__.py +2 -3
inspect_ai/model/_providers/util/hf_handler.py +1 -1
inspect_ai/model/_providers/util/llama31.py +1 -1
inspect_ai/model/_providers/util/util.py +0 -76
inspect_ai/model/_providers/vertex.py +1 -4
inspect_ai/scorer/_metric.py +3 -0
inspect_ai/scorer/_reducer/reducer.py +1 -1
inspect_ai/scorer/_scorer.py +4 -3
inspect_ai/solver/__init__.py +4 -5
inspect_ai/solver/_basic_agent.py +1 -1
inspect_ai/solver/_bridge/__init__.py +3 -0
inspect_ai/solver/_bridge/bridge.py +100 -0
inspect_ai/solver/_bridge/patch.py +170 -0
inspect_ai/solver/_prompt.py +35 -5
inspect_ai/solver/_solver.py +6 -0
inspect_ai/solver/_task_state.py +80 -38
inspect_ai/tool/__init__.py +2 -0
inspect_ai/tool/_tool.py +12 -1
inspect_ai/tool/_tool_call.py +10 -0
inspect_ai/tool/_tool_def.py +16 -5
inspect_ai/tool/_tool_with.py +21 -4
inspect_ai/tool/beta/__init__.py +5 -0
inspect_ai/tool/beta/_computer/__init__.py +3 -0
inspect_ai/tool/beta/_computer/_common.py +133 -0
inspect_ai/tool/beta/_computer/_computer.py +155 -0
inspect_ai/tool/beta/_computer/_computer_split.py +198 -0
inspect_ai/tool/beta/_computer/_resources/Dockerfile +100 -0
inspect_ai/tool/beta/_computer/_resources/README.md +30 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/entrypoint.sh +18 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/novnc_startup.sh +20 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/x11vnc_startup.sh +48 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/xfce_startup.sh +13 -0
inspect_ai/tool/beta/_computer/_resources/entrypoint/xvfb_startup.sh +48 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Firefox Web Browser.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/Visual Studio Code.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/image_home_dir/Desktop/XPaint.desktop +10 -0
inspect_ai/tool/beta/_computer/_resources/tool/__init__.py +0 -0
inspect_ai/tool/beta/_computer/_resources/tool/_logger.py +22 -0
inspect_ai/tool/beta/_computer/_resources/tool/_run.py +42 -0
inspect_ai/tool/beta/_computer/_resources/tool/_tool_result.py +33 -0
inspect_ai/tool/beta/_computer/_resources/tool/_x11_client.py +262 -0
inspect_ai/tool/beta/_computer/_resources/tool/computer_tool.py +85 -0
inspect_ai/tool/beta/_computer/_resources/tool/requirements.txt +0 -0
inspect_ai/util/__init__.py +2 -0
inspect_ai/util/_display.py +5 -0
inspect_ai/util/_limit.py +26 -0
inspect_ai/util/_sandbox/docker/docker.py +64 -1
inspect_ai/util/_sandbox/docker/internal.py +3 -1
inspect_ai/util/_sandbox/docker/prereqs.py +1 -1
inspect_ai/util/_sandbox/environment.py +14 -0
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/METADATA +3 -2
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/RECORD +159 -126
inspect_ai/_view/www/src/api/Types.mjs +0 -117
inspect_ai/_view/www/src/api/api-http.mjs +0 -300
inspect_ai/_view/www/src/api/api-shared.mjs +0 -10
inspect_ai/_view/www/src/api/index.mjs +0 -49
inspect_ai/_view/www/src/api/jsonrpc.mjs +0 -208
inspect_ai/_view/www/src/samples/transcript/TranscriptState.mjs +0 -70
inspect_ai/_view/www/src/utils/vscode.mjs +0 -16
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/LICENSE +0 -0
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/WHEEL +0 -0
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/entry_points.txt +0 -0
{inspect_ai-0.3.58.dist-info → inspect_ai-0.3.60.dist-info}/top_level.txt +0 -0

inspect_ai/_view/www/yarn.lock CHANGED Viewed

@@ -169,6 +169,70 @@
     "@babel/helper-validator-identifier" "^7.24.7"
     to-fast-properties "^2.0.0"
+"@codemirror/autocomplete@^6.0.0":
+  version "6.18.3"
+  resolved "https://registry.yarnpkg.com/@codemirror/autocomplete/-/autocomplete-6.18.3.tgz#f9ea79a2f369662516f71bc0b2f819454d3c8e00"
+  integrity sha512-1dNIOmiM0z4BIBwxmxEfA1yoxh1MF/6KPBbh20a5vphGV0ictKlgQsbJs6D6SkR6iJpGbpwRsa6PFMNlg9T9pQ==
+  dependencies:
+    "@codemirror/language" "^6.0.0"
+    "@codemirror/state" "^6.0.0"
+    "@codemirror/view" "^6.17.0"
+    "@lezer/common" "^1.0.0"
+"@codemirror/commands@^6.0.0":
+  version "6.7.1"
+  resolved "https://registry.yarnpkg.com/@codemirror/commands/-/commands-6.7.1.tgz#04561e95bc0779eaa49efd63e916c4efb3bbf6d6"
+  integrity sha512-llTrboQYw5H4THfhN4U3qCnSZ1SOJ60ohhz+SzU0ADGtwlc533DtklQP0vSFaQuCPDn3BPpOd1GbbnUtwNjsrw==
+  dependencies:
+    "@codemirror/language" "^6.0.0"
+    "@codemirror/state" "^6.4.0"
+    "@codemirror/view" "^6.27.0"
+    "@lezer/common" "^1.1.0"
+"@codemirror/language@^6.0.0":
+  version "6.10.6"
+  resolved "https://registry.yarnpkg.com/@codemirror/language/-/language-6.10.6.tgz#3770aa55fce575b45b1037b390b576907f0061c7"
+  integrity sha512-KrsbdCnxEztLVbB5PycWXFxas4EOyk/fPAfruSOnDDppevQgid2XZ+KbJ9u+fDikP/e7MW7HPBTvTb8JlZK9vA==
+  dependencies:
+    "@codemirror/state" "^6.0.0"
+    "@codemirror/view" "^6.23.0"
+    "@lezer/common" "^1.1.0"
+    "@lezer/highlight" "^1.0.0"
+    "@lezer/lr" "^1.0.0"
+    style-mod "^4.0.0"
+"@codemirror/lint@^6.0.0":
+  version "6.8.4"
+  resolved "https://registry.yarnpkg.com/@codemirror/lint/-/lint-6.8.4.tgz#7d8aa5d1a6dec89ffcc23ad45ddca2e12e90982d"
+  integrity sha512-u4q7PnZlJUojeRe8FJa/njJcMctISGgPQ4PnWsd9268R4ZTtU+tfFYmwkBvgcrK2+QQ8tYFVALVb5fVJykKc5A==
+  dependencies:
+    "@codemirror/state" "^6.0.0"
+    "@codemirror/view" "^6.35.0"
+    crelt "^1.0.5"
+"@codemirror/search@^6.0.0":
+  version "6.5.8"
+  resolved "https://registry.yarnpkg.com/@codemirror/search/-/search-6.5.8.tgz#b59b3659b46184cc75d6108d7c050a4ca344c3a0"
+  integrity sha512-PoWtZvo7c1XFeZWmmyaOp2G0XVbOnm+fJzvghqGAktBW3cufwJUWvSCcNG0ppXiBEM05mZu6RhMtXPv2hpllig==
+  dependencies:
+    "@codemirror/state" "^6.0.0"
+    "@codemirror/view" "^6.0.0"
+    crelt "^1.0.5"
+"@codemirror/state@^6.0.0", "@codemirror/state@^6.4.0":
+  version "6.4.1"
+  resolved "https://registry.yarnpkg.com/@codemirror/state/-/state-6.4.1.tgz#da57143695c056d9a3c38705ed34136e2b68171b"
+  integrity sha512-QkEyUiLhsJoZkbumGZlswmAhA7CBU02Wrz7zvH4SrcifbsqwlXShVXg65f3v/ts57W3dqyamEriMhij1Z3Zz4A==
+"@codemirror/view@^6.0.0", "@codemirror/view@^6.17.0", "@codemirror/view@^6.23.0", "@codemirror/view@^6.27.0", "@codemirror/view@^6.35.0":
+  version "6.35.0"
+  resolved "https://registry.yarnpkg.com/@codemirror/view/-/view-6.35.0.tgz#890e8e31a58edf65cdf193049fe9f3fdec20cc82"
+  integrity sha512-I0tYy63q5XkaWsJ8QRv5h6ves7kvtrBWjBcnf/bzohFJQc5c14a1AQRdE8QpPF9eMp5Mq2FMm59TCj1gDfE7kw==
+  dependencies:
+    "@codemirror/state" "^6.4.0"
+    style-mod "^4.1.0"
+    w3c-keyname "^2.2.4"
 "@esbuild/aix-ppc64@0.21.5":
   version "0.21.5"
   resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.21.5.tgz#c7184a326533fcdf1b8ee0733e21c713b975575f"
@@ -372,6 +436,25 @@
     "@jridgewell/resolve-uri" "^3.1.0"
     "@jridgewell/sourcemap-codec" "^1.4.14"
+"@lezer/common@^1.0.0", "@lezer/common@^1.1.0":
+  version "1.2.3"
+  resolved "https://registry.yarnpkg.com/@lezer/common/-/common-1.2.3.tgz#138fcddab157d83da557554851017c6c1e5667fd"
+  integrity sha512-w7ojc8ejBqr2REPsWxJjrMFsA/ysDCFICn8zEOR9mrqzOu2amhITYuLD8ag6XZf0CFXDrhKqw7+tW8cX66NaDA==
+"@lezer/highlight@^1.0.0":
+  version "1.2.1"
+  resolved "https://registry.yarnpkg.com/@lezer/highlight/-/highlight-1.2.1.tgz#596fa8f9aeb58a608be0a563e960c373cbf23f8b"
+  integrity sha512-Z5duk4RN/3zuVO7Jq0pGLJ3qynpxUVsh7IbUbGj88+uV2ApSAn6kWg2au3iJb+0Zi7kKtqffIESgNcRXWZWmSA==
+  dependencies:
+    "@lezer/common" "^1.0.0"
+"@lezer/lr@^1.0.0":
+  version "1.4.2"
+  resolved "https://registry.yarnpkg.com/@lezer/lr/-/lr-1.4.2.tgz#931ea3dea8e9de84e90781001dae30dea9ff1727"
+  integrity sha512-pu0K1jCIdnQ12aWNaAVU5bzi7Bd1w54J3ECgANPmYLtQKP0HBj2cE/5coBD66MT10xbtIuUr7tg0Shbsvk0mDA==
+  dependencies:
+    "@lezer/common" "^1.0.0"
 "@nodelib/fs.scandir@2.1.5":
   version "2.1.5"
   resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5"
@@ -619,6 +702,19 @@ clipboard@^2.0.11:
     select "^1.1.2"
     tiny-emitter "^2.0.0"
+codemirror@^6.0.1:
+  version "6.0.1"
+  resolved "https://registry.yarnpkg.com/codemirror/-/codemirror-6.0.1.tgz#62b91142d45904547ee3e0e0e4c1a79158035a29"
+  integrity sha512-J8j+nZ+CdWmIeFIGXEFbFPtpiYacFMDR8GlHK3IyHQJMCaVRfGx9NT+Hxivv1ckLWPvNdZqndbr/7lVhrf/Svg==
+  dependencies:
+    "@codemirror/autocomplete" "^6.0.0"
+    "@codemirror/commands" "^6.0.0"
+    "@codemirror/language" "^6.0.0"
+    "@codemirror/lint" "^6.0.0"
+    "@codemirror/search" "^6.0.0"
+    "@codemirror/state" "^6.0.0"
+    "@codemirror/view" "^6.0.0"
 color-convert@^1.9.0:
   version "1.9.3"
   resolved "https://registry.yarnpkg.com/color-convert/-/color-convert-1.9.3.tgz#bb71850690e1f136567de629d2d5471deda4c1e8"
@@ -653,6 +749,11 @@ convert-source-map@^2.0.0:
   resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-2.0.0.tgz#4b560f649fc4e918dd0ab75cf4961e8bc882d82a"
   integrity sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==
+crelt@^1.0.5:
+  version "1.0.6"
+  resolved "https://registry.yarnpkg.com/crelt/-/crelt-1.0.6.tgz#7cc898ea74e190fb6ef9dae57f8f81cf7302df72"
+  integrity sha512-VQ2MBenTq1fWZUH9DJNGti7kKv6EeAuYr3cLwxUWhIu1baTaXh4Ib5W2CqHVqib4/MqbYGJqiL3Zb8GJZr3l4g==
 cross-spawn@^7.0.2:
   version "7.0.3"
   resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6"
@@ -885,6 +986,11 @@ file-entry-cache@^8.0.0:
   dependencies:
     flat-cache "^4.0.0"
+filtrex@^3.1.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/filtrex/-/filtrex-3.1.0.tgz#5ec00994615ff10e5e09c89bb290c855cb408c21"
+  integrity sha512-mHzZ2wUISETF1OaEcNRiGz1ljuIV8c/C9td9qyAZ+wTwigkAk5RO9YrCxQKk5H9v7joDRFIBik9U5RTK9eXZ/A==
 find-up@^5.0.0:
   version "5.0.0"
   resolved "https://registry.yarnpkg.com/find-up/-/find-up-5.0.0.tgz#4c92819ecb7083561e4f4a240a86be5198f536fc"
@@ -1367,6 +1473,11 @@ strip-json-comments@^3.1.1:
   resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006"
   integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==
+style-mod@^4.0.0, style-mod@^4.1.0:
+  version "4.1.2"
+  resolved "https://registry.yarnpkg.com/style-mod/-/style-mod-4.1.2.tgz#ca238a1ad4786520f7515a8539d5a63691d7bf67"
+  integrity sha512-wnD1HyVqpJUI2+eKZ+eo1UwghftP6yuFheBqqe+bWCotBjC2K1YnteJILRMs3SM4V/0dLEW1SC27MWP5y+mwmw==
 supports-color@^5.3.0:
   version "5.5.0"
   resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-5.5.0.tgz#e2e69a44ac8772f78a1ec0b35b689df6530efc8f"
@@ -1442,6 +1553,11 @@ vite@^5.3.2:
   optionalDependencies:
     fsevents "~2.3.3"
+w3c-keyname@^2.2.4:
+  version "2.2.8"
+  resolved "https://registry.yarnpkg.com/w3c-keyname/-/w3c-keyname-2.2.8.tgz#7b17c8c6883d4e8b86ac8aba79d39e880f8869c5"
+  integrity sha512-dpojBhNsCNN7T82Tm7k26A6G9ML3NkhDsnw9n/eoxSRlVBB4CEtIQ/KTCLI2Fwf3ataSXRhYFkQi3SlnFwPvPQ==
 which@^2.0.1:
   version "2.0.2"
   resolved "https://registry.yarnpkg.com/which/-/which-2.0.2.tgz#7c6a8dd0a636a0327e10b59c9286eee93f3f51b1"

inspect_ai/approval/_human/__init__.py ADDED Viewed

File without changes

inspect_ai/approval/_human/manager.py CHANGED Viewed

@@ -40,7 +40,7 @@ class HumanApprovalManager:
         future = cast(Future[Approval], asyncio.get_event_loop().create_future())
         sample = sample_active()
         assert sample
-        assert sample.sample.id
+        assert sample.sample.id is not None
         pending = PendingApprovalRequest(
             request=request,
             task=sample.task,

inspect_ai/approval/_policy.py CHANGED Viewed

@@ -1,13 +1,13 @@
 import fnmatch
-import re
+import sys
 from dataclasses import dataclass
 from pathlib import Path
-from re import Pattern
 from typing import Any, Generator, cast
 from pydantic import BaseModel, Field, model_validator
 from inspect_ai._util.config import read_config_object
+from inspect_ai._util.format import format_function_call
 from inspect_ai._util.registry import registry_create, registry_lookup
 from inspect_ai.solver._task_state import TaskState
 from inspect_ai.tool._tool_call import ToolCall, ToolCallView
@@ -30,17 +30,23 @@ def policy_approver(policies: str | list[ApprovalPolicy]) -> Approver:
         policies = approval_policies_from_config(policies)
     # compile policy into approvers and regexes for matching
-    policy_matchers: list[tuple[list[Pattern[str]], Approver]] = []
+    policy_matchers: list[tuple[list[str], Approver]] = []
     for policy in policies:
         tools = [policy.tools] if isinstance(policy.tools, str) else policy.tools
-        patterns = [re.compile(fnmatch.translate(tool)) for tool in tools]
-        policy_matchers.append((patterns, policy.approver))
+        globs = [f"{tool}*" for tool in tools]
+        policy_matchers.append((globs, policy.approver))
     # generator for policies that match a tool_call
     def tool_approvers(tool_call: ToolCall) -> Generator[Approver, None, None]:
         for policy_matcher in iter(policy_matchers):
+            function_call = format_function_call(
+                tool_call.function, tool_call.arguments, width=sys.maxsize
+            )
             if any(
-                [pattern.match(tool_call.function) for pattern in policy_matcher[0]]
+                [
+                    fnmatch.fnmatch(function_call, pattern)
+                    for pattern in policy_matcher[0]
+                ]
             ):
                 yield policy_matcher[1]

inspect_ai/log/_log.py CHANGED Viewed

@@ -114,7 +114,7 @@ class EvalConfig(BaseModel):
 class EvalSampleLimit(BaseModel):
-    type: Literal["context", "time", "message", "token", "operator"]
+    type: Literal["context", "time", "message", "token", "operator", "custom"]
     """The type of limit"""
     limit: int

inspect_ai/log/_samples.py CHANGED Viewed

@@ -113,6 +113,14 @@ def sample_active() -> ActiveSample | None:
     return _sample_active.get(None)
+def active_sample_token_limit() -> int | None:
+    active = sample_active()
+    if active:
+        return active.token_limit
+    else:
+        return None
 def set_active_sample_token_limit(token_limit: int | None) -> None:
     active = sample_active()
     if active:
@@ -125,6 +133,14 @@ def set_active_sample_total_tokens(total_tokens: int) -> None:
         active.total_tokens = total_tokens
+def active_sample_message_limit() -> int | None:
+    active = sample_active()
+    if active:
+        return active.message_limit
+    else:
+        return None
 def set_active_sample_message_limit(message_limit: int | None) -> None:
     active = sample_active()
     if active:

inspect_ai/log/_transcript.py CHANGED Viewed

@@ -70,7 +70,7 @@ class SampleLimitEvent(BaseEvent):
     event: Literal["sample_limit"] = Field(default="sample_limit")
     """Event type."""
-    type: Literal["message", "time", "token", "operator"]
+    type: Literal["message", "time", "token", "operator", "custom"]
     """Type of limit that halted processing"""
     message: str
@@ -124,6 +124,9 @@ class ModelEvent(BaseEvent):
     output: ModelOutput
     """Output from model."""
+    error: str | None = Field(default=None)
+    """Error which occurred during model call."""
     cache: Literal["read", "write"] | None = Field(default=None)
     """Was this a cache read or write."""

inspect_ai/model/_call_tools.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import asyncio
 import inspect
+import json
 import types
 from dataclasses import is_dataclass
 from logging import getLogger
@@ -21,6 +22,7 @@ from typing import (
     is_typeddict,
 )
+import yaml
 from jsonschema import Draft7Validator
 from pydantic import BaseModel
@@ -328,6 +330,10 @@ def tool_params(input: dict[str, Any], func: Callable[..., Any]) -> dict[str, An
     type_hints = get_type_hints(func)
     docstring = inspect.getdoc(func)
+    # if the function takes **kwargs: Any then just pass the tool arguments through
+    if "kwargs" in type_hints and type_hints["kwargs"] == Any:
+        return input
     # build params
     params: dict[str, Any] = {}
     for param_name, param in signature.parameters.items():
@@ -465,3 +471,56 @@ def truncate_tool_output(
         )
     else:
         return None
+def tool_parse_error_message(arguments: str, ex: Exception) -> str:
+    return f"Error parsing the following tool call arguments:\n\n{arguments}\n\nError details: {ex}"
+def parse_tool_call(
+    id: str, function: str, arguments: str, tools: list[ToolInfo] | None = None
+) -> ToolCall:
+    error: str | None = None
+    arguments_dict: dict[str, Any] = {}
+    def report_parse_error(ex: Exception) -> None:
+        nonlocal error
+        error = tool_parse_error_message(arguments, ex)
+        logger.info(error)
+    # if the arguments is a dict, then handle it with a plain json.loads
+    arguments = arguments.strip()
+    if arguments.startswith("{"):
+        try:
+            arguments_dict = json.loads(arguments)
+        except json.JSONDecodeError as ex:
+            report_parse_error(ex)
+    # otherwise parse it as yaml (which will pickup unquoted strings, numbers, and true/false)
+    # and then create a dict that maps it to the first function argument
+    elif function and tools:
+        tool_info = next(
+            (
+                tool
+                for tool in tools
+                if tool.name == function and len(tool.parameters.properties) > 0
+            ),
+            None,
+        )
+        if tool_info:
+            param_names = list(tool_info.parameters.properties.keys())
+            try:
+                value = yaml.safe_load(arguments)
+                arguments_dict[param_names[0]] = value
+            except yaml.error.YAMLError:
+                # If the yaml parser fails, we treat it as a string argument.
+                arguments_dict[param_names[0]] = arguments
+    # return ToolCall with error payload
+    return ToolCall(
+        id=id,
+        function=function,
+        arguments=arguments_dict,
+        type="function",
+        parse_error=error,
+    )

inspect_ai/model/_conversation.py CHANGED Viewed

@@ -15,13 +15,16 @@ MESSAGE_TITLE = "Message"
 def conversation_tool_mesage(message: ChatMessageTool) -> None:
     if display_type() == "conversation":
         # truncate output to 100 lines
-        output = message.error.message if message.error else message.text.strip()
-        content = lines_display(output, 100)
-        conversation_panel(
-            title=f"Tool Output: {message.function}",
-            content=content,
+        output = (
+            message.error.message.strip() if message.error else message.text.strip()
         )
+        if output:
+            content = lines_display(output, 100)
+            conversation_panel(
+                title=f"Tool Output: {message.function}",
+                content=content,
+            )
 def conversation_assistant_message(
@@ -42,8 +45,14 @@ def conversation_assistant_message(
         # print tool calls
         if message.tool_calls:
-            content.append(Text())
+            if content:
+                content.append(Text())
             content.extend(render_tool_calls(message.tool_calls))
         # print the assistant message
         conversation_panel(title="Assistant", content=content)
+def conversation_assistant_error(error: Exception) -> None:
+    if display_type() == "conversation":
+        conversation_panel(title="Assistant", content=repr(error))

inspect_ai/model/_generate_config.py CHANGED Viewed

@@ -34,7 +34,7 @@ class GenerateConfigArgs(TypedDict, total=False):
     """Sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."""
     best_of: int | None
-    """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). OpenAI only."""
+    """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). vLLM only."""
     frequency_penalty: float | None
     """Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. OpenAI, Google, Grok, Groq, and vLLM only."""
@@ -48,9 +48,6 @@ class GenerateConfigArgs(TypedDict, total=False):
     seed: int | None
     """Random seed. OpenAI, Google, Mistral, Groq, HuggingFace, and vLLM only."""
-    suffix: str | None
-    """The suffix that comes after a completion of inserted text. OpenAI only."""
     top_k: int | None
     """Randomly sample the next word from the top_k most likely next words. Anthropic, Google, and HuggingFace only."""
@@ -58,14 +55,17 @@ class GenerateConfigArgs(TypedDict, total=False):
     """How many chat completion choices to generate for each input message. OpenAI, Grok, Google, and TogetherAI only."""
     logprobs: bool | None
-    """Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""
+    """Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""
     top_logprobs: int | None
-    """Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Google, Grok, and Huggingface only."""
+    """Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, and Huggingface only."""
     parallel_tool_calls: bool | None
     """Whether to enable parallel function calling during tool use (defaults to True). OpenAI and Groq only."""
+    internal_tools: bool | None
+    """Whether to automatically map tools to model internal implementations (e.g. 'computer' for anthropic)."""
     max_tool_output: int | None
     """Maximum tool output (in bytes). Defaults to 16 * 1024."""
@@ -104,7 +104,7 @@ class GenerateConfig(BaseModel):
     """Sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."""
     best_of: int | None = Field(default=None)
-    """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). OpenAI and vLLM only."""
+    """Generates best_of completions server-side and returns the 'best' (the one with the highest log probability per token). vLLM only."""
     frequency_penalty: float | None = Field(default=None)
     """Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. OpenAI, Google, Grok, Groq, and vLLM only."""
@@ -118,9 +118,6 @@ class GenerateConfig(BaseModel):
     seed: int | None = Field(default=None)
     """Random seed. OpenAI, Google, Mistral, Groq, HuggingFace, and vLLM only."""
-    suffix: str | None = Field(default=None)
-    """The suffix that comes after a completion of inserted text. OpenAI only."""
     top_k: int | None = Field(default=None)
     """Randomly sample the next word from the top_k most likely next words. Anthropic, Google, HuggingFace, and vLLM only."""
@@ -128,14 +125,17 @@ class GenerateConfig(BaseModel):
     """How many chat completion choices to generate for each input message. OpenAI, Grok, Google, TogetherAI, and vLLM only."""
     logprobs: bool | None = Field(default=None)
-    """Return log probabilities of the output tokens. OpenAI, Google, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""
+    """Return log probabilities of the output tokens. OpenAI, Grok, TogetherAI, Huggingface, llama-cpp-python, and vLLM only."""
     top_logprobs: int | None = Field(default=None)
-    """Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Google, Grok, Huggingface, and vLLM only."""
+    """Number of most likely tokens (0-20) to return at each token position, each with an associated log probability. OpenAI, Grok, Huggingface, and vLLM only."""
     parallel_tool_calls: bool | None = Field(default=None)
     """Whether to enable parallel function calling during tool use (defaults to True). OpenAI and Groq only."""
+    internal_tools: bool | None = Field(default=None)
+    """Whether to automatically map tools to model internal implementations (e.g. 'computer' for anthropic)."""
     max_tool_output: int | None = Field(default=None)
     """Maximum tool output (in bytes). Defaults to 16 * 1024."""

inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl

inspect-ai 0.3.58__py3-none-any.whl → 0.3.60__py3-none-any.whl